Mercurial > illumos > illumos-gate
changeset 10053:79ff8cfc9153
PSARC/2009/340 Interrupt affinity interfaces and PCITool enhancements
6796906 Interfaces needed for querying and re-targetting msi-x interrupts
6805710 px driver should support re-targetting MSI-X interrupts
6849547 PCITool enhancements
6851623 ddi_intr_enable() and ddi_intr_disable() fails randomly for dup interrupt.
line wrap: on
line diff
--- a/usr/src/cmd/mdb/sparc/modules/intr/intr.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/cmd/mdb/sparc/modules/intr/intr.c Wed Jul 08 12:59:05 2009 +0800 @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/mdb_modapi.h> #include <mdb/mdb_ks.h> #include <sys/async.h> /* ecc_flt for pci_ecc.h */ @@ -289,13 +287,13 @@ do { /* ino_next_p loop */ if (mdb_vread(&ipil, sizeof (px_ino_pil_t), (uintptr_t)ino.ino_ipil_p) == -1) { - return; + continue; } do { /* ipil_next_p loop */ if (mdb_vread(&ih, sizeof (px_ih_t), (uintptr_t)ipil.ipil_ih_start) == -1) { - return; + continue; } count = 0; @@ -358,11 +356,12 @@ } while (count < ipil.ipil_ih_size); - } while (mdb_vread(&ipil, sizeof (px_ino_pil_t), - (uintptr_t)ipil.ipil_next_p) != -1); + } while ((ipil.ipil_next_p != NULL) && + (mdb_vread(&ipil, sizeof (px_ino_pil_t), + (uintptr_t)ipil.ipil_next_p) != -1)); - } while (mdb_vread(&ino, sizeof (px_ino_t), - (uintptr_t)ino.ino_next_p) != -1); + } while ((ino.ino_next_p != NULL) && (mdb_vread(&ino, sizeof (px_ino_t), + (uintptr_t)ino.ino_next_p) != -1)); } static char *
--- a/usr/src/cmd/pcitool/pcitool.1m Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/cmd/pcitool/pcitool.1m Wed Jul 08 12:59:05 2009 +0800 @@ -24,20 +24,32 @@ pcitool \- interrupt routing tool .SH "SYNOPSIS" .PP -/usr/sbin/pcitool \fIPCI_nexus_node\fR -i ino=\fIino\fR [ -r [ -c ] | -w cpu=\fICPU\fR [ -g ] ] [ -v ] [ -q ] +/usr/sbin/pcitool \fIpci@<unit-address>\fR -i \fI<ino#> | all\fR [ -r [ -c ] | -w \fI<cpu#>\fR [ -g ] ] [ -v ] [ -q ] + +/usr/sbin/pcitool \fIpci@<unit-address>\fR -m \fI<msi#> | all\fR [ -r [ -c ] | -w \fI<cpu#>\fR [ -g ] ] [ -v ] [ -q ] /usr/sbin/pcitool [ -h ] .SH "DESCRIPTION" .PP -PCItool is a low-level tool which provides a facility for getting and setting interrupt routing information. +PCItool is a low-level tool which provides a facility for getting and setting +interrupt routing information. .SS \fIInterrupt Routing\fR The \fIpcitool -i\fR command displays device and CPU routing information for -all inos on a given nexus, and allows rerouting of a given ino or ino group -to a specific CPU. - +INOs on a given nexus, and allows rerouting of a given INO or INO group to a +specific CPU. Use the \fIpcitool -m\fR command to retrieve and reroute MSI/Xs. + +On SPARC platforms, the INO is mapped to an interrupt mondo, whereas one or +more MSI/Xs are mapped to an INO. So, INOs and MSI/Xs are individually +retargetable. Use the "-i" option to retrieve or reroute a given INO, and +use the "-m" option for MSI/Xs. + +On x86 platforms, both INOs and MSI/Xs are mapped to the same interrupt vectors. +Use the "-i" option to retrieve and reroute any interrupt vector (both INOs and +MSI/Xs). The "-m" option is therefore not required on x86 platforms and is not +supported. \fIRequired privileges\fR @@ -52,41 +64,59 @@ -r [ -c ] -Display device and CPU routing information for inos on a given nexus. -The device path and instance number of each device for each displayed ino will -be shown. On some platforms (e.g. 
Fire) interrupts dedicated to the root -complex are indicated with "(Internal)" appended to their pathname. +Display device and CPU routing information for INOs on a given nexus. +The device path and instance number of each device for each displayed INO will +be shown. On some platforms, interrupts dedicated to the root complex are +indicated with "(Internal)" appended to their pathname. Dump interrupt controller information with -c. If neither -r nor -w are provided on the commandline, -r is assumed. -The command for showing all inos on /pci@8,700000 is: - - # pcitool /pci@8,700000 -i - -The command for showing ino 0x23 on the same root nexus, along with sample +The command for showing all INOs on /pci@1e,600000 is: + + # pcitool /pci@1e,600000 -i all + +The command for showing ino 0x19 on the same root nexus, along with sample output, is: - - # pcitool /pci@8,700000 -i ino=23 + + # pcitool /pci@1e,600000 -i 19 + + ino 0x19 mapped to cpu 0x0 + Device: /pci@1e,600000/pci@0/pci@9/pci@0/scsi@1 + Driver: mpt, instance 0 + + ino 0x19 mapped to cpu 0x0 + Device: /pci@1e,600000/pci@0/pci@2/pci@0/network@4 + Driver: bge, instance 0 + +The command for showing MSI 0x1 on the same root nexus, +along with sample output, is: + + # pcitool /pci@1e,600000 -m 1 + + msi 0x1 mapped to cpu 0x0 + Device: /pci@1e,600000/pci@0/pci@9/pci@0/scsi@1 + Driver: mpt, instance 0 - ino 23 on ctlr 0 mapped to cpu 0 - Device: /pci@8,700000/ebus@5/i2c@1,30 - Driver: pcf8584, instance 1 - Device: /pci@8,700000/ebus@5/i2c@1,2e - Driver: pcf8584, instance 0 - --w cpu=\fIhex_CPU\fR [ -g ] +-w \fI<cpu#>\fR [ -g ] -Route the given ino to the given CPU. Display the new and original routing -information. The ino must be specified. - -Successful rerouting ino 23 above from cpu 0 to cpu 1 gives the following +Route the given INO or MSI/X to the given CPU. Display the new and original +routing information. The INO or MSI/X must be specified. 
+ +Successful rerouting ino 19 above from cpu 0 to cpu 1 gives the following output: - - # pcitool /pci@8,700000 -i ino=23 -w cpu=1 - - Interrupts on ino 23 reassigned: Old cpu:0, New cpu:1 + + # pcitool /pci@1e,600000 -i 19 -w 1 + + Interrupts on ino 0x19 reassigned: Old cpu: 0x0, New cpu: 0x1 + +Successful rerouting msi 1 above from cpu 0 to cpu 1 gives the following +output: + + # pcitool /pci@1e,600000 -m 1 -w 1 + + Interrupts on msi 0x1 reassigned: Old cpu: 0x0, New cpu: 0x1 On some platforms (such as X86) multiple MSI interrupts of a single function need to be rerouted together. Use -g to do this. -g works only on supported @@ -94,12 +124,12 @@ When -g is used, the vector provided must be the lowest-numbered vector of the group. The size of the group is determined internally. -Successful rerouting a group of inos starting at 60 from cpu 0 to cpu 1 gives +Successful rerouting a group of INOs starting at 60 from cpu 0 to cpu 1 gives the following output: - # pcitool /pci@0,0 -i ino=60 -w cpu=1 -g + # pcitool /pci@0,0 -i 60 -w 1 -g - Interrupts on ino group starting at ino 60 reassigned: Old cpu:0, New cpu:1 + Interrupts on ino group starting at ino 0x60 reassigned: Old cpu: 0x0, New cpu: 0x1 -v
--- a/usr/src/cmd/pcitool/pcitool.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/cmd/pcitool/pcitool.c Wed Jul 08 12:59:05 2009 +0800 @@ -94,6 +94,8 @@ "CPU is non-existent or not online" }, { PCITOOL_INVALID_INO, "INO is out of range or invalid" }, + { PCITOOL_INVALID_MSI, + "MSI is out of range or invalid" }, { PCITOOL_PENDING_INTRTIMEOUT, "Timeout waiting for pending interrupts to clear" }, { PCITOOL_REGPROP_NOTWELLFORMED, @@ -1290,8 +1292,13 @@ { int i; - (void) printf("\nino %x mapped to cpu %x\n", - iget_p->ino, iget_p->cpu_id); + if (iget_p->flags & PCITOOL_INTR_FLAG_GET_MSI) + (void) printf("\nmsi 0x%x mapped to cpu 0x%x\n", + iget_p->msi, iget_p->cpu_id); + else + (void) printf("\nino 0x%x mapped to cpu 0x%x\n", + iget_p->ino, iget_p->cpu_id); + for (i = 0; i < iget_p->num_devs; i++) { (void) printf("Device: %s\n", iget_p->dev[i].path); (void) printf(" Driver: %s, instance %d\n", @@ -1310,29 +1317,38 @@ pcitool_uiargs_t *input_args_p) { pcitool_intr_get_t *iget_p = *iget_pp; - uint32_t ino = iget_p->ino; + const char *str_type = NULL; + uint32_t intr; + /* Take the number from iget_p; input_args_p has none for "all". */ + if (input_args_p->flags & (MSI_SPEC_FLAG | MSI_ALL_FLAG)) { + intr = iget_p->msi; + str_type = "msi"; + } else { + intr = iget_p->ino; + str_type = "ino"; + } /* - * Check if interrupts are active on this ino. Get as much - * device info as there is room for at the moment. If there + * Check if interrupts are active on this ino/msi. Get as much + * device info as there is room for at the moment. If there * is not enough room for all devices, will call again with a * larger buffer. */ if (ioctl(fd, PCITOOL_DEVICE_GET_INTR, iget_p) != 0) { - /* * Let EIO errors silently slip through, as * some inos may not be viewable by design. * We don't want to stop or print an error for these. 
*/ - if (errno == EIO) { return (SUCCESS); } if (!(IS_QUIET(input_args_p->flags))) { - (void) fprintf(stderr, "Ioctl to get interrupt " - "%d info failed %s\n", ino, strerror(errno)); + (void) fprintf(stderr, "Ioctl to get %s 0x%x " + "info failed: %s\n", str_type, intr, + strerror(errno)); + if (errno != EFAULT) { (void) fprintf(stderr, "Pcitool status: %s\n", strstatus(iget_p->status)); @@ -1351,11 +1367,12 @@ iget_p = *iget_pp = realloc(iget_p, PCITOOL_IGET_SIZE(iget_p->num_devs)); iget_p->num_devs_ret = iget_p->num_devs; + if (ioctl(fd, PCITOOL_DEVICE_GET_INTR, iget_p) != 0) { if (!(IS_QUIET(input_args_p->flags))) { - (void) fprintf(stderr, "Ioctl to get interrupt " - "%d device info failed %s\n", - ino, strerror(errno)); + (void) fprintf(stderr, "Ioctl to get %s 0x%x " + "device info failed: %s\n", str_type, + intr, strerror(errno)); if (errno != EFAULT) { (void) fprintf(stderr, "Pcitool status: %s\n", @@ -1386,24 +1403,57 @@ iget_p->num_devs_ret = INIT_NUM_DEVS; iget_p->user_version = PCITOOL_VERSION; - /* Explicit ino requested. */ - if (input_args_p->flags & INO_SPEC_FLAG) { - iget_p->ino = input_args_p->intr_ino; + /* Explicit MSI requested. */ + if (input_args_p->flags & MSI_SPEC_FLAG) { + iget_p->msi = input_args_p->intr_msi; + iget_p->flags = PCITOOL_INTR_FLAG_GET_MSI; rval = get_single_interrupt(fd, &iget_p, input_args_p); - - } else { /* Return all inos. */ - + /* Return all MSIs. */ + } else if (input_args_p->flags & MSI_ALL_FLAG) { pcitool_intr_info_t intr_info; + intr_info.flags = PCITOOL_INTR_FLAG_GET_MSI; if (ioctl(fd, PCITOOL_SYSTEM_INTR_INFO, &intr_info) != 0) { if (!(IS_QUIET(input_args_p->flags))) { (void) fprintf(stderr, - "intr info ioctl failed:%s\n", + "intr info ioctl failed: %s\n", strerror(errno)); } - } else { + int msi; + /* + * Search through all interrupts. + * Display info on enabled ones. 
+ */ + for (msi = 0; + ((msi < intr_info.num_intr) && (rval == SUCCESS)); + msi++) { + bzero(iget_p, sizeof (pcitool_intr_get_t)); + iget_p->num_devs_ret = INIT_NUM_DEVS; + iget_p->user_version = PCITOOL_VERSION; + iget_p->flags = PCITOOL_INTR_FLAG_GET_MSI; + iget_p->msi = msi; + rval = get_single_interrupt( + fd, &iget_p, input_args_p); + } + } + /* Explicit INO requested. */ + } else if (input_args_p->flags & INO_SPEC_FLAG) { + iget_p->ino = input_args_p->intr_ino; + rval = get_single_interrupt(fd, &iget_p, input_args_p); + /* Return all INOs. */ + } else if (input_args_p->flags & INO_ALL_FLAG) { + pcitool_intr_info_t intr_info; + intr_info.flags = 0; + + if (ioctl(fd, PCITOOL_SYSTEM_INTR_INFO, &intr_info) != 0) { + if (!(IS_QUIET(input_args_p->flags))) { + (void) fprintf(stderr, + "intr info ioctl failed: %s\n", + strerror(errno)); + } + } else { int ino; /* @@ -1413,6 +1463,9 @@ for (ino = 0; ((ino < intr_info.num_intr) && (rval == SUCCESS)); ino++) { + bzero(iget_p, sizeof (pcitool_intr_get_t)); + iget_p->num_devs_ret = INIT_NUM_DEVS; + iget_p->user_version = PCITOOL_VERSION; iget_p->ino = ino; rval = get_single_interrupt( fd, &iget_p, input_args_p); @@ -1433,6 +1486,7 @@ char *ctlr_type = NULL; int rval = SUCCESS; + intr_info.flags = 0; if (ioctl(fd, PCITOOL_SYSTEM_INTR_INFO, &intr_info) != 0) { if (!(IS_QUIET(input_args_p->flags))) { (void) perror("Ioctl to get intr ctlr info failed"); @@ -1488,36 +1542,46 @@ static int set_interrupts(int fd, pcitool_uiargs_t *input_args_p) { - int rval = SUCCESS; /* Return status. */ - - pcitool_intr_set_t iset; + pcitool_intr_set_t iset; + const char *str_type = NULL; + uint32_t intr; + int rval = SUCCESS; /* Return status. */ /* Load interrupt number and cpu from commandline. 
*/ - iset.ino = input_args_p->intr_ino; + if (input_args_p->flags & MSI_SPEC_FLAG) { + iset.msi = intr = input_args_p->intr_msi; + iset.flags = PCITOOL_INTR_FLAG_SET_MSI; + str_type = "msi"; + } else { + iset.ino = intr = input_args_p->intr_ino; + iset.flags = 0; + str_type = "ino"; + } + iset.cpu_id = input_args_p->intr_cpu; iset.user_version = PCITOOL_VERSION; - iset.flags = (input_args_p->flags & SETGRP_FLAG) ? - PCITOOL_INTR_SET_FLAG_GROUP : 0; + iset.flags |= (input_args_p->flags & SETGRP_FLAG) ? + PCITOOL_INTR_FLAG_SET_GROUP : 0; /* Do the deed. */ if (ioctl(fd, PCITOOL_DEVICE_SET_INTR, &iset) != 0) { if (!(IS_QUIET(input_args_p->flags))) { (void) fprintf(stderr, - "Ioctl to set intr 0x%x failed: %s\n", - input_args_p->intr_ino, strerror(errno)); + "Ioctl to set %s 0x%x failed: %s\n", + str_type, intr, strerror(errno)); (void) fprintf(stderr, "pcitool status: %s\n", strstatus(iset.status)); } rval = errno; } else { if (input_args_p->flags & SETGRP_FLAG) { - (void) printf("\nInterrupts on ino %x reassigned:", - iset.ino); + (void) printf("\nInterrupts on %s group starting " + "at %s 0x%x reassigned:", str_type, str_type, intr); } else { - (void) printf("\nInterrupts on ino group starting " - "at ino %x reassigned:", iset.ino); + (void) printf("\nInterrupts on %s 0x%x reassigned:", + str_type, intr); } - (void) printf(" Old cpu:%x, New cpu:%x\n", iset.cpu_id, + (void) printf(" Old cpu: 0x%x, New cpu: 0x%x\n", iset.cpu_id, input_args_p->intr_cpu); }
--- a/usr/src/cmd/pcitool/pcitool_ui.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/cmd/pcitool/pcitool_ui.c Wed Jul 08 12:59:05 2009 +0800 @@ -67,11 +67,8 @@ /* * This defines which main options can be specified by the user. * Options with colons after them require arguments. - * - * First : means to return : if option is missing. This is used to handle - * the optional argument to -i. */ -static char *opt_string = ":n:d:i:p:rw:o:s:e:b:vaqlcxgy"; +static char *opt_string = ":n:d:i:m:p:rw:o:s:e:b:vaqlcxgy"; /* This defines options used singly and only by themselves (no nexus). */ static char *no_dev_opt_string = "ahpqv"; @@ -88,7 +85,8 @@ static int parse_device_opts(char *input, uint64_t *flags_arg, uint8_t *bus_arg, uint8_t *device_arg, uint8_t *func_arg, uint8_t *bank_arg); -static int parse_intr_opts(char *input, uint64_t *flags_arg, uint8_t *ino_arg); +static int parse_ino_opts(char *input, uint64_t *flags_arg, uint8_t *ino_arg); +static int parse_msi_opts(char *input, uint64_t *flags_arg, uint16_t *msi_arg); static int parse_intr_set_opts(char *input, uint64_t *flags_arg, uint32_t *cpu_arg); static int parse_probeone_opts(char *input, uint64_t *flags_arg, @@ -187,7 +185,6 @@ } if (error) { - print_bad_option(argv, optopt, optarg); return (FAILURE); } @@ -251,21 +248,15 @@ case 'i': if (parsed_args->flags & (LEAF_FLAG | NEXUS_FLAG | INTR_FLAG | PROBE_FLAGS)) { - (void) fprintf(stderr, "%s: -i set with " + (void) fprintf(stderr, "%s: -i set with -m, " "-n, -d or -p or is set twice\n", argv[0]); error = B_TRUE; break; } parsed_args->flags |= INTR_FLAG; - /* Process, say, -i -r */ - if (optarg[0] == '-') { - optind--; - continue; - } - /* parse input to get ino value. 
*/ - if (parse_intr_opts(optarg, &parsed_args->flags, + if (parse_ino_opts(optarg, &parsed_args->flags, &parsed_args->intr_ino) != SUCCESS) { (void) fprintf(stderr, "%s: Error parsing interrupt options\n", @@ -273,7 +264,26 @@ error = B_TRUE; } break; + /* Interrupt */ + case 'm': + if (parsed_args->flags & (LEAF_FLAG | + NEXUS_FLAG | INTR_FLAG | PROBE_FLAGS)) { + (void) fprintf(stderr, "%s: -m set with -i, " + "-n, -d or -p or is set twice\n", argv[0]); + error = B_TRUE; + break; + } + parsed_args->flags |= INTR_FLAG; + /* parse input to get msi value. */ + if (parse_msi_opts(optarg, &parsed_args->flags, + &parsed_args->intr_msi) != SUCCESS) { + (void) fprintf(stderr, + "%s: Error parsing interrupt options\n", + argv[0]); + error = B_TRUE; + } + break; /* Probe */ case 'p': if (parsed_args->flags & (LEAF_FLAG | @@ -573,10 +583,6 @@ /* Option without operand. */ case ':': switch (optopt) { - case 'i': - /* Allow -i without ino=. */ - parsed_args->flags |= INTR_FLAG; - break; case 'p': /* Allow -p without bdf spec. */ parsed_args->flags |= @@ -638,22 +644,23 @@ if (parsed_args->flags & ~(INTR_FLAG | VERBOSE_FLAG | QUIET_FLAG | READ_FLAG | WRITE_FLAG | SHOWCTLR_FLAG | - SETGRP_FLAG | INO_SPEC_FLAG | CPU_SPEC_FLAG)) { + SETGRP_FLAG | INO_ALL_FLAG | INO_SPEC_FLAG | + MSI_ALL_FLAG | MSI_SPEC_FLAG | CPU_SPEC_FLAG)) { (void) fprintf(stderr, "%s: -v, -q, -r, -w, -c " - "and -g are only options options allowed.\n" - "with interrupt command.\n", argv[0]); + "and -g are the only options allowed with " + "interrupt command.\n", argv[0]); error = B_TRUE; } /* Need cpu and ino values for interrupt set command. 
*/ if ((parsed_args->flags & WRITE_FLAG) && - (parsed_args->flags & - (CPU_SPEC_FLAG | INO_SPEC_FLAG)) != - (CPU_SPEC_FLAG | INO_SPEC_FLAG)) { + (!(parsed_args->flags & CPU_SPEC_FLAG) || + !(parsed_args->flags & + (INO_SPEC_FLAG | MSI_SPEC_FLAG)))) { (void) fprintf(stderr, - "%s: Both cpu and ino must be specified " - "explicitly for interrupt set command.\n", - argv[0]); + "%s: Both cpu and ino/msi must be " + "specified explicitly for interrupt " + "set command.\n", argv[0]); error = B_TRUE; } @@ -1270,59 +1277,69 @@ /* - * Parse interrupt options. This includes: - * ino=number + * Parse INO options. This includes: + * ino# | all * * input is the string of options to parse. flags_arg returns modified with * specified options set. Other args return their respective values. */ static int -parse_intr_opts(char *input, uint64_t *flags_arg, uint8_t *ino_arg) +parse_ino_opts(char *input, uint64_t *flags_arg, uint8_t *ino_arg) { - typedef enum { - ino = 0 - } intr_opts_index_t; + uint64_t value; + int rval = SUCCESS; + + if (strcmp(input, "all") == 0) { + *flags_arg |= INO_ALL_FLAG; + } else if ((rval = get_value64(input, &value, HEX_ONLY)) == SUCCESS) { + *ino_arg = (uint8_t)value; - static char *intr_opts[] = { - "ino", - NULL - }; + if (*ino_arg != value) { + (void) fprintf(stderr, + "ino argument must fit into 8 bits.\n"); + rval = FAILURE; + } else { + *flags_arg |= INO_SPEC_FLAG; + } + } else { + (void) fprintf(stderr, + "Unrecognized option for -i\n"); + rval = FAILURE; + } - char *value; - uint64_t recv64; + return (rval); +} - int rval = SUCCESS; - - while ((*input != '\0') && (rval == SUCCESS)) { - switch (getsubopt(&input, intr_opts, &value)) { - /* ino=number */ - case ino: - if (value == NULL) { - (void) fprintf(stderr, "Missing ino value.\n"); - rval = FAILURE; - break; - } - if ((rval = get_value64(value, &recv64, HEX_ONLY)) != - SUCCESS) { - break; - } - *ino_arg = (uint8_t)recv64; - if (*ino_arg != recv64) { - (void) 
fprintf(stderr, - "Ino argument must fit into 8 bits.\n"); - rval = FAILURE; - break; - } - *flags_arg |= INO_SPEC_FLAG; - break; +/* + * Parse MSI options. This includes: + * msi# | all + * + * input is the string of options to parse. flags_arg returns modified with + * specified options set. Other args return their respective values. + */ +static int +parse_msi_opts(char *input, uint64_t *flags_arg, uint16_t *msi_arg) +{ + uint64_t value; + int rval = SUCCESS; - default: + if (strcmp(input, "all") == 0) { + *flags_arg |= MSI_ALL_FLAG; + } else if ((rval = get_value64(input, &value, HEX_ONLY)) == SUCCESS) { + *msi_arg = (uint16_t)value; + + if (*msi_arg != value) { (void) fprintf(stderr, - "Unrecognized option for -i\n"); + "msi argument must fit into 16 bits.\n"); rval = FAILURE; - break; + } else { + *flags_arg |= MSI_SPEC_FLAG; } + } else { + (void) fprintf(stderr, + "Unrecognized option for -m\n"); + rval = FAILURE; } return (rval); @@ -1339,50 +1356,23 @@ static int parse_intr_set_opts(char *input, uint64_t *flags_arg, uint32_t *cpu_arg) { - typedef enum { - cpu = 0 - } intr_set_opts_index_t; + uint64_t value; + int rval = SUCCESS; - static char *intr_set_opts[] = { - "cpu", - NULL - }; - - char *value; - uint64_t recv64; - - int rval = SUCCESS; - - while ((*input != '\0') && (rval == SUCCESS)) { - switch (getsubopt(&input, intr_set_opts, &value)) { + if ((rval = get_value64(input, &value, HEX_ONLY)) == SUCCESS) { - /* cpu=value */ - case cpu: - if (value == NULL) { - (void) fprintf(stderr, "Missing cpu value.\n"); - rval = FAILURE; - break; - } - if ((rval = get_value64(value, &recv64, HEX_ONLY)) != - SUCCESS) { - break; - } - if ((long)recv64 > sysconf(_SC_CPUID_MAX)) { - (void) fprintf(stderr, "Cpu argument " - "exceeds maximum for this system type.\n"); - rval = FAILURE; - break; - } - *cpu_arg = (uint32_t)recv64; + if ((long)value > sysconf(_SC_CPUID_MAX)) { + (void) fprintf(stderr, "Cpu argument " + "exceeds maximum for this system type.\n"); + rval = 
FAILURE; + } else { + *cpu_arg = (uint32_t)value; *flags_arg |= CPU_SPEC_FLAG; - break; - - default: - (void) fprintf(stderr, - "Unrecognized option for -i -w\n"); + } + } else { + (void) fprintf(stderr, + "Unrecognized option for -i -m -w\n"); rval = FAILURE; - break; - } } return (rval);
--- a/usr/src/cmd/pcitool/pcitool_ui.h Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/cmd/pcitool/pcitool_ui.h Wed Jul 08 12:59:05 2009 +0800 @@ -43,7 +43,7 @@ */ #define NEXUS_FLAG 0x1 #define LEAF_FLAG 0x2 -#define INTR_FLAG 0x4 +#define INTR_FLAG 0x4 /* Either -i or -m specified */ #define PROBEDEV_FLAG 0x8 /* Probe a specific device */ #define PROBETREE_FLAG 0x10 /* Probe all devs on a tree */ #define PROBEALL_FLAG 0x20 /* Probe devs on all trees */ @@ -72,8 +72,11 @@ #define BUS_SPEC_FLAG (0x40000ULL << 32) #define DEV_SPEC_FLAG (0x80000ULL << 32) #define FUNC_SPEC_FLAG (0x100000ULL << 32) -#define CPU_SPEC_FLAG (0x200000ULL << 32) -#define INO_SPEC_FLAG (0x400000ULL << 32) +#define CPU_SPEC_FLAG (0x200000ULL << 32) /* -w <cpu#> */ +#define INO_ALL_FLAG (0x400000ULL << 32) /* -i all */ +#define INO_SPEC_FLAG (0x800000ULL << 32) /* -i <#ino> */ +#define MSI_ALL_FLAG (0x1000000ULL << 32) /* -m all */ +#define MSI_SPEC_FLAG (0x2000000ULL << 32) /* -m <#msi> */ /* Macros for a few heavily-used flags. */ #define IS_VERBOSE(flags) (flags & VERBOSE_FLAG) @@ -96,6 +99,7 @@ uint8_t size; uint8_t bank; uint8_t intr_ino; + uint16_t intr_msi; boolean_t big_endian; } pcitool_uiargs_t;
--- a/usr/src/cmd/pcitool/pcitool_usage.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/cmd/pcitool/pcitool_usage.c Wed Jul 08 12:59:05 2009 +0800 @@ -148,13 +148,14 @@ static char *pcitool_usage_intr[] = { "Usage:", "Interrupt mode:", -" %s <PCI nexus node> -i [ ino=<ino> ] [ -r [ -c ] | -w cpu=<CPU> ]", -" [ -v ] [ -q ]", -" (only on applicable platforms)", +" %s pci@<unit-address> -i <ino#> | all [ -r [ -c ] | -w <cpu#> [ -g ] ]", +" [ -v ] [ -q ]", +" %s pci@<unit-address> -m <msi#> | all [ -r [ -c ] | -w <cpu#> [ -g ] ]", +" [ -v ] [ -q ]", "", "where", "", -"<PCI nexus node> is a node from /devices, with \"/devices\" stripped off.", +"pci@<unit-address> is a node from /devices, with \"/devices\" stripped off.", "For example: /pci@1e,600000", "", "-v gives verbose output for all modes.", @@ -170,26 +171,36 @@ "Interrupt mode", "--------------", "", -"-i [ ino=<ino> ] changes or retrieves current CPU for interrupts of given", -"nexus and optionally given ino. Ino must be selected if -w specified.", -"If no ino is selected (as for displaying), all will be selected.", +"-i <ino#> changes or retrieves current CPU for interrupts of given nexus", +" and given INO. The special value of 'all' can be used to select all INOs.", +"", +"-m <msi#> changes or retrieves current CPU for interrupts of given nexus", +" and given MSI/X. The special value of 'all' can be used to select all", +" MSI/Xs.", "", -"-w cpu=<CPU> [ -g ] to change an ino<->CPU binding.", +" Note: On x86 platforms, both INOs and MSI/Xs are mapped to the same", +" interrupt vectors. Use -i option to retrieve and reroute any interrupt", +" vectors (both INO and MSI/Xs). So, -m option is not required on x86", +" platforms. Hence it is not supported.", +"", +" A specific INO or MSI/X must be selected if -w specified.", +"", +"-w <cpu#> [ -g ] to change an INO or MSI/X <->CPU binding.", "", " Note: On certain platforms (e.g. X86), multiple MSI interrupts of a single", " function need to be moved together. 
Use -g to do this. -g works only on", " supported platforms and only for groups of MSI interrupts. When -g is", -" used, ino must be the lowest-numbered vector of the group. (Use the mdb", +" used, INO must be the lowest-numbered vector of the group. (Use the mdb", " ::interrupts dcmd to discover groups of MSI vectors.) The size of the", " group is determined internally. (\"Groups\" of size 1 are accepted.)", "", -"-r [ -c ] for displaying ino<->CPU bindings of all selected inos on a given", -" nexus. -c optionally dumps controller information.", +"-r [ -c ] for displaying ino or msi <->CPU bindings of all selected INO/MSIs", +" on a given nexus. -c optionally dumps controller information.", "", -" All relevant enabled inos supporting non-nexus device interrupts will be", -" printed. For each printed ino, all supported devices and their CPU", -" binding will be displayed. On some platforms, inos dedicated to the root", -" nexus will be shown and marked with \"(Internal)\".", +" All relevant enabled INO/MSI/Xs supporting non-nexus device interrupts", +" will be printed. For each printed INO/MSI/X, all supported devices and", +" their CPU binding will be displayed. On some platforms, INOs dedicated", +" to the root nexus will be shown and marked with \"(Internal)\".", "", "When neither -r nor -w are specified, -r is the default.", NULL
--- a/usr/src/cmd/perl/contrib/Sun/Solaris/Intrs/Intrs.xs Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/cmd/perl/contrib/Sun/Solaris/Intrs/Intrs.xs Wed Jul 08 12:59:05 2009 +0800 @@ -20,12 +20,10 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/stat.h> #include <sys/pci.h> @@ -72,7 +70,7 @@ } iset.ino = ino; iset.cpu_id = cpu; - iset.flags = (num_ino > 1) ? PCITOOL_INTR_SET_FLAG_GROUP : 0; + iset.flags = (num_ino > 1) ? PCITOOL_INTR_FLAG_SET_GROUP : 0; iset.user_version = PCITOOL_VERSION; ret = ioctl(fd, PCITOOL_DEVICE_SET_INTR, &iset);
--- a/usr/src/uts/common/io/pci_intr_lib.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/common/io/pci_intr_lib.c Wed Jul 08 12:59:05 2009 +0800 @@ -424,21 +424,13 @@ * interrupt can be disabled. */ int -pci_msi_disable_mode(dev_info_t *rdip, int type, uint_t flags) +pci_msi_disable_mode(dev_info_t *rdip, int type) { ushort_t caps_ptr, msi_ctrl; ddi_acc_handle_t cfg_hdle; - DDI_INTR_NEXDBG((CE_CONT, "pci_msi_disable_mode: rdip = 0x%p " - "flags = 0x%x\n", (void *)rdip, flags)); - - /* - * Do not turn off the master enable bit if other interrupts are - * still active. - */ - if ((flags != DDI_INTR_FLAG_BLOCK) && - (i_ddi_intr_get_current_nenables(rdip) > 1)) - return (DDI_SUCCESS); + DDI_INTR_NEXDBG((CE_CONT, "pci_msi_disable_mode: rdip = 0x%p\n", + (void *)rdip)); if (pci_get_msi_ctrl(rdip, type, &msi_ctrl, &caps_ptr, &cfg_hdle) != DDI_SUCCESS)
--- a/usr/src/uts/common/os/ddi_intr.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/common/os/ddi_intr.c Wed Jul 08 12:59:05 2009 +0800 @@ -728,6 +728,69 @@ } /* + * Interrupt target get/set functions + */ +int +ddi_intr_get_affinity(ddi_intr_handle_t h, ddi_intr_target_t *tgt_p) +{ + ddi_intr_handle_impl_t *hdlp = (ddi_intr_handle_impl_t *)h; + int ret; + + DDI_INTR_APIDBG((CE_CONT, "ddi_intr_get_affinity: hdlp = %p\n", + (void *)hdlp)); + + if ((hdlp == NULL) || (tgt_p == NULL)) + return (DDI_EINVAL); + + rw_enter(&hdlp->ih_rwlock, RW_READER); + if (hdlp->ih_state != DDI_IHDL_STATE_ENABLE) { + rw_exit(&hdlp->ih_rwlock); + return (DDI_EINVAL); + } + + ret = i_ddi_intr_ops(hdlp->ih_dip, hdlp->ih_dip, + DDI_INTROP_GETTARGET, hdlp, (void *)tgt_p); + + DDI_INTR_APIDBG((CE_CONT, "ddi_intr_get_affinity: target %x\n", + *tgt_p)); + + if (ret == DDI_SUCCESS) + hdlp->ih_target = *tgt_p; + + rw_exit(&hdlp->ih_rwlock); + return (ret); +} + +int +ddi_intr_set_affinity(ddi_intr_handle_t h, ddi_intr_target_t tgt) +{ + ddi_intr_handle_impl_t *hdlp = (ddi_intr_handle_impl_t *)h; + int ret; + + DDI_INTR_APIDBG((CE_CONT, "ddi_intr_set_affinity: hdlp = %p " + "target %x\n", (void *)hdlp, tgt)); + + if (hdlp == NULL) + return (DDI_EINVAL); + + rw_enter(&hdlp->ih_rwlock, RW_WRITER); + if ((hdlp->ih_state != DDI_IHDL_STATE_ENABLE) || + !(hdlp->ih_cap & DDI_INTR_FLAG_RETARGETABLE)) { + rw_exit(&hdlp->ih_rwlock); + return (DDI_EINVAL); + } + + ret = i_ddi_intr_ops(hdlp->ih_dip, hdlp->ih_dip, + DDI_INTROP_SETTARGET, hdlp, &tgt); + + if (ret == DDI_SUCCESS) + hdlp->ih_target = tgt; + + rw_exit(&hdlp->ih_rwlock); + return (ret); +} + +/* * Interrupt enable/disable/block_enable/block_disable handlers */ int @@ -979,6 +1042,21 @@ } /* + * Set the number of interrupts requested from IRM + */ +int +ddi_intr_set_nreq(dev_info_t *dip, int nreq) +{ + DDI_INTR_APIDBG((CE_CONT, "ddi_intr_set_nreq: dip %p, nreq %d\n", + (void *)dip, nreq)); + + if (dip == NULL) + return (DDI_EINVAL); + + return 
(i_ddi_irm_modify(dip, nreq)); +} + +/* * Soft interrupt handlers */ /* @@ -1142,21 +1220,6 @@ } /* - * Set the number of interrupts requested from IRM - */ -int -ddi_intr_set_nreq(dev_info_t *dip, int nreq) -{ - DDI_INTR_APIDBG((CE_CONT, "ddi_intr_set_nreq: dip %p, nreq %d\n", - (void *)dip, nreq)); - - if (dip == NULL) - return (DDI_EINVAL); - - return (i_ddi_irm_modify(dip, nreq)); -} - -/* * Old DDI interrupt framework * * The following DDI interrupt interfaces are obsolete.
--- a/usr/src/uts/common/sys/ddi_intr.h Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/common/sys/ddi_intr.h Wed Jul 08 12:59:05 2009 +0800 @@ -32,6 +32,7 @@ #include <sys/ddipropdefs.h> #include <sys/rwlock.h> +#include <sys/processor.h> #ifdef __cplusplus extern "C" { @@ -85,7 +86,8 @@ #define DDI_INTR_FLAG_MASKABLE 0x0010 /* (RO) maskable */ #define DDI_INTR_FLAG_PENDING 0x0020 /* (RO) int pending supported */ #define DDI_INTR_FLAG_BLOCK 0x0100 /* (RO) requires block enable */ -#define DDI_INTR_FLAG_MSI64 0x0200 /* (R0) MSI/X supports 64 bit addr */ +#define DDI_INTR_FLAG_MSI64 0x0200 /* (RO) MSI/X supports 64 bit addr */ +#define DDI_INTR_FLAG_RETARGETABLE 0x0400 /* (RO) retargetable */ /* * Macro to be used while passing interrupt priority @@ -100,6 +102,11 @@ typedef struct __ddi_softint_handle *ddi_softint_handle_t; /* + * Typedef for interrupt target + */ +typedef processorid_t ddi_intr_target_t; + +/* * Definition for behavior flag which is used with ddi_intr_alloc(9f). */ #define DDI_INTR_ALLOC_NORMAL 0 /* Non-strict alloc */ @@ -177,6 +184,12 @@ int ddi_intr_remove_handler(ddi_intr_handle_t h); /* + * Interrupt get/set affinity functions + */ +int ddi_intr_get_affinity(ddi_intr_handle_t h, ddi_intr_target_t *tgt_p); +int ddi_intr_set_affinity(ddi_intr_handle_t h, ddi_intr_target_t tgt); + +/* * Interrupt enable/disable/block_enable/block_disable functions */ int ddi_intr_enable(ddi_intr_handle_t h); @@ -196,6 +209,11 @@ int ddi_intr_get_pending(ddi_intr_handle_t h, int *pendingp); /* + * Interrupt resource management function + */ +int ddi_intr_set_nreq(dev_info_t *dip, int nreq); + +/* * Soft interrupt functions */ int ddi_intr_add_softint(dev_info_t *dip, ddi_softint_handle_t *h, @@ -206,11 +224,6 @@ int ddi_intr_set_softint_pri(ddi_softint_handle_t h, uint_t soft_pri); /* - * Interrupt resource management function - */ -int ddi_intr_set_nreq(dev_info_t *dip, int nreq); - -/* * Old DDI interrupt interfaces. 
* * The following DDI interrupt interfaces are obsolete.
--- a/usr/src/uts/common/sys/ddi_intr_impl.h Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/common/sys/ddi_intr_impl.h Wed Jul 08 12:59:05 2009 +0800 @@ -62,7 +62,9 @@ DDI_INTROP_CLRMASK, /* 17 clear mask */ DDI_INTROP_GETPENDING, /* 18 get pending interrupt */ DDI_INTROP_NAVAIL, /* 19 get num of available interrupts */ - DDI_INTROP_GETPOOL /* 20 get resource management pool */ + DDI_INTROP_GETPOOL, /* 20 get resource management pool */ + DDI_INTROP_GETTARGET, /* 21 get target for a given intr(s) */ + DDI_INTROP_SETTARGET /* 22 set target for a given intr(s) */ } ddi_intr_op_t; /* Version number used in the handles */ @@ -112,6 +114,14 @@ void *ih_private; /* Platform specific data */ uint_t ih_scratch1; /* Scratch1: #interrupts */ void *ih_scratch2; /* Scratch2: flag/h_array */ + + /* + * The ih_target field may not reflect the actual target that is + * currently being used for the given interrupt. This field is just a + * snapshot taken either during ddi_intr_add_handler() or + * ddi_intr_get/set_affinity() calls. + */ + ddi_intr_target_t ih_target; /* Target ID */ } ddi_intr_handle_impl_t; /* values for ih_state (strictly for interrupt handle) */
--- a/usr/src/uts/common/sys/pci_intr_lib.h Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/common/sys/pci_intr_lib.h Wed Jul 08 12:59:05 2009 +0800 @@ -42,7 +42,7 @@ extern int pci_msi_unconfigure(dev_info_t *rdip, int type, int inum); extern int pci_is_msi_enabled(dev_info_t *rdip, int type); extern int pci_msi_enable_mode(dev_info_t *rdip, int type); -extern int pci_msi_disable_mode(dev_info_t *rdip, int type, uint_t flags); +extern int pci_msi_disable_mode(dev_info_t *rdip, int type); extern int pci_msi_set_mask(dev_info_t *rdip, int type, int inum); extern int pci_msi_clr_mask(dev_info_t *rdip, int type, int inum); extern int pci_msi_get_pending(dev_info_t *rdip, int type, int inum,
--- a/usr/src/uts/common/sys/pci_tools.h Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/common/sys/pci_tools.h Wed Jul 08 12:59:05 2009 +0800 @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_PCI_TOOLS_H #define _SYS_PCI_TOOLS_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/modctl.h> #ifdef __cplusplus @@ -107,6 +105,7 @@ PCITOOL_SUCCESS = 0x0, PCITOOL_INVALID_CPUID, PCITOOL_INVALID_INO, + PCITOOL_INVALID_MSI, PCITOOL_PENDING_INTRTIMEOUT, PCITOOL_REGPROP_NOTWELLFORMED, PCITOOL_INVALID_ADDRESS, @@ -127,16 +126,18 @@ uint16_t user_version; /* Userland program version - to krnl */ uint16_t drvr_version; /* Driver version - from kernel */ uint32_t ino; /* interrupt to set - to kernel */ + uint32_t msi; /* Specific MSI to set - to kernel */ uint32_t cpu_id; /* to: cpu to set / from: old cpu returned */ + uint32_t flags; /* to kernel */ pcitool_errno_t status; /* from kernel */ - uint32_t flags; /* to kernel */ } pcitool_intr_set_t; /* - * flags for pcitool_intr_set_t + * Flags for pcitool_intr_get/set_t/info_t */ -#define PCITOOL_INTR_SET_FLAG_GROUP 0x1 - +#define PCITOOL_INTR_FLAG_SET_GROUP 0x1 +#define PCITOOL_INTR_FLAG_GET_MSI 0x2 +#define PCITOOL_INTR_FLAG_SET_MSI 0x4 /* * PCITOOL_DEVICE_GET_INTR ioctl data structure to dump out the @@ -153,6 +154,7 @@ uint16_t user_version; /* Userland program version - to krnl */ uint16_t drvr_version; /* Driver version - from kernel */ uint32_t ino; /* interrupt number - to kernel */ + uint32_t msi; /* MSI number - to kernel */ uint8_t num_devs_ret; /* room for this # of devs to be */ /* returned - to kernel */ /* # devs returned - from kernel */ @@ -160,6 +162,7 @@ /* intrs enabled for devs if > 0 */ uint8_t ctlr; /* controller number - from kernel */ uint32_t cpu_id; /* cpu of interrupt - from kernel */ + uint32_t flags; /* to kernel */ pcitool_errno_t 
status; /* returned status - from kernel */ pcitool_intr_dev_t dev[1]; /* start of variable device list */ /* from kernel */ @@ -177,6 +180,7 @@ typedef struct pcitool_intr_info { uint16_t user_version; /* Userland program version - to krnl */ uint16_t drvr_version; /* Driver version - from kernel */ + uint32_t flags; /* to kernel */ uint32_t num_intr; /* Number of intrs suppt by nexus */ uint32_t ctlr_version; /* Intr ctlr HW version - from kernel */ uchar_t ctlr_type; /* A PCITOOL_CTLR_TYPE - from kernel */
--- a/usr/src/uts/i86pc/io/pci/pci_common.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/i86pc/io/pci/pci_common.c Wed Jul 08 12:59:05 2009 +0800 @@ -681,6 +681,30 @@ DDI_INTR_NEXDBG((CE_CONT, "pci: GETPENDING returned = %x\n", *(int *)result)); break; + case DDI_INTROP_GETTARGET: + DDI_INTR_NEXDBG((CE_CONT, "pci_common_intr_ops: GETTARGET\n")); + + /* Note hdlp->ih_vector is actually an irq */ + if ((rv = pci_get_cpu_from_vecirq(hdlp->ih_vector, IS_IRQ)) == + -1) + return (DDI_FAILURE); + *(int *)result = rv; + DDI_INTR_NEXDBG((CE_CONT, "pci_common_intr_ops: GETTARGET " + "vector = 0x%x, cpu = 0x%x\n", hdlp->ih_vector, rv)); + break; + case DDI_INTROP_SETTARGET: + DDI_INTR_NEXDBG((CE_CONT, "pci_common_intr_ops: SETTARGET\n")); + + /* hdlp->ih_vector is actually an irq */ + tmp_hdl.ih_vector = hdlp->ih_vector; + tmp_hdl.ih_flags = PSMGI_INTRBY_IRQ; + tmp_hdl.ih_private = (void *)(uintptr_t)*(int *)result; + psm_rval = (*psm_intr_ops)(rdip, &tmp_hdl, PSM_INTR_OP_SET_CPU, + &psm_status); + + if (psm_rval != PSM_SUCCESS) + return (DDI_FAILURE); + break; default: return (i_ddi_intr_ops(pdip, rdip, intr_op, hdlp, result)); }
--- a/usr/src/uts/i86pc/io/pci/pci_tools.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/i86pc/io/pci/pci_tools.c Wed Jul 08 12:59:05 2009 +0800 @@ -156,6 +156,12 @@ goto done_set_intr; } + if (iset.flags & PCITOOL_INTR_FLAG_SET_MSI) { + rval = ENOTSUP; + iset.status = PCITOOL_IO_ERROR; + goto done_set_intr; + } + if (iset.ino > APIC_MAX_VECTOR) { rval = EINVAL; iset.status = PCITOOL_INVALID_INO; @@ -179,13 +185,14 @@ */ info_hdl.ih_vector = iset.ino; info_hdl.ih_private = (void *)(uintptr_t)iset.cpu_id; + info_hdl.ih_flags = PSMGI_INTRBY_VEC; if (pcitool_debug) prom_printf("user version:%d, flags:0x%x\n", iset.user_version, iset.flags); result = ENOTSUP; if ((iset.user_version >= PCITOOL_V2) && - (iset.flags & PCITOOL_INTR_SET_FLAG_GROUP)) { + (iset.flags & PCITOOL_INTR_FLAG_SET_GROUP)) { ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_GRP_SET_CPU, &result); } else { @@ -259,6 +266,13 @@ DDI_SUCCESS) return (EFAULT); + if (partial_iget.flags & PCITOOL_INTR_FLAG_GET_MSI) { + partial_iget.status = PCITOOL_IO_ERROR; + partial_iget.num_devs_ret = 0; + rval = ENOTSUP; + goto done_get_intr; + } + /* Validate argument. */ if (partial_iget.ino > APIC_MAX_VECTOR) { partial_iget.status = PCITOOL_INVALID_INO; @@ -388,6 +402,9 @@ return (EFAULT); } + if (intr_info.flags & PCITOOL_INTR_FLAG_GET_MSI) + return (ENOTSUP); + /* For UPPC systems, psm_intr_ops has no entry for APIC_TYPE. */ if ((rval = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_APIC_TYPE, NULL)) != PSM_SUCCESS) {
--- a/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c Wed Jul 08 12:59:05 2009 +0800 @@ -596,7 +596,7 @@ #if !defined(__xpv) static int -apic_set_cpu(uint32_t vector, int cpu, int *result) +apic_set_cpu(int irqno, int cpu, int *result) { apic_irq_t *irqp; ulong_t iflag; @@ -604,9 +604,8 @@ DDI_INTR_IMPLDBG((CE_CONT, "APIC_SET_CPU\n")); - /* Convert the vector to the irq using vector_to_irq table. */ mutex_enter(&airq_mutex); - irqp = apic_irq_table[apic_vector_to_irq[vector]]; + irqp = apic_irq_table[irqno]; mutex_exit(&airq_mutex); if (irqp == NULL) { @@ -633,12 +632,17 @@ *result = EIO; return (PSM_FAILURE); } + /* + * keep tracking the default interrupt cpu binding + */ + irqp->airq_cpu = cpu; + *result = 0; return (PSM_SUCCESS); } static int -apic_grp_set_cpu(uint32_t vector, int new_cpu, int *result) +apic_grp_set_cpu(int irqno, int new_cpu, int *result) { dev_info_t *orig_dip; uint32_t orig_cpu; @@ -651,6 +655,7 @@ uint32_t msi_pvm; ddi_acc_handle_t handle; int num_vectors = 0; + uint32_t vector; DDI_INTR_IMPLDBG((CE_CONT, "APIC_GRP_SET_CPU\n")); @@ -659,15 +664,16 @@ * us while we're playing with it. 
*/ mutex_enter(&airq_mutex); - irqps[0] = apic_irq_table[apic_vector_to_irq[vector]]; + irqps[0] = apic_irq_table[irqno]; orig_cpu = irqps[0]->airq_temp_cpu; orig_dip = irqps[0]->airq_dip; num_vectors = irqps[0]->airq_intin_no; + vector = irqps[0]->airq_vector; /* A "group" of 1 */ if (num_vectors == 1) { mutex_exit(&airq_mutex); - return (apic_set_cpu(vector, new_cpu, result)); + return (apic_set_cpu(irqno, new_cpu, result)); } *result = ENXIO; @@ -748,8 +754,12 @@ if (apic_rebind_all(irqps[0], new_cpu)) (void) apic_rebind_all(irqps[0], orig_cpu); else { - for (i = 1; i < num_vectors; i++) + irqps[0]->airq_cpu = new_cpu; + + for (i = 1; i < num_vectors; i++) { (void) apic_rebind_all(irqps[i], new_cpu); + irqps[i]->airq_cpu = new_cpu; + } *result = 0; /* SUCCESS */ } @@ -986,6 +996,8 @@ cap = DDI_INTR_FLAG_PENDING; if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) cap |= DDI_INTR_FLAG_MASKABLE; + else if (hdlp->ih_type == DDI_INTR_TYPE_MSIX) + cap |= DDI_INTR_FLAG_RETARGETABLE; *result = cap; break; case PSM_INTR_OP_GET_SHARED: @@ -1036,6 +1048,15 @@ *result = EINVAL; return (PSM_FAILURE); } + if (hdlp->ih_vector > APIC_MAX_VECTOR) { + DDI_INTR_IMPLDBG((CE_CONT, + "[grp_]set_cpu: vector out of range: %d\n", + hdlp->ih_vector)); + *result = EINVAL; + return (PSM_FAILURE); + } + if (!(hdlp->ih_flags & PSMGI_INTRBY_IRQ)) + hdlp->ih_vector = apic_vector_to_irq[hdlp->ih_vector]; if (intr_op == PSM_INTR_OP_SET_CPU) { if (apic_set_cpu(hdlp->ih_vector, new_cpu, result) != PSM_SUCCESS)
--- a/usr/src/uts/sun4/io/px/px.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4/io/px/px.c Wed Jul 08 12:59:05 2009 +0800 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -599,7 +599,8 @@ /* Add PME_TO_ACK message handler */ hdl.ih_cb_func = (ddi_intr_handler_t *)px_pmeq_intr; if (px_add_msiq_intr(dip, dip, &hdl, MSG_REC, - (msgcode_t)PCIE_PME_ACK_MSG, &px_p->px_pm_msiq_id) != DDI_SUCCESS) { + (msgcode_t)PCIE_PME_ACK_MSG, -1, + &px_p->px_pm_msiq_id) != DDI_SUCCESS) { DBG(DBG_PWR, dip, "px_pwr_setup: couldn't add " " PME_TO_ACK intr\n"); goto pwr_setup_err1;
--- a/usr/src/uts/sun4/io/px/px_ib.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4/io/px/px_ib.c Wed Jul 08 12:59:05 2009 +0800 @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * PX Interrupt Block implementation */ @@ -311,10 +309,9 @@ /* Redistribute device interrupts */ mutex_enter(&ib_p->ib_ino_lst_mutex); + px_msiq_redist(px_p); for (ino_p = ib_p->ib_ino_lst; ino_p; ino_p = ino_p->ino_next_p) { - uint32_t orig_cpuid; - /* * Recomputes the sum of interrupt weights of devices that * share the same ino upon first call marked by @@ -348,12 +345,31 @@ if ((weight == ino_p->ino_intr_weight) || ((weight >= weight_max) && (ino_p->ino_intr_weight >= weight_max))) { - orig_cpuid = ino_p->ino_cpuid; + uint32_t orig_cpuid = ino_p->ino_cpuid; + if (cpu[orig_cpuid] == NULL) orig_cpuid = CPU->cpu_id; - /* select cpuid to target and mark ino established */ - ino_p->ino_cpuid = intr_dist_cpuid(); + DBG(DBG_IB, dip, "px_ib_intr_redist: sysino 0x%llx " + "current cpuid 0x%x current default cpuid 0x%x\n", + ino_p->ino_sysino, ino_p->ino_cpuid, + ino_p->ino_default_cpuid); + + /* select target cpuid and mark ino established */ + if (ino_p->ino_default_cpuid == -1) + ino_p->ino_cpuid = ino_p->ino_default_cpuid = + intr_dist_cpuid(); + else if ((ino_p->ino_cpuid != + ino_p->ino_default_cpuid) && + (cpu_intr_on(cpu[ino_p->ino_default_cpuid]))) + ino_p->ino_cpuid = ino_p->ino_default_cpuid; + else if (!cpu_intr_on(cpu[ino_p->ino_cpuid])) + ino_p->ino_cpuid = intr_dist_cpuid(); + + DBG(DBG_IB, dip, "px_ib_intr_redist: sysino 0x%llx " + "new cpuid 0x%x new default cpuid 0x%x\n", + ino_p->ino_sysino, ino_p->ino_cpuid, + ino_p->ino_default_cpuid); /* Add device weight to targeted cpu. 
*/ for (ipil_p = ino_p->ino_ipil_p; ipil_p; @@ -436,30 +452,41 @@ return (ino_p); } +px_ino_t * +px_ib_alloc_ino(px_ib_t *ib_p, devino_t ino_num) +{ + sysino_t sysino; + px_ino_t *ino_p; + + if (px_lib_intr_devino_to_sysino(ib_p->ib_px_p->px_dip, + ino_num, &sysino) != DDI_SUCCESS) + return (NULL); + + ino_p = kmem_zalloc(sizeof (px_ino_t), KM_SLEEP); + + ino_p->ino_next_p = ib_p->ib_ino_lst; + ib_p->ib_ino_lst = ino_p; + + ino_p->ino_ino = ino_num; + ino_p->ino_sysino = sysino; + ino_p->ino_ib_p = ib_p; + ino_p->ino_unclaimed_intrs = 0; + ino_p->ino_lopil = 0; + ino_p->ino_cpuid = ino_p->ino_default_cpuid = (cpuid_t)-1; + + return (ino_p); +} + px_ino_pil_t * px_ib_new_ino_pil(px_ib_t *ib_p, devino_t ino_num, uint_t pil, px_ih_t *ih_p) { px_ino_pil_t *ipil_p = kmem_zalloc(sizeof (px_ino_pil_t), KM_SLEEP); px_ino_t *ino_p; - if ((ino_p = px_ib_locate_ino(ib_p, ino_num)) == NULL) { - sysino_t sysino; - - if (px_lib_intr_devino_to_sysino(ib_p->ib_px_p->px_dip, - ino_num, &sysino) != DDI_SUCCESS) - return (NULL); - - ino_p = kmem_zalloc(sizeof (px_ino_t), KM_SLEEP); + if ((ino_p = px_ib_locate_ino(ib_p, ino_num)) == NULL) + ino_p = px_ib_alloc_ino(ib_p, ino_num); - ino_p->ino_next_p = ib_p->ib_ino_lst; - ib_p->ib_ino_lst = ino_p; - - ino_p->ino_ino = ino_num; - ino_p->ino_sysino = sysino; - ino_p->ino_ib_p = ib_p; - ino_p->ino_unclaimed_intrs = 0; - ino_p->ino_lopil = pil; - } + ASSERT(ino_p != NULL); ih_p->ih_next = ih_p; ipil_p->ipil_pil = pil; @@ -473,7 +500,7 @@ ino_p->ino_ipil_p = ipil_p; ino_p->ino_ipil_size++; - if (ino_p->ino_lopil > pil) + if ((ino_p->ino_lopil == 0) || (ino_p->ino_lopil > pil)) ino_p->ino_lopil = pil; return (ipil_p); @@ -508,6 +535,7 @@ if (pil > next->ipil_pil) pil = next->ipil_pil; } + /* * Value stored in pil should be the lowest pil. 
*/ @@ -517,6 +545,11 @@ if (ino_p->ino_ipil_size) return; + ino_p->ino_lopil = 0; + + if (ino_p->ino_msiq_p) + return; + if (ib_p->ib_ino_lst == ino_p) ib_p->ib_ino_lst = ino_p->ino_next_p; else { @@ -819,6 +852,242 @@ } +/* + * Get interrupt CPU for a given ino. + * Return info only for inos which are already mapped to devices. + */ +/*ARGSUSED*/ +int +px_ib_get_intr_target(px_t *px_p, devino_t ino, cpuid_t *cpu_id_p) +{ + dev_info_t *dip = px_p->px_dip; + sysino_t sysino; + int ret; + + DBG(DBG_IB, px_p->px_dip, "px_ib_get_intr_target: devino %x\n", ino); + + /* Convert leaf-wide intr to system-wide intr */ + if (px_lib_intr_devino_to_sysino(dip, ino, &sysino) != DDI_SUCCESS) + return (DDI_FAILURE); + + ret = px_lib_intr_gettarget(dip, sysino, cpu_id_p); + + DBG(DBG_IB, px_p->px_dip, "px_ib_get_intr_target: cpu_id %x\n", + *cpu_id_p); + + return (ret); +} + + +/* + * Associate a new CPU with a given ino. + * Operate only on INOs which are already mapped to devices. + */ +int +px_ib_set_intr_target(px_t *px_p, devino_t ino, cpuid_t cpu_id) +{ + dev_info_t *dip = px_p->px_dip; + cpuid_t old_cpu_id; + sysino_t sysino; + int ret = DDI_SUCCESS; + extern const int _ncpu; + extern cpu_t *cpu[]; + + DBG(DBG_IB, px_p->px_dip, "px_ib_set_intr_target: devino %x " + "cpu_id %x\n", ino, cpu_id); + + mutex_enter(&cpu_lock); + + /* Convert leaf-wide intr to system-wide intr */ + if (px_lib_intr_devino_to_sysino(dip, ino, &sysino) != DDI_SUCCESS) { + ret = DDI_FAILURE; + goto done; + } + + if (px_lib_intr_gettarget(dip, sysino, &old_cpu_id) != DDI_SUCCESS) { + ret = DDI_FAILURE; + goto done; + } + + /* + * Get lock, validate cpu and write it. 
+ */ + if ((cpu_id < _ncpu) && (cpu[cpu_id] && cpu_is_online(cpu[cpu_id]))) { + DBG(DBG_IB, dip, "px_ib_set_intr_target: Enabling CPU %d\n", + cpu_id); + px_ib_intr_dist_en(dip, cpu_id, ino, B_TRUE); + px_ib_log_new_cpu(px_p->px_ib_p, old_cpu_id, cpu_id, ino); + } else { /* Invalid cpu */ + DBG(DBG_IB, dip, "px_ib_set_intr_target: Invalid cpuid %x\n", + cpu_id); + ret = DDI_EINVAL; + } + +done: + mutex_exit(&cpu_lock); + return (ret); +} + +hrtime_t px_ib_msix_retarget_timeout = 120ll * NANOSEC; /* 120 seconds */ + +/* + * Associate a new CPU with a given MSI/X. + * Operate only on MSI/Xs which are already mapped to devices. + */ +int +px_ib_set_msix_target(px_t *px_p, ddi_intr_handle_impl_t *hdlp, + msinum_t msi_num, cpuid_t cpu_id) +{ + px_ib_t *ib_p = px_p->px_ib_p; + px_msi_state_t *msi_state_p = &px_p->px_ib_p->ib_msi_state; + dev_info_t *dip = px_p->px_dip; + dev_info_t *rdip = hdlp->ih_dip; + msiqid_t msiq_id, old_msiq_id; + pci_msi_state_t msi_state; + msiq_rec_type_t msiq_rec_type; + msi_type_t msi_type; + px_ino_t *ino_p; + px_ih_t *ih_p, *old_ih_p; + cpuid_t old_cpu_id; + hrtime_t start_time, end_time; + int ret = DDI_SUCCESS; + extern const int _ncpu; + extern cpu_t *cpu[]; + + DBG(DBG_IB, dip, "px_ib_set_msix_target: msi_num %x new cpu_id %x\n", + msi_num, cpu_id); + + mutex_enter(&cpu_lock); + + /* Check for MSI64 support */ + if ((hdlp->ih_cap & DDI_INTR_FLAG_MSI64) && msi_state_p->msi_addr64) { + msiq_rec_type = MSI64_REC; + msi_type = MSI64_TYPE; + } else { + msiq_rec_type = MSI32_REC; + msi_type = MSI32_TYPE; + } + + if ((ret = px_lib_msi_getmsiq(dip, msi_num, + &old_msiq_id)) != DDI_SUCCESS) { + + mutex_exit(&cpu_lock); + return (ret); + } + + DBG(DBG_IB, dip, "px_ib_set_msix_target: current msiq 0x%x\n", + old_msiq_id); + + if ((ret = px_ib_get_intr_target(px_p, + px_msiqid_to_devino(px_p, old_msiq_id), + &old_cpu_id)) != DDI_SUCCESS) { + + mutex_exit(&cpu_lock); + return (ret); + } + + DBG(DBG_IB, dip, "px_ib_set_msix_target: current cpuid 
0x%x\n", + old_cpu_id); + + if (cpu_id == old_cpu_id) { + + mutex_exit(&cpu_lock); + return (DDI_SUCCESS); + } + + /* + * Get lock, validate cpu and write it. + */ + if (!((cpu_id < _ncpu) && (cpu[cpu_id] && + cpu_is_online(cpu[cpu_id])))) { + /* Invalid cpu */ + DBG(DBG_IB, dip, "px_ib_set_msix_target: Invalid cpuid %x\n", + cpu_id); + + mutex_exit(&cpu_lock); + return (DDI_EINVAL); + } + + DBG(DBG_IB, dip, "px_ib_set_msix_target: Enabling CPU %d\n", cpu_id); + + if ((ret = px_add_msiq_intr(dip, rdip, hdlp, + msiq_rec_type, msi_num, cpu_id, &msiq_id)) != DDI_SUCCESS) { + DBG(DBG_IB, dip, "px_ib_set_msix_target: Add MSI handler " + "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num); + + mutex_exit(&cpu_lock); + return (ret); + } + + if ((ret = px_lib_msi_setmsiq(dip, msi_num, + msiq_id, msi_type)) != DDI_SUCCESS) { + (void) px_rem_msiq_intr(dip, rdip, + hdlp, msiq_rec_type, msi_num, msiq_id); + + mutex_exit(&cpu_lock); + return (ret); + } + + if ((ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum, + px_msiqid_to_devino(px_p, msiq_id), hdlp->ih_pri, + PX_INTR_STATE_ENABLE, msiq_rec_type, msi_num)) != DDI_SUCCESS) { + (void) px_rem_msiq_intr(dip, rdip, + hdlp, msiq_rec_type, msi_num, msiq_id); + + mutex_exit(&cpu_lock); + return (ret); + } + + mutex_exit(&cpu_lock); + mutex_enter(&ib_p->ib_ino_lst_mutex); + + ino_p = px_ib_locate_ino(ib_p, px_msiqid_to_devino(px_p, old_msiq_id)); + old_ih_p = px_ib_intr_locate_ih(px_ib_ino_locate_ipil(ino_p, + hdlp->ih_pri), rdip, hdlp->ih_inum, msiq_rec_type, msi_num); + old_ih_p->ih_retarget_flag = B_TRUE; + + ino_p = px_ib_locate_ino(ib_p, px_msiqid_to_devino(px_p, msiq_id)); + ih_p = px_ib_intr_locate_ih(px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri), + rdip, hdlp->ih_inum, msiq_rec_type, msi_num); + ih_p->ih_retarget_flag = B_TRUE; + + if ((ret = px_lib_msi_getstate(dip, msi_num, + &msi_state)) != DDI_SUCCESS) { + (void) px_rem_msiq_intr(dip, rdip, + hdlp, msiq_rec_type, msi_num, msiq_id); + + mutex_exit(&ib_p->ib_ino_lst_mutex); 
+ return (ret); + } + + if (msi_state == PCI_MSI_STATE_IDLE) + ih_p->ih_retarget_flag = B_FALSE; + + start_time = gethrtime(); + while ((ih_p->ih_retarget_flag == B_TRUE) && + (old_ih_p->ih_retarget_flag == B_TRUE)) { + if ((end_time = (gethrtime() - start_time)) > + px_ib_msix_retarget_timeout) { + cmn_err(CE_WARN, "MSIX retarget %x is not completed, " + "even after waiting %llx ticks\n", + msi_num, end_time); + + break; + } + + /* Wait for one second */ + delay(drv_usectohz(1000000)); + } + + mutex_exit(&ib_p->ib_ino_lst_mutex); + + ret = px_rem_msiq_intr(dip, rdip, + hdlp, msiq_rec_type, msi_num, old_msiq_id); + + return (ret); +} + + static void px_fill_in_intr_devs(pcitool_intr_dev_t *dev, char *driver_name, char *path_name, int instance) @@ -841,8 +1110,8 @@ * the px node and (Internal) when it finds no other devices (and *devs_ret > 0) */ uint8_t -pxtool_ib_get_ino_devs( - px_t *px_p, uint32_t ino, uint8_t *devs_ret, pcitool_intr_dev_t *devs) +pxtool_ib_get_ino_devs(px_t *px_p, uint32_t ino, uint32_t msi_num, + uint8_t *devs_ret, pcitool_intr_dev_t *devs) { px_ib_t *ib_p = px_p->px_ib_p; px_ino_t *ino_p; @@ -863,7 +1132,17 @@ ((i < ipil_p->ipil_ih_size) && (i < *devs_ret)); i++, j++, ih_p = ih_p->ih_next) { (void) ddi_pathname(ih_p->ih_dip, pathname); - px_fill_in_intr_devs(&devs[i], + + if (ih_p->ih_msg_code == msi_num) { + num_devs = *devs_ret = 1; + px_fill_in_intr_devs(&devs[0], + (char *)ddi_driver_name( + ih_p->ih_dip), pathname, + ddi_get_instance(ih_p->ih_dip)); + goto done; + } + + px_fill_in_intr_devs(&devs[j], (char *)ddi_driver_name(ih_p->ih_dip), pathname, ddi_get_instance(ih_p->ih_dip)); } @@ -879,14 +1158,60 @@ num_devs = *devs_ret = 1; } +done: mutex_exit(&ib_p->ib_ino_lst_mutex); return (num_devs); } +int +pxtool_ib_get_msi_info(px_t *px_p, devino_t ino, msinum_t msi_num, + ddi_intr_handle_impl_t *hdlp) +{ + px_ib_t *ib_p = px_p->px_ib_p; + px_ino_t *ino_p; + px_ino_pil_t *ipil_p; + px_ih_t *ih_p; + int i; + + 
mutex_enter(&ib_p->ib_ino_lst_mutex); + + if ((ino_p = px_ib_locate_ino(ib_p, ino)) == NULL) { + mutex_exit(&ib_p->ib_ino_lst_mutex); + return (DDI_FAILURE); + } + + for (ipil_p = ino_p->ino_ipil_p; ipil_p; + ipil_p = ipil_p->ipil_next_p) { + for (i = 0, ih_p = ipil_p->ipil_ih_head; + ((i < ipil_p->ipil_ih_size) && ih_p); + i++, ih_p = ih_p->ih_next) { + + if (ih_p->ih_msg_code != msi_num) + continue; + + hdlp->ih_dip = ih_p->ih_dip; + hdlp->ih_inum = ih_p->ih_inum; + hdlp->ih_cb_func = ih_p->ih_handler; + hdlp->ih_cb_arg1 = ih_p->ih_handler_arg1; + hdlp->ih_cb_arg2 = ih_p->ih_handler_arg2; + if (ih_p->ih_rec_type == MSI64_REC) + hdlp->ih_cap = DDI_INTR_FLAG_MSI64; + hdlp->ih_pri = ipil_p->ipil_pil; + hdlp->ih_ver = DDI_INTR_VERSION; + + mutex_exit(&ib_p->ib_ino_lst_mutex); + return (DDI_SUCCESS); + } + } + + mutex_exit(&ib_p->ib_ino_lst_mutex); + return (DDI_FAILURE); +} + void -px_ib_log_new_cpu(px_ib_t *ib_p, uint32_t old_cpu_id, uint32_t new_cpu_id, +px_ib_log_new_cpu(px_ib_t *ib_p, cpuid_t old_cpu_id, cpuid_t new_cpu_id, uint32_t ino) { px_ino_t *ino_p;
--- a/usr/src/uts/sun4/io/px/px_ib.h Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4/io/px/px_ib.h Wed Jul 08 12:59:05 2009 +0800 @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_PX_IB_H #define _SYS_PX_IB_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -70,6 +68,7 @@ uint_t ih_intr_state; /* only used for fixed interrupts */ msiq_rec_type_t ih_rec_type; /* MSI or PCIe record type */ msgcode_t ih_msg_code; /* MSI number or PCIe message code */ + boolean_t ih_retarget_flag; /* MSI/X retarget flag */ px_ih_t *ih_next; /* Next entry in list */ uint64_t ih_ticks; /* ticks spent in this handler */ uint64_t ih_nsec; /* nsec spent in this handler */ @@ -103,7 +102,8 @@ px_ib_t *ino_ib_p; /* link back to interrupt block state */ uint_t ino_unclaimed_intrs; /* number of unclaimed intrs */ clock_t ino_spurintr_begin; /* begin time of spurious intr */ - cpuid_t ino_cpuid; /* cpu that ino is targeting */ + cpuid_t ino_cpuid; /* current cpu for this ino */ + cpuid_t ino_default_cpuid; /* default cpu for this ino */ int32_t ino_intr_weight; /* intr wt of devices sharing ino */ ushort_t ino_ipil_size; /* no of px_ino_pil_t sharing ino */ ushort_t ino_lopil; /* lowest pil sharing ino */ @@ -134,6 +134,7 @@ extern void px_ib_free_ino_all(px_ib_t *ib_p); extern px_ino_pil_t *px_ib_ino_locate_ipil(px_ino_t *ino_p, uint_t pil); +extern px_ino_t *px_ib_alloc_ino(px_ib_t *ib_p, devino_t ino_num); extern px_ino_pil_t *px_ib_new_ino_pil(px_ib_t *ib_p, devino_t ino_num, uint_t pil, px_ih_t *ih_p); extern void px_ib_delete_ino_pil(px_ib_t *ib_p, px_ino_pil_t *ipil_p); @@ -150,12 +151,16 @@ extern int px_ib_update_intr_state(px_t *px_p, dev_info_t *rdip, uint_t inum, devino_t ino, uint_t pil, uint_t new_intr_state, msiq_rec_type_t rec_type, msgcode_t msg_code); - +extern int 
px_ib_get_intr_target(px_t *px_p, devino_t ino, cpuid_t *cpu_id_p); +extern int px_ib_set_intr_target(px_t *px_p, devino_t ino, cpuid_t cpu_id); +extern int px_ib_set_msix_target(px_t *px_p, ddi_intr_handle_impl_t *hdlp, + msinum_t msi_num, cpuid_t cpuid); extern uint8_t pxtool_ib_get_ino_devs(px_t *px_p, uint32_t ino, - uint8_t *devs_ret, pcitool_intr_dev_t *devs); -extern void px_ib_log_new_cpu(px_ib_t *ib_p, uint32_t old_cpu_id, - uint32_t new_cpu_id, uint32_t ino); - + uint32_t msi_num, uint8_t *devs_ret, pcitool_intr_dev_t *devs); +extern int pxtool_ib_get_msi_info(px_t *px_p, devino_t ino, msinum_t msi_num, + ddi_intr_handle_impl_t *hdlp); +extern void px_ib_log_new_cpu(px_ib_t *ib_p, cpuid_t old_cpu_id, + cpuid_t new_cpu_id, uint32_t ino); #ifdef __cplusplus }
--- a/usr/src/uts/sun4/io/px/px_intr.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4/io/px/px_intr.c Wed Jul 08 12:59:05 2009 +0800 @@ -365,6 +365,8 @@ DTRACE_PROBE4(interrupt__start, dev_info_t, dip, void *, handler, caddr_t, arg1, caddr_t, arg2); + ih_p->ih_retarget_flag = B_FALSE; + /* * Special case for PCIE Error Messages. * The current frame work doesn't fit PCIE Err Msgs @@ -496,6 +498,13 @@ case DDI_INTROP_REMISR: ret = px_rem_intx_intr(dip, rdip, hdlp); break; + case DDI_INTROP_GETTARGET: + ret = px_ib_get_intr_target(px_p, hdlp->ih_vector, + (cpuid_t *)result); + break; + case DDI_INTROP_SETTARGET: + ret = DDI_ENOTSUP; + break; case DDI_INTROP_ENABLE: ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum, hdlp->ih_vector, hdlp->ih_pri, PX_INTR_STATE_ENABLE, 0, 0); @@ -538,7 +547,7 @@ msinum_t msi_num; msiqid_t msiq_id; uint_t nintrs; - int i, ret = DDI_SUCCESS; + int ret = DDI_SUCCESS; DBG(DBG_INTROPS, dip, "px_msix_ops: dip=%x rdip=%x intr_op=%x " "handle=%p\n", dip, rdip, intr_op, hdlp); @@ -554,9 +563,15 @@ msi_addr = msi_state_p->msi_addr32; } + (void) px_msi_get_msinum(px_p, hdlp->ih_dip, + (hdlp->ih_flags & DDI_INTR_MSIX_DUP) ? 
hdlp->ih_main->ih_inum : + hdlp->ih_inum, &msi_num); + switch (intr_op) { case DDI_INTROP_GETCAP: ret = pci_msi_get_cap(rdip, hdlp->ih_type, (int *)result); + if (ret == DDI_SUCCESS) + *(int *)result |= DDI_INTR_FLAG_RETARGETABLE; break; case DDI_INTROP_SETCAP: DBG(DBG_INTROPS, dip, "px_msix_ops: SetCap is not supported\n"); @@ -624,12 +639,8 @@ case DDI_INTROP_SETPRI: break; case DDI_INTROP_ADDISR: - if ((ret = px_msi_get_msinum(px_p, hdlp->ih_dip, - hdlp->ih_inum, &msi_num)) != DDI_SUCCESS) - return (ret); - if ((ret = px_add_msiq_intr(dip, rdip, hdlp, - msiq_rec_type, msi_num, &msiq_id)) != DDI_SUCCESS) { + msiq_rec_type, msi_num, -1, &msiq_id)) != DDI_SUCCESS) { DBG(DBG_INTROPS, dip, "px_msix_ops: Add MSI handler " "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num); return (ret); @@ -651,7 +662,14 @@ return (ret); } - hdlp->ih_vector = msi_num; + if ((ret = px_lib_msi_setvalid(dip, msi_num, + PCI_MSI_VALID)) != DDI_SUCCESS) + return (ret); + + ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum, + px_msiqid_to_devino(px_p, msiq_id), hdlp->ih_pri, + PX_INTR_STATE_ENABLE, msiq_rec_type, msi_num); + break; case DDI_INTROP_DUPVEC: DBG(DBG_INTROPS, dip, "px_msix_ops: dupisr - inum: %x, " @@ -661,12 +679,20 @@ hdlp->ih_scratch1); break; case DDI_INTROP_REMISR: - msi_num = hdlp->ih_vector; - if ((ret = px_lib_msi_getmsiq(dip, msi_num, &msiq_id)) != DDI_SUCCESS) return (ret); + if ((ret = px_ib_update_intr_state(px_p, rdip, + hdlp->ih_inum, px_msiqid_to_devino(px_p, msiq_id), + hdlp->ih_pri, PX_INTR_STATE_DISABLE, msiq_rec_type, + msi_num)) != DDI_SUCCESS) + return (ret); + + if ((ret = px_lib_msi_setvalid(dip, msi_num, + PCI_MSI_INVALID)) != DDI_SUCCESS) + return (ret); + if ((ret = px_lib_msi_setstate(dip, msi_num, PCI_MSI_STATE_IDLE)) != DDI_SUCCESS) return (ret); @@ -674,125 +700,78 @@ ret = px_rem_msiq_intr(dip, rdip, hdlp, msiq_rec_type, msi_num, msiq_id); - hdlp->ih_vector = 0; break; - case DDI_INTROP_ENABLE: - msi_num = hdlp->ih_vector; - - if ((ret = 
px_lib_msi_setvalid(dip, msi_num, - PCI_MSI_VALID)) != DDI_SUCCESS) + case DDI_INTROP_GETTARGET: + if ((ret = px_lib_msi_getmsiq(dip, msi_num, + &msiq_id)) != DDI_SUCCESS) return (ret); - if ((pci_is_msi_enabled(rdip, hdlp->ih_type) != DDI_SUCCESS) || - (hdlp->ih_type == DDI_INTR_TYPE_MSIX)) { - nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip); + ret = px_ib_get_intr_target(px_p, + px_msiqid_to_devino(px_p, msiq_id), (cpuid_t *)result); + break; + case DDI_INTROP_SETTARGET: + ret = px_ib_set_msix_target(px_p, hdlp, msi_num, + *(cpuid_t *)result); + break; + case DDI_INTROP_ENABLE: + /* + * curr_nenables will be greater than 0 if rdip is using + * MSI-X and also, if it is using DUP interface. If this + * curr_enables is > 1, return after clearing the mask bit. + */ + if ((pci_is_msi_enabled(rdip, hdlp->ih_type) == DDI_SUCCESS) && + (i_ddi_intr_get_current_nenables(rdip) > 0)) { + return (pci_msi_clr_mask(rdip, hdlp->ih_type, + hdlp->ih_inum)); + } - if ((ret = pci_msi_configure(rdip, hdlp->ih_type, - nintrs, hdlp->ih_inum, msi_addr, - hdlp->ih_type == DDI_INTR_TYPE_MSIX ? - msi_num : msi_num & ~(nintrs - 1))) != DDI_SUCCESS) - return (ret); + nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip); - if ((ret = pci_msi_enable_mode(rdip, hdlp->ih_type)) - != DDI_SUCCESS) - return (ret); - } + if ((ret = pci_msi_configure(rdip, hdlp->ih_type, + nintrs, hdlp->ih_inum, msi_addr, + hdlp->ih_type == DDI_INTR_TYPE_MSIX ? 
msi_num : + msi_num & ~(nintrs - 1))) != DDI_SUCCESS) + return (ret); + + if ((ret = pci_msi_enable_mode(rdip, + hdlp->ih_type)) != DDI_SUCCESS) + return (ret); if ((ret = pci_msi_clr_mask(rdip, hdlp->ih_type, hdlp->ih_inum)) != DDI_SUCCESS) return (ret); - if (hdlp->ih_flags & DDI_INTR_MSIX_DUP) - break; - - if ((ret = px_lib_msi_getmsiq(dip, msi_num, - &msiq_id)) != DDI_SUCCESS) - return (ret); - - ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum, - px_msiqid_to_devino(px_p, msiq_id), hdlp->ih_pri, - PX_INTR_STATE_ENABLE, msiq_rec_type, msi_num); - break; case DDI_INTROP_DISABLE: - msi_num = hdlp->ih_vector; - - if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type, - hdlp->ih_cap & DDI_INTR_FLAG_BLOCK)) != DDI_SUCCESS) - return (ret); - if ((ret = pci_msi_set_mask(rdip, hdlp->ih_type, hdlp->ih_inum)) != DDI_SUCCESS) return (ret); - if ((ret = px_lib_msi_setvalid(dip, msi_num, - PCI_MSI_INVALID)) != DDI_SUCCESS) - return (ret); - - if (hdlp->ih_flags & DDI_INTR_MSIX_DUP) - break; + /* + * curr_nenables will be greater than 1 if rdip is using + * MSI-X and also, if it is using DUP interface. If this + * curr_enables is > 1, return after setting the mask bit. 
+ */ + if (i_ddi_intr_get_current_nenables(rdip) > 1) + return (DDI_SUCCESS); - if ((ret = px_lib_msi_getmsiq(dip, msi_num, - &msiq_id)) != DDI_SUCCESS) + if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type)) + != DDI_SUCCESS) return (ret); - ret = px_ib_update_intr_state(px_p, rdip, - hdlp->ih_inum, px_msiqid_to_devino(px_p, msiq_id), - hdlp->ih_pri, PX_INTR_STATE_DISABLE, msiq_rec_type, - msi_num); - break; case DDI_INTROP_BLOCKENABLE: nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip); - msi_num = hdlp->ih_vector; if ((ret = pci_msi_configure(rdip, hdlp->ih_type, nintrs, hdlp->ih_inum, msi_addr, msi_num & ~(nintrs - 1))) != DDI_SUCCESS) return (ret); - for (i = 0; i < nintrs; i++, msi_num++) { - if ((ret = px_lib_msi_setvalid(dip, msi_num, - PCI_MSI_VALID)) != DDI_SUCCESS) - return (ret); - - if ((ret = px_lib_msi_getmsiq(dip, msi_num, - &msiq_id)) != DDI_SUCCESS) - return (ret); - - if ((ret = px_ib_update_intr_state(px_p, rdip, - hdlp->ih_inum + i, px_msiqid_to_devino(px_p, - msiq_id), hdlp->ih_pri, PX_INTR_STATE_ENABLE, - msiq_rec_type, msi_num)) != DDI_SUCCESS) - return (ret); - } - ret = pci_msi_enable_mode(rdip, hdlp->ih_type); break; case DDI_INTROP_BLOCKDISABLE: - nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip); - msi_num = hdlp->ih_vector; - - if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type, - hdlp->ih_cap & DDI_INTR_FLAG_BLOCK)) != DDI_SUCCESS) - return (ret); - - for (i = 0; i < nintrs; i++, msi_num++) { - if ((ret = px_lib_msi_setvalid(dip, msi_num, - PCI_MSI_INVALID)) != DDI_SUCCESS) - return (ret); - - if ((ret = px_lib_msi_getmsiq(dip, msi_num, - &msiq_id)) != DDI_SUCCESS) - return (ret); - - if ((ret = px_ib_update_intr_state(px_p, rdip, - hdlp->ih_inum + i, px_msiqid_to_devino(px_p, - msiq_id), hdlp->ih_pri, PX_INTR_STATE_DISABLE, - msiq_rec_type, msi_num)) != DDI_SUCCESS) - return (ret); - } - + ret = pci_msi_disable_mode(rdip, hdlp->ih_type); break; case DDI_INTROP_SETMASK: ret = pci_msi_set_mask(rdip, hdlp->ih_type, hdlp->ih_inum); 
@@ -1030,13 +1009,16 @@ /* Select cpu, saving it for sharing and removal */ if (ipil_list == NULL) { - ino_p->ino_cpuid = intr_dist_cpuid(); + if (ino_p->ino_cpuid == -1) + ino_p->ino_cpuid = intr_dist_cpuid(); /* Enable interrupt */ px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino); } ino_done: + hdlp->ih_target = ino_p->ino_cpuid; + /* Add weight to the cpu that we are already targeting */ weight = pci_class_to_intr_weight(rdip); intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight); @@ -1133,7 +1115,7 @@ int px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip, ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type, - msgcode_t msg_code, msiqid_t *msiq_id_p) + msgcode_t msg_code, cpuid_t cpu_id, msiqid_t *msiq_id_p) { px_t *px_p = INST_TO_STATE(ddi_get_instance(dip)); px_ib_t *ib_p = px_p->px_ib_p; @@ -1145,23 +1127,27 @@ int32_t weight; int ret = DDI_SUCCESS; - DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=%x " - "arg1=%x arg2=%x\n", ddi_driver_name(rdip), ddi_get_instance(rdip), - hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2); - - if ((ret = px_msiq_alloc(px_p, rec_type, msiq_id_p)) != DDI_SUCCESS) { - DBG(DBG_MSIQ, dip, "px_add_msiq_intr: " - "msiq allocation failed\n"); - return (ret); - } - - ino = px_msiqid_to_devino(px_p, *msiq_id_p); + DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=0x%x " + "arg1=0x%x arg2=0x%x cpu=0x%x\n", ddi_driver_name(rdip), + ddi_get_instance(rdip), hdlp->ih_cb_func, hdlp->ih_cb_arg1, + hdlp->ih_cb_arg2, cpu_id); ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum, hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, rec_type, msg_code); mutex_enter(&ib_p->ib_ino_lst_mutex); + ret = (cpu_id == -1) ? 
px_msiq_alloc(px_p, rec_type, msiq_id_p) : + px_msiq_alloc_based_on_cpuid(px_p, rec_type, cpu_id, msiq_id_p); + + if (ret != DDI_SUCCESS) { + DBG(DBG_MSIQ, dip, "px_add_msiq_intr: " + "msiq allocation failed\n"); + goto fail; + } + + ino = px_msiqid_to_devino(px_p, *msiq_id_p); + ino_p = px_ib_locate_ino(ib_p, ino); ipil_list = ino_p ? ino_p->ino_ipil_p : NULL; @@ -1221,17 +1207,20 @@ /* Select cpu, saving it for sharing and removal */ if (ipil_list == NULL) { - ino_p->ino_cpuid = intr_dist_cpuid(); - /* Enable MSIQ */ px_lib_msiq_setstate(dip, *msiq_id_p, PCI_MSIQ_STATE_IDLE); px_lib_msiq_setvalid(dip, *msiq_id_p, PCI_MSIQ_VALID); + if (ino_p->ino_cpuid == -1) + ino_p->ino_cpuid = intr_dist_cpuid(); + /* Enable interrupt */ px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino); } ino_done: + hdlp->ih_target = ino_p->ino_cpuid; + /* Add weight to the cpu that we are already targeting */ weight = pci_class_to_intr_weight(rdip); intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight); @@ -1249,6 +1238,8 @@ fail2: px_ib_delete_ino_pil(ib_p, ipil_p); fail1: + (void) px_msiq_free(px_p, *msiq_id_p); +fail: if (ih_p->ih_config_handle) pci_config_teardown(&ih_p->ih_config_handle); @@ -1309,13 +1300,11 @@ if (ino_p->ino_ipil_size == 0) px_lib_msiq_setvalid(dip, px_devino_to_msiqid(px_p, ino), PCI_MSIQ_INVALID); - - (void) px_msiq_free(px_p, msiq_id); } - if (ino_p->ino_ipil_size == 0) { - kmem_free(ino_p, sizeof (px_ino_t)); - } else { + (void) px_msiq_free(px_p, msiq_id); + + if (ino_p->ino_ipil_size) { /* Re-enable interrupt only if mapping register still shared */ PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu); }
--- a/usr/src/uts/sun4/io/px/px_intr.h Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4/io/px/px_intr.h Wed Jul 08 12:59:05 2009 +0800 @@ -44,7 +44,7 @@ extern int px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip, ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type, - msgcode_t msg_code, msiqid_t *msiq_id_p); + msgcode_t msg_code, cpuid_t cpuid, msiqid_t *msiq_id_p); extern int px_rem_msiq_intr(dev_info_t *dip, dev_info_t *rdip, ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type, msgcode_t msg_code, msiqid_t msiq_id);
--- a/usr/src/uts/sun4/io/px/px_msiq.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4/io/px/px_msiq.c Wed Jul 08 12:59:05 2009 +0800 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,6 +32,7 @@ #include <sys/conf.h> #include <sys/ddi.h> #include <sys/sunddi.h> +#include <sys/machsystm.h> /* intr_dist_add */ #include <sys/modctl.h> #include <sys/disp.h> #include <sys/stat.h> @@ -46,7 +47,8 @@ int px_msiq_attach(px_t *px_p) { - px_msiq_state_t *msiq_state_p = &px_p->px_ib_p->ib_msiq_state; + px_ib_t *ib_p = px_p->px_ib_p; + px_msiq_state_t *msiq_state_p = &ib_p->ib_msiq_state; int i, ret = DDI_SUCCESS; DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_attach\n"); @@ -68,11 +70,8 @@ msiq_state_p->msiq_msg_qcnt; msiq_state_p->msiq_1st_msi_qid = msiq_state_p->msiq_1st_msiq_id; - msiq_state_p->msiq_next_msi_qid = msiq_state_p->msiq_1st_msi_qid; - msiq_state_p->msiq_1st_msg_qid = msiq_state_p->msiq_1st_msiq_id + msiq_state_p->msiq_msi_qcnt; - msiq_state_p->msiq_next_msg_qid = msiq_state_p->msiq_1st_msg_qid; mutex_init(&msiq_state_p->msiq_mutex, NULL, MUTEX_DRIVER, NULL); msiq_state_p->msiq_p = kmem_zalloc(msiq_state_p->msiq_cnt * @@ -83,11 +82,14 @@ msiq_state_p->msiq_1st_msiq_id + i; msiq_state_p->msiq_p[i].msiq_refcnt = 0; msiq_state_p->msiq_p[i].msiq_state = MSIQ_STATE_FREE; + (void) px_ib_alloc_ino(ib_p, px_msiqid_to_devino(px_p, + msiq_state_p->msiq_p[i].msiq_id)); } if ((ret = px_lib_msiq_init(px_p->px_dip)) != DDI_SUCCESS) px_msiq_detach(px_p); + msiq_state_p->msiq_redist_flag = B_TRUE; return (ret); } @@ -110,11 +112,11 @@ kmem_free(msiq_state_p->msiq_p, msiq_state_p->msiq_cnt * sizeof (px_msiq_t)); - bzero(&px_p->px_ib_p->ib_msiq_state, sizeof (px_msiq_state_t)); + bzero(msiq_state_p, sizeof (px_msiq_state_t)); } /* - * px_msiq_detach() + * px_msiq_resume() */ void px_msiq_resume(px_t *px_p) @@ -123,7 +125,8 
@@ int i; for (i = 0; i < msiq_state_p->msiq_cnt; i++) { - (void) px_lib_msiq_gethead(px_p->px_dip, i, + (void) px_lib_msiq_gethead(px_p->px_dip, + msiq_state_p->msiq_p[i].msiq_id, &msiq_state_p->msiq_p[i].msiq_curr_head_index); msiq_state_p->msiq_p[i].msiq_new_head_index = 0; msiq_state_p->msiq_p[i].msiq_recs2process = 0; @@ -136,53 +139,128 @@ int px_msiq_alloc(px_t *px_p, msiq_rec_type_t rec_type, msiqid_t *msiq_id_p) { - px_msiq_state_t *msiq_state_p = &px_p->px_ib_p->ib_msiq_state; - msiqid_t first_msiq_id, *next_msiq_index; + px_ib_t *ib_p = px_p->px_ib_p; + px_msiq_state_t *msiq_state_p = &ib_p->ib_msiq_state; + msiqid_t first_msiq_id; uint_t msiq_cnt; + ushort_t least_refcnt; int i; DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_alloc\n"); + ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex)); + mutex_enter(&msiq_state_p->msiq_mutex); + + if (rec_type == MSG_REC) { + msiq_cnt = msiq_state_p->msiq_msg_qcnt; + first_msiq_id = msiq_state_p->msiq_1st_msg_qid; + } else { + msiq_cnt = msiq_state_p->msiq_msi_qcnt; + first_msiq_id = msiq_state_p->msiq_1st_msi_qid; + } + + *msiq_id_p = first_msiq_id; + least_refcnt = msiq_state_p->msiq_p[first_msiq_id].msiq_refcnt; + + /* Allocate MSIQs */ + for (i = first_msiq_id; i < (first_msiq_id + msiq_cnt); i++) { + if (msiq_state_p->msiq_p[i].msiq_state == MSIQ_STATE_FREE) { + msiq_state_p->msiq_p[i].msiq_state = MSIQ_STATE_INUSE; + (void) px_lib_msiq_gethead(px_p->px_dip, i, + &msiq_state_p->msiq_p[i].msiq_curr_head_index); + *msiq_id_p = msiq_state_p->msiq_p[i].msiq_id; + break; + } + + if (least_refcnt > msiq_state_p->msiq_p[i].msiq_refcnt) { + *msiq_id_p = msiq_state_p->msiq_p[i].msiq_id; + least_refcnt = msiq_state_p->msiq_p[i].msiq_refcnt; + } + } + + msiq_state_p->msiq_p[*msiq_id_p].msiq_refcnt++; + + DBG(DBG_MSIQ, px_p->px_dip, + "px_msiq_alloc: msiq_id 0x%x\n", *msiq_id_p); + + mutex_exit(&msiq_state_p->msiq_mutex); + return (DDI_SUCCESS); +} + +/* + * px_msiq_alloc_based_on_cpuid() + */ +int +px_msiq_alloc_based_on_cpuid(px_t *px_p, 
msiq_rec_type_t rec_type, + cpuid_t cpuid, msiqid_t *msiq_id_p) +{ + px_ib_t *ib_p = px_p->px_ib_p; + px_msiq_state_t *msiq_state_p = &ib_p->ib_msiq_state; + msiqid_t first_msiq_id, free_msiq_id; + uint_t msiq_cnt; + ushort_t least_refcnt; + px_ino_t *ino_p; + int i; + + DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_alloc_based_on_cpuid: " + "cpuid 0x%x\n", cpuid); + + ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex)); + mutex_enter(&msiq_state_p->msiq_mutex); if (rec_type == MSG_REC) { msiq_cnt = msiq_state_p->msiq_msg_qcnt; first_msiq_id = msiq_state_p->msiq_1st_msg_qid; - next_msiq_index = &msiq_state_p->msiq_next_msg_qid; } else { msiq_cnt = msiq_state_p->msiq_msi_qcnt; first_msiq_id = msiq_state_p->msiq_1st_msi_qid; - next_msiq_index = &msiq_state_p->msiq_next_msi_qid; } + *msiq_id_p = free_msiq_id = (msiqid_t)-1; + least_refcnt = (ushort_t)-1; + /* Allocate MSIQs */ for (i = first_msiq_id; i < (first_msiq_id + msiq_cnt); i++) { - if (msiq_state_p->msiq_p[i].msiq_state == MSIQ_STATE_FREE) { - msiq_state_p->msiq_p[i].msiq_state = MSIQ_STATE_INUSE; - msiq_state_p->msiq_p[i].msiq_refcnt = 1; - (void) px_lib_msiq_gethead(px_p->px_dip, i, - &msiq_state_p->msiq_p[i].msiq_curr_head_index); - break; + ino_p = px_ib_locate_ino(ib_p, px_msiqid_to_devino(px_p, i)); + + if ((ino_p->ino_cpuid == cpuid) && + (least_refcnt > msiq_state_p->msiq_p[i].msiq_refcnt)) { + *msiq_id_p = msiq_state_p->msiq_p[i].msiq_id; + least_refcnt = msiq_state_p->msiq_p[i].msiq_refcnt; } + + if ((*msiq_id_p == -1) && (free_msiq_id == -1) && + (msiq_state_p->msiq_p[i].msiq_state == MSIQ_STATE_FREE)) + free_msiq_id = msiq_state_p->msiq_p[i].msiq_id; } - /* - * There are no free MSIQ. - * Use next available MSIQ. 
- */ - if (i >= (first_msiq_id + msiq_cnt)) { - i = *next_msiq_index; - msiq_state_p->msiq_p[i].msiq_refcnt++; + if (*msiq_id_p == -1) { + if (free_msiq_id == -1) { + DBG(DBG_MSIQ, px_p->px_dip, + "px_msiq_alloc_based_on_cpuid: No EQ is available " + "for CPU 0x%x\n", cpuid); + + mutex_exit(&msiq_state_p->msiq_mutex); + return (DDI_EINVAL); + } + + *msiq_id_p = free_msiq_id; + ino_p = px_ib_locate_ino(ib_p, + px_msiqid_to_devino(px_p, *msiq_id_p)); + ino_p->ino_cpuid = ino_p->ino_default_cpuid = cpuid; } - *msiq_id_p = msiq_state_p->msiq_p[i].msiq_id; - DBG(DBG_MSIQ, px_p->px_dip, - "px_msiq_alloc: msiq_id 0x%x\n", *msiq_id_p); + if (msiq_state_p->msiq_p[*msiq_id_p].msiq_state == MSIQ_STATE_FREE) { + msiq_state_p->msiq_p[*msiq_id_p].msiq_state = MSIQ_STATE_INUSE; + (void) px_lib_msiq_gethead(px_p->px_dip, *msiq_id_p, + &msiq_state_p->msiq_p[*msiq_id_p].msiq_curr_head_index); + } - (*next_msiq_index)++; + msiq_state_p->msiq_p[*msiq_id_p].msiq_refcnt++; - if (*next_msiq_index >= (first_msiq_id + msiq_cnt)) - *next_msiq_index = first_msiq_id; + DBG(DBG_MSIQ, px_p->px_dip, + "px_msiq_alloc_based_on_cpuid: msiq_id 0x%x\n", *msiq_id_p); mutex_exit(&msiq_state_p->msiq_mutex); return (DDI_SUCCESS); @@ -194,16 +272,20 @@ int px_msiq_free(px_t *px_p, msiqid_t msiq_id) { - px_msiq_state_t *msiq_state_p = &px_p->px_ib_p->ib_msiq_state; + px_ib_t *ib_p = px_p->px_ib_p; + px_msiq_state_t *msiq_state_p = &ib_p->ib_msiq_state; DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_free: msiq_id 0x%x", msiq_id); + ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex)); mutex_enter(&msiq_state_p->msiq_mutex); if ((msiq_id < msiq_state_p->msiq_1st_msiq_id) || (msiq_id >= (msiq_state_p->msiq_1st_msiq_id + msiq_state_p->msiq_cnt))) { DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_free: Invalid msiq_id 0x%x", msiq_id); + + mutex_exit(&msiq_state_p->msiq_mutex); return (DDI_FAILURE); } @@ -215,6 +297,45 @@ } /* + * px_msiq_redist() + */ +void +px_msiq_redist(px_t *px_p) +{ + px_ib_t *ib_p = px_p->px_ib_p; + px_msiq_state_t 
*msiq_state_p = &ib_p->ib_msiq_state; + px_ino_t *ino_p; + int i; + + ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex)); + + mutex_enter(&msiq_state_p->msiq_mutex); + + if (msiq_state_p->msiq_redist_flag == B_FALSE) { + mutex_exit(&msiq_state_p->msiq_mutex); + return; + } + + for (i = 0; i < msiq_state_p->msiq_cnt; i++) { + ino_p = px_ib_locate_ino(ib_p, + px_msiqid_to_devino(px_p, msiq_state_p->msiq_p[i].msiq_id)); + + if (ino_p) { + ino_p->ino_cpuid = ino_p->ino_default_cpuid = + intr_dist_cpuid(); + + DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_redist: " + "sysino 0x%llx current cpuid 0x%x " + "default cpuid 0x%x\n", ino_p->ino_sysino, + ino_p->ino_cpuid, ino_p->ino_default_cpuid); + } + } + + msiq_state_p->msiq_redist_flag = B_FALSE; + mutex_exit(&msiq_state_p->msiq_mutex); +} + +/* * px_msiqid_to_devino() */ devino_t
--- a/usr/src/uts/sun4/io/px/px_msiq.h Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4/io/px/px_msiq.h Wed Jul 08 12:59:05 2009 +0800 @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_PX_MSIQ_H #define _SYS_PX_MSIQ_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -59,16 +57,15 @@ uint_t msiq_rec_cnt; /* # of records per MSIQ */ msiqid_t msiq_1st_msiq_id; /* First MSIQ ID */ devino_t msiq_1st_devino; /* First devino */ + boolean_t msiq_redist_flag; /* Flag to redist MSIQs */ /* MSIQs specific reserved for MSI/Xs */ uint_t msiq_msi_qcnt; /* # of MSIQs for MSI/Xs */ msiqid_t msiq_1st_msi_qid; /* First MSIQ ID for MSI/Xs */ - msiqid_t msiq_next_msi_qid; /* Next MSIQ index for MSI/Xs */ /* MSIQs specific reserved for PCIe messages */ uint_t msiq_msg_qcnt; /* # of MSIQs for PCIe msgs */ msiqid_t msiq_1st_msg_qid; /* First MSIQ ID for PCIe msgs */ - msiqid_t msiq_next_msg_qid; /* Next MSIQ index for PCIe msgs */ px_msiq_t *msiq_p; /* Pointer to MSIQs array */ void *msiq_buf_p; /* Pointer to MSIQs array */ @@ -98,7 +95,11 @@ extern int px_msiq_alloc(px_t *px_p, msiq_rec_type_t rec_type, msiqid_t *msiq_id_p); +extern int px_msiq_alloc_based_on_cpuid(px_t *px_p, + msiq_rec_type_t rec_type, cpuid_t cpuid, + msiqid_t *msiq_id_p); extern int px_msiq_free(px_t *px_p, msiqid_t msiq_id); +extern void px_msiq_redist(px_t *px_p); extern devino_t px_msiqid_to_devino(px_t *px_p, msiqid_t msiq_id); extern msiqid_t px_devino_to_msiqid(px_t *px_p, devino_t devino);
--- a/usr/src/uts/sun4/io/px/px_pec.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4/io/px/px_pec.c Wed Jul 08 12:59:05 2009 +0800 @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * PCI Express PEC implementation: * initialization @@ -181,7 +179,7 @@ hdl.ih_pri = PX_ERR_LOW_PIL; if ((ret = px_add_msiq_intr(dip, dip, &hdl, - MSG_REC, (msgcode_t)PCIE_CORR_MSG, + MSG_REC, (msgcode_t)PCIE_CORR_MSG, -1, &pec_p->pec_corr_msg_msiq_id)) != DDI_SUCCESS) { DBG(DBG_MSG, px_p->px_dip, "PCIE_CORR_MSG registration failed\n"); @@ -204,7 +202,7 @@ hdl.ih_pri = PX_ERR_PIL; if ((ret = px_add_msiq_intr(dip, dip, &hdl, - MSG_REC, (msgcode_t)PCIE_NONFATAL_MSG, + MSG_REC, (msgcode_t)PCIE_NONFATAL_MSG, -1, &pec_p->pec_non_fatal_msg_msiq_id)) != DDI_SUCCESS) { DBG(DBG_MSG, px_p->px_dip, "PCIE_NONFATAL_MSG registration failed\n"); @@ -228,7 +226,7 @@ hdl.ih_pri = PX_ERR_PIL; if ((ret = px_add_msiq_intr(dip, dip, &hdl, - MSG_REC, (msgcode_t)PCIE_FATAL_MSG, + MSG_REC, (msgcode_t)PCIE_FATAL_MSG, -1, &pec_p->pec_fatal_msg_msiq_id)) != DDI_SUCCESS) { DBG(DBG_MSG, px_p->px_dip, "PCIE_FATAL_MSG registration failed\n");
--- a/usr/src/uts/sun4/io/px/px_tools.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4/io/px/px_tools.c Wed Jul 08 12:59:05 2009 +0800 @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/stat.h> #include <sys/cpuvar.h> @@ -68,26 +66,13 @@ int pci_num_bars = sizeof (pci_bars) / sizeof (pci_bars[0]); -/* - * Validate the cpu_id passed in. - * A value of 1 will be returned for success and zero for failure. - */ -static int -pxtool_validate_cpuid(uint32_t cpuid) -{ - extern const int _ncpu; - extern cpu_t *cpu[]; - - ASSERT(mutex_owned(&cpu_lock)); - - return ((cpuid < _ncpu) && (cpu[cpuid] && cpu_is_online(cpu[cpuid]))); -} - /*ARGSUSED*/ static int pxtool_intr_info(dev_info_t *dip, void *arg, int mode) { + px_t *px_p = DIP_TO_STATE(dip); + px_msi_state_t *msi_state_p = &px_p->px_ib_p->ib_msi_state; pcitool_intr_info_t intr_info; int rval = SUCCESS; @@ -99,7 +84,10 @@ intr_info.ctlr_version = 0; /* XXX how to get real version? */ intr_info.ctlr_type = PCITOOL_CTLR_TYPE_RISC; - intr_info.num_intr = pxtool_num_inos; + if (intr_info.flags & PCITOOL_INTR_FLAG_GET_MSI) + intr_info.num_intr = msi_state_p->msi_cnt; + else + intr_info.num_intr = pxtool_num_inos; intr_info.drvr_version = PCITOOL_VERSION; if (ddi_copyout(&intr_info, arg, sizeof (pcitool_intr_info_t), mode) != @@ -125,44 +113,65 @@ { /* Array part isn't used here, but oh well... 
*/ pcitool_intr_get_t partial_iget; - uint32_t ino; - uint8_t num_devs_ret; + pcitool_intr_get_t *iget = &partial_iget; int copyout_rval; sysino_t sysino; intr_valid_state_t intr_valid_state; cpuid_t old_cpu_id; px_t *px_p = DIP_TO_STATE(dip); - pcitool_intr_get_t *iget = &partial_iget; size_t iget_kmem_alloc_size = 0; - int rval = SUCCESS; + int rval = EIO; /* Read in just the header part, no array section. */ if (ddi_copyin(arg, &partial_iget, PCITOOL_IGET_SIZE(0), mode) != DDI_SUCCESS) return (EFAULT); - ino = partial_iget.ino; - num_devs_ret = partial_iget.num_devs_ret; + iget->status = PCITOOL_IO_ERROR; + + if (iget->flags & PCITOOL_INTR_FLAG_GET_MSI) { + px_msi_state_t *msi_state_p = &px_p->px_ib_p->ib_msi_state; + pci_msi_valid_state_t msi_state; + msiqid_t msiq_id; - partial_iget.num_devs_ret = 0; /* Assume error for now. */ - partial_iget.status = PCITOOL_INVALID_INO; - rval = EINVAL; + if ((iget->msi < msi_state_p->msi_1st_msinum) || + (iget->msi >= (msi_state_p->msi_1st_msinum + + msi_state_p->msi_cnt))) { + iget->status = PCITOOL_INVALID_MSI; + rval = EINVAL; + goto done_get_intr; + } + + if ((px_lib_msi_getvalid(dip, iget->msi, + &msi_state) != DDI_SUCCESS) || + (msi_state != PCI_MSI_VALID)) + goto done_get_intr; + + if (px_lib_msi_getmsiq(dip, iget->msi, + &msiq_id) != DDI_SUCCESS) + goto done_get_intr; + + iget->ino = px_msiqid_to_devino(px_p, msiq_id); + } else { + iget->msi = (uint32_t)-1; + } /* Validate argument. */ - if (partial_iget.ino > pxtool_num_inos) { + if (iget->ino > pxtool_num_inos) { + iget->status = PCITOOL_INVALID_INO; + rval = EINVAL; goto done_get_intr; } /* Caller wants device information returned. */ - if (num_devs_ret > 0) { - + if (iget->num_devs_ret > 0) { /* * Allocate room. * Note if num_devs == 0 iget remains pointing to * partial_iget. 
*/ - iget_kmem_alloc_size = PCITOOL_IGET_SIZE(num_devs_ret); - iget = kmem_alloc(iget_kmem_alloc_size, KM_SLEEP); + iget_kmem_alloc_size = PCITOOL_IGET_SIZE(iget->num_devs_ret); + iget = kmem_zalloc(iget_kmem_alloc_size, KM_SLEEP); /* Read in whole structure to verify there's room. */ if (ddi_copyin(arg, iget, iget_kmem_alloc_size, mode) != @@ -175,21 +184,17 @@ } } - bzero(iget, PCITOOL_IGET_SIZE(num_devs_ret)); - iget->ino = ino; - iget->num_devs_ret = num_devs_ret; - /* Convert leaf-wide intr to system-wide intr */ - if (px_lib_intr_devino_to_sysino(dip, iget->ino, &sysino) == - DDI_FAILURE) { + if (px_lib_intr_devino_to_sysino(dip, iget->ino, &sysino) != + DDI_SUCCESS) { iget->status = PCITOOL_IO_ERROR; rval = EIO; goto done_get_intr; } /* Operate only on inos which are already enabled. */ - if (px_lib_intr_getvalid(dip, sysino, &intr_valid_state) == - DDI_FAILURE) { + if (px_lib_intr_getvalid(dip, sysino, &intr_valid_state) != + DDI_SUCCESS) { iget->status = PCITOOL_IO_ERROR; rval = EIO; goto done_get_intr; @@ -200,20 +205,20 @@ * as well as those mapped to devices. */ if (intr_valid_state == INTR_VALID) { - /* * The following looks up the px_ino and returns * info of devices mapped to this ino. 
*/ - iget->num_devs = pxtool_ib_get_ino_devs( - px_p, ino, &iget->num_devs_ret, iget->dev); + iget->num_devs = pxtool_ib_get_ino_devs(px_p, iget->ino, + iget->msi, &iget->num_devs_ret, iget->dev); - if (px_lib_intr_gettarget(dip, sysino, &old_cpu_id) == - DDI_FAILURE) { + if (px_ib_get_intr_target(px_p, iget->ino, + &old_cpu_id) != DDI_SUCCESS) { iget->status = PCITOOL_IO_ERROR; rval = EIO; goto done_get_intr; } + iget->cpu_id = old_cpu_id; } @@ -223,7 +228,7 @@ done_get_intr: iget->drvr_version = PCITOOL_VERSION; copyout_rval = - ddi_copyout(iget, arg, PCITOOL_IGET_SIZE(num_devs_ret), mode); + ddi_copyout(iget, arg, PCITOOL_IGET_SIZE(iget->num_devs_ret), mode); if (iget_kmem_alloc_size > 0) kmem_free(iget, iget_kmem_alloc_size); @@ -246,10 +251,11 @@ pcitool_intr_set_t iset; cpuid_t old_cpu_id; sysino_t sysino; + intr_valid_state_t intr_valid_state; px_t *px_p = DIP_TO_STATE(dip); - px_ib_t *ib_p = px_p->px_ib_p; - uint8_t zero = 0; - int rval = SUCCESS; + msiqid_t msiq_id; + int rval = EIO; + int ret = DDI_SUCCESS; size_t copyinout_size; bzero(&iset, sizeof (pcitool_intr_set_t)); @@ -276,57 +282,108 @@ goto done_set_intr; } - if (iset.flags & PCITOOL_INTR_SET_FLAG_GROUP) { + if (iset.flags & PCITOOL_INTR_FLAG_SET_GROUP) { iset.status = PCITOOL_IO_ERROR; rval = ENOTSUP; goto done_set_intr; } - iset.status = PCITOOL_INVALID_INO; - rval = EINVAL; + iset.status = PCITOOL_IO_ERROR; + + if (iset.flags & PCITOOL_INTR_FLAG_SET_MSI) { + px_msi_state_t *msi_state_p = &px_p->px_ib_p->ib_msi_state; + pci_msi_valid_state_t msi_state; + + if ((iset.msi < msi_state_p->msi_1st_msinum) || + (iset.msi >= (msi_state_p->msi_1st_msinum + + msi_state_p->msi_cnt))) { + iset.status = PCITOOL_INVALID_MSI; + rval = EINVAL; + goto done_set_intr; + } + + if ((px_lib_msi_getvalid(dip, iset.msi, + &msi_state) != DDI_SUCCESS) || + (msi_state != PCI_MSI_VALID)) + goto done_set_intr; + + if (px_lib_msi_getmsiq(dip, iset.msi, + &msiq_id) != DDI_SUCCESS) + goto done_set_intr; + + iset.ino = 
px_msiqid_to_devino(px_p, msiq_id); + } else { + iset.msi = (uint32_t)-1; + } /* Validate input argument. */ - if (iset.ino > pxtool_num_inos) + if (iset.ino > pxtool_num_inos) { + iset.status = PCITOOL_INVALID_INO; + rval = EINVAL; + goto done_set_intr; + } + + /* Convert leaf-wide intr to system-wide intr */ + if (px_lib_intr_devino_to_sysino(dip, iset.ino, &sysino) != + DDI_SUCCESS) goto done_set_intr; - /* Validate that ino given belongs to a device. */ - if (pxtool_ib_get_ino_devs(px_p, iset.ino, &zero, NULL) == 0) + /* Operate only on inos which are already enabled. */ + if ((px_lib_intr_getvalid(dip, sysino, &intr_valid_state) != + DDI_SUCCESS) || (intr_valid_state == INTR_NOTVALID)) goto done_set_intr; /* - * Get lock, validate cpu and write new mapreg value. - * Return original cpu value to caller via iset.cpu. + * Consider all valid inos: those mapped to the root complex itself + * as well as those mapped to devices. */ - mutex_enter(&cpu_lock); - if (pxtool_validate_cpuid(iset.cpu_id)) { + if (px_lib_intr_gettarget(dip, sysino, &old_cpu_id) != DDI_SUCCESS) + goto done_set_intr; - DBG(DBG_TOOLS, dip, "Enabling CPU %d\n", iset.cpu_id); + if (iset.flags & PCITOOL_INTR_FLAG_SET_MSI) { + ddi_intr_handle_impl_t hdle; - if (px_lib_intr_devino_to_sysino(dip, iset.ino, &sysino) == - DDI_FAILURE) - goto done_set_intr; - - if (px_lib_intr_gettarget(dip, sysino, &old_cpu_id) == - DDI_FAILURE) + bzero(&hdle, sizeof (ddi_intr_handle_impl_t)); + if (pxtool_ib_get_msi_info(px_p, iset.ino, iset.msi, + &hdle) != DDI_SUCCESS) { + iset.status = PCITOOL_INVALID_MSI; + rval = EINVAL; goto done_set_intr; - - px_ib_intr_dist_en(dip, iset.cpu_id, iset.ino, B_TRUE); - - px_ib_log_new_cpu(ib_p, old_cpu_id, iset.cpu_id, iset.ino); + } - iset.cpu_id = old_cpu_id; - iset.status = PCITOOL_SUCCESS; - rval = SUCCESS; + if ((ret = px_ib_set_msix_target(px_p, &hdle, iset.msi, + iset.cpu_id)) == DDI_SUCCESS) { + (void) px_lib_msi_getmsiq(dip, iset.msi, &msiq_id); + iset.ino = 
px_msiqid_to_devino(px_p, msiq_id); + iset.cpu_id = old_cpu_id; + iset.status = PCITOOL_SUCCESS; + rval = SUCCESS; + goto done_set_intr; + } + } else { + if ((ret = px_ib_set_intr_target(px_p, iset.ino, + iset.cpu_id)) == DDI_SUCCESS) { + iset.cpu_id = old_cpu_id; + iset.status = PCITOOL_SUCCESS; + rval = SUCCESS; + goto done_set_intr; + } + } - } else { /* Invalid cpu. Restore original register image. */ - - DBG(DBG_TOOLS, dip, - "Invalid cpuid: writing orig mapreg value\n"); - + switch (ret) { + case DDI_EPENDING: + iset.status = PCITOOL_PENDING_INTRTIMEOUT; + rval = ETIME; + break; + case DDI_EINVAL: iset.status = PCITOOL_INVALID_CPUID; rval = EINVAL; + break; + default: + iset.status = PCITOOL_IO_ERROR; + rval = EIO; + break; } - mutex_exit(&cpu_lock); done_set_intr: iset.drvr_version = PCITOOL_VERSION;
--- a/usr/src/uts/sun4/os/ddi_impl.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4/os/ddi_impl.c Wed Jul 08 12:59:05 2009 +0800 @@ -743,6 +743,8 @@ switch (op) { case DDI_INTROP_ADDISR: case DDI_INTROP_REMISR: + case DDI_INTROP_GETTARGET: + case DDI_INTROP_SETTARGET: case DDI_INTROP_ENABLE: case DDI_INTROP_DISABLE: case DDI_INTROP_BLOCKENABLE:
--- a/usr/src/uts/sun4u/io/pci/pci.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4u/io/pci/pci.c Wed Jul 08 12:59:05 2009 +0800 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -1298,8 +1298,8 @@ pci_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op, ddi_intr_handle_impl_t *hdlp, void *result) { - pci_t *pci_p = get_pci_soft_state( - ddi_get_instance(dip)); + pci_t *pci_p = get_pci_soft_state(ddi_get_instance(dip)); + ib_ino_t ino; int ret = DDI_SUCCESS; switch (intr_op) { @@ -1327,6 +1327,14 @@ case DDI_INTROP_REMISR: ret = pci_remove_intr(dip, rdip, hdlp); break; + case DDI_INTROP_GETTARGET: + ino = IB_MONDO_TO_INO(pci_xlate_intr(dip, rdip, + pci_p->pci_ib_p, IB_MONDO_TO_INO(hdlp->ih_vector))); + ret = ib_get_intr_target(pci_p, ino, (int *)result); + break; + case DDI_INTROP_SETTARGET: + ret = DDI_ENOTSUP; + break; case DDI_INTROP_ENABLE: ret = ib_update_intr_state(pci_p, rdip, hdlp, PCI_INTR_STATE_ENABLE);
--- a/usr/src/uts/sun4u/io/pci/pci_ib.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4u/io/pci/pci_ib.c Wed Jul 08 12:59:05 2009 +0800 @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * PCI Interrupt Block (RISCx) implementation * initialization @@ -895,6 +893,122 @@ } /* + * Get interrupt CPU for a given ino. + * Return info only for inos which are already mapped to devices. + */ +/*ARGSUSED*/ +int +ib_get_intr_target(pci_t *pci_p, ib_ino_t ino, int *cpu_id_p) +{ + dev_info_t *dip = pci_p->pci_dip; + ib_t *ib_p = pci_p->pci_ib_p; + volatile uint64_t *imregp; + uint64_t imregval; + + DEBUG1(DBG_IB, dip, "ib_get_intr_target: ino %x\n", ino); + + imregp = ib_intr_map_reg_addr(ib_p, ino); + imregval = *imregp; + + *cpu_id_p = ib_map_reg_get_cpu(imregval); + + DEBUG1(DBG_IB, dip, "ib_get_intr_target: cpu_id %x\n", *cpu_id_p); + + return (DDI_SUCCESS); +} + +/* + * Associate a new CPU with a given ino. + * Operate only on inos which are already mapped to devices. + */ +int +ib_set_intr_target(pci_t *pci_p, ib_ino_t ino, int cpu_id) +{ + dev_info_t *dip = pci_p->pci_dip; + ib_t *ib_p = pci_p->pci_ib_p; + int ret = DDI_SUCCESS; + uint32_t old_cpu_id; + hrtime_t start_time; + uint64_t imregval; + uint64_t new_imregval; + volatile uint64_t *imregp; + volatile uint64_t *idregp; + extern const int _ncpu; + extern cpu_t *cpu[]; + + DEBUG2(DBG_IB, dip, "ib_set_intr_target: ino %x cpu_id %x\n", + ino, cpu_id); + + imregp = (uint64_t *)ib_intr_map_reg_addr(ib_p, ino); + idregp = IB_INO_INTR_STATE_REG(ib_p, ino); + + /* Save original mapreg value. */ + imregval = *imregp; + DEBUG1(DBG_IB, dip, "ib_set_intr_target: orig mapreg value: 0x%llx\n", + imregval); + + /* Operate only on inos which are already enabled. 
*/ + if (!(imregval & COMMON_INTR_MAP_REG_VALID)) + return (DDI_FAILURE); + + /* Is this request a noop? */ + if ((old_cpu_id = ib_map_reg_get_cpu(imregval)) == cpu_id) + return (DDI_SUCCESS); + + /* Clear the interrupt valid/enable bit for particular ino. */ + DEBUG0(DBG_IB, dip, "Clearing intr_enabled...\n"); + *imregp = imregval & ~COMMON_INTR_MAP_REG_VALID; + + /* Wait until there are no more pending interrupts. */ + start_time = gethrtime(); + + DEBUG0(DBG_IB, dip, "About to check for pending interrupts...\n"); + + while (IB_INO_INTR_PENDING(idregp, ino)) { + DEBUG0(DBG_IB, dip, "Waiting for pending ints to clear\n"); + if ((gethrtime() - start_time) < pci_intrpend_timeout) { + continue; + } else { /* Timed out waiting. */ + DEBUG0(DBG_IB, dip, "Timed out waiting \n"); + return (DDI_EPENDING); + } + } + + new_imregval = *imregp; + + DEBUG1(DBG_IB, dip, + "after disabling intr, mapreg value: 0x%llx\n", new_imregval); + + /* + * Get lock, validate cpu and write new mapreg value. + */ + mutex_enter(&cpu_lock); + if ((cpu_id < _ncpu) && (cpu[cpu_id] && cpu_is_online(cpu[cpu_id]))) { + /* Prepare new mapreg value with intr enabled and new cpu_id. */ + new_imregval &= + COMMON_INTR_MAP_REG_IGN | COMMON_INTR_MAP_REG_INO; + new_imregval = ib_get_map_reg(new_imregval, cpu_id); + + DEBUG1(DBG_IB, dip, "Writing new mapreg value:0x%llx\n", + new_imregval); + + *imregp = new_imregval; + + ib_log_new_cpu(ib_p, old_cpu_id, cpu_id, ino); + } else { /* Invalid cpu. Restore original register image. */ + DEBUG0(DBG_IB, dip, + "Invalid cpuid: writing orig mapreg value\n"); + + *imregp = imregval; + ret = DDI_EINVAL; + } + mutex_exit(&cpu_lock); + + return (ret); +} + + +/* * Return the dips or number of dips associated with a given interrupt block. * Size of dips array arg is passed in as dips_ret arg. * Number of dips returned is returned in dips_ret arg.
--- a/usr/src/uts/sun4u/io/pci/pci_intr.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4u/io/pci/pci_intr.c Wed Jul 08 12:59:05 2009 +0800 @@ -588,6 +588,7 @@ *ino_p->ino_map_reg; } ino_done: + hdlp->ih_target = ino_p->ino_cpuid; ih_p->ih_ipil_p = ipil_p; ih_p->ih_ksp = kstat_create("pci_intrs", atomic_inc_32_nv(&pciintr_ks_instance), "config", "interrupts",
--- a/usr/src/uts/sun4u/io/pci/pci_tools.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4u/io/pci/pci_tools.c Wed Jul 08 12:59:05 2009 +0800 @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/stat.h> #include <sys/sunddi.h> #include <sys/param.h> @@ -104,7 +102,6 @@ uint64_t paddr, uint64_t *value_p); static int pcitool_phys_poke(pci_t *pci_p, boolean_t type, size_t size, uint64_t paddr, uint64_t value); -static boolean_t pcitool_validate_cpuid(uint32_t cpu_id); static int pcitool_access(pci_t *pci_p, uint64_t phys_addr, uint64_t max_addr, uint64_t *data, uint8_t size, boolean_t write, boolean_t endian, uint32_t *pcitool_status); @@ -251,22 +248,6 @@ } -/* - * Validate the cpu_id passed in. - * A value of B_TRUE will be returned for success. - */ -static boolean_t -pcitool_validate_cpuid(uint32_t cpuid) -{ - extern const int _ncpu; - extern cpu_t *cpu[]; - - ASSERT(mutex_owned(&cpu_lock)); - - return ((cpuid < _ncpu) && (cpu[cpuid] && cpu_is_online(cpu[cpuid]))); -} - - /*ARGSUSED*/ static int pcitool_intr_info(dev_info_t *dip, void *arg, int mode) @@ -280,6 +261,9 @@ return (EFAULT); } + if (intr_info.flags & PCITOOL_INTR_FLAG_GET_MSI) + return (ENOTSUP); + intr_info.ctlr_version = 0; /* XXX how to get real version? 
*/ intr_info.ctlr_type = PCITOOL_CTLR_TYPE_RISC; intr_info.num_intr = PCI_MAX_INO; @@ -314,6 +298,7 @@ uint64_t imregval; uint32_t ino; uint8_t num_devs_ret; + int cpu_id; int copyout_rval; int rval = SUCCESS; @@ -324,6 +309,13 @@ return (EFAULT); } + if (partial_iget.flags & PCITOOL_INTR_FLAG_GET_MSI) { + partial_iget.status = PCITOOL_IO_ERROR; + partial_iget.num_devs_ret = 0; + rval = ENOTSUP; + goto done_get_intr; + } + ino = partial_iget.ino; num_devs_ret = partial_iget.num_devs_ret; @@ -369,7 +361,6 @@ * This bit happens to be the same on Fire and Tomatillo. */ if (imregval & COMMON_INTR_MAP_REG_VALID) { - /* * The following looks up the ib_ino_info and returns * info of devices mapped to this ino. @@ -377,18 +368,23 @@ iget->num_devs = ib_get_ino_devs( ib_p, ino, &iget->num_devs_ret, iget->dev); + if (ib_get_intr_target(pci_p, ino, &cpu_id) != DDI_SUCCESS) { + iget->status = PCITOOL_IO_ERROR; + rval = EIO; + goto done_get_intr; + } + /* * Consider only inos mapped to devices (as opposed to * inos mapped to the bridge itself. */ if (iget->num_devs > 0) { - /* * These 2 items are platform specific, * extracted from the bridge. 
*/ iget->ctlr = 0; - iget->cpu_id = ib_map_reg_get_cpu(imregval); + iget->cpu_id = cpu_id; } } done_get_intr: @@ -417,16 +413,14 @@ { ib_t *ib_p = pci_p->pci_ib_p; int rval = SUCCESS; - + int ret = DDI_SUCCESS; uint8_t zero = 0; pcitool_intr_set_t iset; - uint32_t old_cpu_id; - hrtime_t start_time; + volatile uint64_t *imregp; uint64_t imregval; - uint64_t new_imregval; - volatile uint64_t *imregp; - volatile uint64_t *idregp; + size_t copyinout_size; + int old_cpu_id; bzero(&iset, sizeof (pcitool_intr_set_t)); @@ -452,7 +446,8 @@ goto done_set_intr; } - if (iset.flags & PCITOOL_INTR_SET_FLAG_GROUP) { + if ((iset.flags & PCITOOL_INTR_FLAG_SET_GROUP) || + (iset.flags & PCITOOL_INTR_FLAG_SET_MSI)) { iset.status = PCITOOL_IO_ERROR; rval = ENOTSUP; goto done_set_intr; @@ -467,21 +462,7 @@ } imregp = (uint64_t *)ib_intr_map_reg_addr(ib_p, iset.ino); - idregp = IB_INO_INTR_STATE_REG(ib_p, iset.ino); - - DEBUG4(DBG_TOOLS, dip, "set_intr: cpu:%d, ino:0x%x, mapreg @ " - "0x%llx, intr_stat @ 0x%llx\n", - iset.cpu_id, iset.ino, imregp, idregp); - - /* Save original mapreg value. */ imregval = *imregp; - DEBUG1(DBG_TOOLS, dip, "orig mapreg value: 0x%llx\n", imregval); - - /* Is this request a noop? */ - if ((old_cpu_id = ib_map_reg_get_cpu(imregval)) == iset.cpu_id) { - iset.status = PCITOOL_SUCCESS; - goto done_set_intr; - } /* Operate only on inos which are already enabled. */ if (!(imregval & COMMON_INTR_MAP_REG_VALID)) { @@ -490,66 +471,32 @@ goto done_set_intr; } - /* Clear the interrupt valid/enable bit for particular ino. */ - DEBUG0(DBG_TOOLS, dip, "Clearing intr_enabled...\n"); - *imregp = imregval & ~COMMON_INTR_MAP_REG_VALID; - - /* Wait until there are no more pending interrupts. 
*/ - start_time = gethrtime(); - - DEBUG0(DBG_TOOLS, dip, "About to check for pending interrupts...\n"); + if (ib_get_intr_target(pci_p, iset.ino, &old_cpu_id) != DDI_SUCCESS) { + iset.status = PCITOOL_INVALID_INO; + rval = EINVAL; + goto done_set_intr; + } - while (IB_INO_INTR_PENDING(idregp, iset.ino)) { - - DEBUG0(DBG_TOOLS, dip, "Waiting for pending ints to clear\n"); - if ((gethrtime() - start_time) < pci_intrpend_timeout) - continue; - - else { /* Timed out waiting. */ - iset.status = PCITOOL_PENDING_INTRTIMEOUT; - rval = ETIME; - goto done_set_intr; - } + if ((ret = ib_set_intr_target(pci_p, iset.ino, + iset.cpu_id)) == DDI_SUCCESS) { + iset.cpu_id = old_cpu_id; + iset.status = PCITOOL_SUCCESS; + goto done_set_intr; } - new_imregval = *imregp; - - DEBUG1(DBG_TOOLS, dip, - "after disabling intr, mapreg value: 0x%llx\n", new_imregval); - - /* - * Get lock, validate cpu and write new mapreg value. - * Return original cpu value to caller via iset.cpu_id. - */ - mutex_enter(&cpu_lock); - if (pcitool_validate_cpuid(iset.cpu_id)) { - - /* Prepare new mapreg value with intr enabled and new cpu_id. */ - new_imregval &= - COMMON_INTR_MAP_REG_IGN | COMMON_INTR_MAP_REG_INO; - new_imregval = ib_get_map_reg(new_imregval, iset.cpu_id); - - DEBUG1(DBG_TOOLS, dip, "Writing new mapreg value:0x%llx\n", - new_imregval); - - *imregp = new_imregval; - - ib_log_new_cpu(ib_p, old_cpu_id, iset.cpu_id, iset.ino); - - mutex_exit(&cpu_lock); - - iset.cpu_id = old_cpu_id; - iset.status = PCITOOL_SUCCESS; - - } else { /* Invalid cpu. Restore original register image. 
*/ - - DEBUG0(DBG_TOOLS, dip, - "Invalid cpuid: writing orig mapreg value\n"); - - *imregp = imregval; - mutex_exit(&cpu_lock); + switch (ret) { + case DDI_EPENDING: + iset.status = PCITOOL_PENDING_INTRTIMEOUT; + rval = ETIME; + break; + case DDI_EINVAL: iset.status = PCITOOL_INVALID_CPUID; rval = EINVAL; + break; + default: + iset.status = PCITOOL_INVALID_INO; + rval = EINVAL; + break; } done_set_intr: iset.drvr_version = PCITOOL_VERSION;
--- a/usr/src/uts/sun4u/sys/pci/pci_ib.h Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4u/sys/pci/pci_ib.h Wed Jul 08 12:59:05 2009 +0800 @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_PCI_IB_H #define _SYS_PCI_IB_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -218,8 +216,6 @@ uint_t (*int_handler)(caddr_t int_handler_arg1, caddr_t int_handler_arg2), caddr_t int_handler_arg1, caddr_t int_handler_arg2); extern void ib_free_ih(ih_t *ih_p); -extern int ib_update_intr_state(pci_t *pci_p, dev_info_t *rdip, - ddi_intr_handle_impl_t *hdlp, uint_t new_intr_state); extern void ib_ino_map_reg_share(ib_t *ib_p, ib_ino_t ino, ib_ino_info_t *ino_p); extern int ib_ino_map_reg_unshare(ib_t *ib_p, ib_ino_t ino, @@ -231,6 +227,10 @@ volatile uint64_t *imr_p); extern void ib_intr_dist_all(void *arg, int32_t max_weight, int32_t weight); extern void ib_cpu_ticks_to_ih_nsec(ib_t *ib_p, ih_t *ih_p, uint32_t cpu_id); +extern int ib_update_intr_state(pci_t *pci_p, dev_info_t *rdip, + ddi_intr_handle_impl_t *hdlp, uint_t new_intr_state); +extern int ib_get_intr_target(pci_t *pci_p, ib_ino_t ino, int *cpu_id_p); +extern int ib_set_intr_target(pci_t *pci_p, ib_ino_t ino, int cpu_id); extern uint8_t ib_get_ino_devs(ib_t *ib_p, uint32_t ino, uint8_t *devs_ret, pcitool_intr_dev_t *devs); extern void ib_log_new_cpu(ib_t *ib_p, uint32_t old_cpu_id, uint32_t new_cpu_id,
--- a/usr/src/uts/sun4v/io/px/px_lib4v.c Tue Jul 07 11:23:28 2009 -0700 +++ b/usr/src/uts/sun4v/io/px/px_lib4v.c Wed Jul 08 12:59:05 2009 +0800 @@ -268,7 +268,7 @@ return (DDI_FAILURE); } - DBG(DBG_LIB_INT, dip, "px_lib_intr_gettarget: cpuid 0x%x\n", cpuid); + DBG(DBG_LIB_INT, dip, "px_lib_intr_gettarget: cpuid 0x%x\n", *cpuid); return (DDI_SUCCESS); }