illumos-gate changeset 11600:651a9a4f7b5f
6875273 Intel IOMMU needs a rewrite
6855502 iommu: Toshiba Portege R600 fails to suspend with VT enabled starting with daily.0624
6874904 Lenovo X301 - Messages spews up some stuff now. Suspend/Resume no longer work.
6885148 Huge network performance drop with multiple NICs on x86 platforms with IOMMU
6890819 slow reboot got much slower in snv_118 on my tecra M10
6808450 Fast Reboot does not work on Virgo blade
6877258 Virgo will kernel panic with VT-d enabled under heavy network traffic
6910946 Westmere Class System panics on snv_129-: Freeing a free IOMMU page: paddr=0x8379c000 under I/O load
--- a/usr/src/cmd/mdb/common/modules/rootnex/intel_iommu.c Sat Jan 30 15:04:39 2010 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,883 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright (c) 2009, Intel Corporation. - * All rights reserved. - */ -#include <sys/mdb_modapi.h> -#include <sys/list.h> -#include <sys/note.h> -#include <sys/dditypes.h> -#include <sys/ddi_impldefs.h> -#include <sys/intel_iommu.h> -#include <sys/iommulib.h> -#include <stddef.h> - -/* - * Does Intel IOMMU works on this system? - */ -static boolean_t iommu_support = B_FALSE; - -static void -iomuvtop_help(void) -{ - mdb_printf("print physical mapping of IO virtual address\n\n" - "Usage:\n\n" - " address::iomuvtop <iova>\n\n" - "Where, \"address\" is the address of the devinfo node, " - "while \"iova\" is the DMA virtual address.\n"); -} - -static boolean_t -iommu_supported(void) -{ - if (iommu_support == B_FALSE) - mdb_printf("No Intel IOMMU active on this system\n"); - return (iommu_support); -} - -/* - * print_device_scope_cb() - * call back for print_device_scope() - */ -static int -print_device_scope_cb(uintptr_t addr, pci_dev_scope_t *devs, void *cbdata) -{ - _NOTE(ARGUNUSED(addr)) - - mdb_printf((char *)cbdata); - mdb_printf("BDF[%x:%x:%x],type[%x]\n", - devs->pds_bus, - devs->pds_dev, - devs->pds_func, - devs->pds_type); - - return (WALK_NEXT); -} - -/* - * print_device_scope() - * a common function to print device scope of a drhd or rmrr - */ -static void -print_device_scope(const char *pre, uintptr_t addr) -{ - mdb_pwalk("list", - (mdb_walk_cb_t)print_device_scope_cb, (void *)pre, addr); -} - -/* - * parse_hw_capa() - * parse_hw_excapa() - * - * Given the capability and extension capability register contents, - * parse and print supported features in <output> - * - * Please refer to chapter 10.4.2/3 in "Intel virutalization technology - * for direct IO specification" for register details - */ -static void -parse_hw_capa(uint64_t capa) -{ - char string[128]; - size_t len; - - strcpy(string, " Hardware Capability:\t\t"); - if (IOMMU_CAP_GET_DRD(capa)) - strcat(string, "DRD "); - if (IOMMU_CAP_GET_DWD(capa)) - strcat(string, "DWD "); - if (IOMMU_CAP_GET_PSI(capa)) - strcat(string, "PSI "); - if (IOMMU_CAP_GET_ISOCH(capa)) - strcat(string, "ISOCH "); - if (IOMMU_CAP_GET_ZLR(capa)) - strcat(string, "ZLR "); - if (IOMMU_CAP_GET_CM(capa)) - strcat(string, "CM "); - if (IOMMU_CAP_GET_PHMR(capa)) - strcat(string, "PHMR "); - if (IOMMU_CAP_GET_PLMR(capa)) - strcat(string, "PLMR "); - if (IOMMU_CAP_GET_RWBF(capa)) - strcat(string, "RWBF "); - if (IOMMU_CAP_GET_AFL(capa)) - strcat(string, "AFL "); - - 
len = strlen(string); - if ((len > 1) && - (string[len - 1] == ' ')) - string[len - 1] = 0; - - strcat(string, "\n"); - mdb_printf(string); -} - -static void -parse_hw_excapa(uint64_t excapa) -{ - char string[128]; - size_t len; - - strcpy(string, " Hardware Ex-Capability:\t"); - if (IOMMU_ECAP_GET_SC(excapa)) - strcat(string, "SC "); - if (IOMMU_ECAP_GET_PT(excapa)) - strcat(string, "PT "); - if (IOMMU_ECAP_GET_CH(excapa)) - strcat(string, "CH "); - if (IOMMU_ECAP_GET_EIM(excapa)) - strcat(string, "EIM "); - if (IOMMU_ECAP_GET_IR(excapa)) - strcat(string, "IR "); - if (IOMMU_ECAP_GET_DI(excapa)) - strcat(string, "DI "); - if (IOMMU_ECAP_GET_QI(excapa)) - strcat(string, "QI "); - if (IOMMU_ECAP_GET_C(excapa)) - strcat(string, "C "); - - len = strlen(string); - if ((len > 1) && - (string[len - 1] == ' ')) - string[len - 1] = 0; - - strcat(string, "\n"); - mdb_printf(string); -} - -typedef enum { - ERROR_SCOPE, - INCLUDE_ALL_SCOPE, - DEV_SCOPE -} iomu_scope_t; - -/* - * print_iommu_state() - * Given an iommu_state structure, parse and print iommu information - * - * Returns: - * INCLUDE_ALL_SCOPE if include all is set - * DEV_SCOPE if not set - * ERROR_SCOPE on error. - */ -static iomu_scope_t -print_iommu_state(intel_iommu_state_t *iommu, drhd_info_t *drhd) -{ - if ((iommu == NULL) || (drhd == NULL)) { - mdb_warn("Internal error - NULL iommu state pointer passed\n"); - return (ERROR_SCOPE); - } - - mdb_printf("Intel DMA remapping unit\n"); - mdb_printf(" IOMMU Status:\t\t\t%s\n", - (iommu->iu_enabled & DMAR_ENABLE) ? "Enabled" : "Disabled"); - mdb_printf(" Queued Invalid:\t\t%s\n", - (iommu->iu_enabled & QINV_ENABLE) ? "Enabled" : "Disabled"); - mdb_printf(" Interrupt remapping:\t\t%s\n", - (iommu->iu_enabled & INTRR_ENABLE) ? "Enabled" : "Disabled"); - mdb_printf(" Register Physical Address:\t%p\n", - (uintptr_t)drhd->di_reg_base); - mdb_printf(" Register Virtual Address:\t%p\n", - (uintptr_t)iommu->iu_reg_address); - parse_hw_capa(iommu->iu_capability); - parse_hw_excapa(iommu->iu_excapability); - mdb_printf(" Root Entry Table:\t\t%p\n", - (uintptr_t)iommu->iu_root_entry_paddr); - mdb_printf(" Guest Address Width:\t\t%d\n", iommu->iu_gaw); - mdb_printf(" Adjust Guest Address Width:\t%d\n", iommu->iu_agaw); - mdb_printf(" Page Table Level:\t\t%d\n", iommu->iu_level); - mdb_printf(" Max Domain Supported:\t\t%d\n", iommu->iu_max_domain); - mdb_printf(" System Coherence:\t\t%s\n", - iommu->iu_coherency ? "Yes" : "No"); - mdb_printf(" Include All unit:\t\t%s\n", - drhd->di_include_all ? "Yes" : "No"); - mdb_printf(" Devinfo Node:\t\t\t%p\n", - (intptr_t)drhd->di_dip); - - if (iommu->iu_enabled & QINV_ENABLE) { - struct inv_queue_state qi_state; - if (iommu->iu_inv_queue && - mdb_vread(&qi_state, sizeof (qi_state), - (intptr_t)iommu->iu_inv_queue) == sizeof (qi_state)) { - mdb_printf(" Qinv Table:\t\t\tpaddr:%p, " - "vaddr:%p, size:%x\n", - (uintptr_t)qi_state.iq_table.paddr, - (uintptr_t)qi_state.iq_table.vaddr, - qi_state.iq_table.size); - mdb_printf(" Sync Table:\t\t\tpaddr:%p, " - "vaddr:%p, size:%x\n", - (uintptr_t)qi_state.iq_sync.paddr, - (uintptr_t)qi_state.iq_sync.vaddr, - qi_state.iq_sync.size); - } else { - mdb_warn("failed to read iommu invalidation " - "queue state at %p\n", - (uintptr_t)iommu->iu_inv_queue); - return (ERROR_SCOPE); - } - } - - return (drhd->di_include_all ? 
INCLUDE_ALL_SCOPE : DEV_SCOPE); -} - -/* - * dcmd: iomuprt - */ -static int -iomuprt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - _NOTE(ARGUNUSED(argv)) - intel_iommu_state_t iommu; - drhd_info_t drhd; - - if (iommu_supported() == B_FALSE) - return (DCMD_OK); - - if ((argc != 0) || !(flags & DCMD_ADDRSPEC)) - return (DCMD_USAGE); - - if (!DCMD_HDRSPEC(flags)) - mdb_printf("\n"); - - if ((mdb_vread(&iommu, sizeof (iommu), addr) == sizeof (iommu)) && - (iommu.iu_drhd != NULL) && - (mdb_vread(&drhd, sizeof (drhd), - (intptr_t)iommu.iu_drhd) == sizeof (drhd))) { - switch (print_iommu_state(&iommu, &drhd)) { - case DEV_SCOPE: - /* - * Use actual address of list_t in kernel for walker - */ - print_device_scope(" Device Scope:\t\t\t", - (uintptr_t)((char *)iommu.iu_drhd + - offsetof(drhd_info_t, di_dev_list))); - break; - case ERROR_SCOPE: - return (DCMD_ERR); - default: - break; - } - } else { - mdb_warn("failed to read iommu state at %p\n", addr); - return (DCMD_ERR); - } - - return (DCMD_OK); -} - -/* - * print_iommu_addr() - * callback to print addresses of IOMMU unit software structures - */ -static int -print_iommu_addr(uintptr_t addr, intel_iommu_state_t *ip, void *cbdata) -{ - _NOTE(ARGUNUSED(cbdata)) - _NOTE(ARGUNUSED(ip)) - intel_iommu_state_t iommu; - - if (mdb_vread(&iommu, sizeof (iommu), addr) != sizeof (iommu)) { - mdb_warn("failed to read IOMMU structure at %p\n", addr); - return (WALK_ERR); - } - - mdb_printf("%p\n", addr); - - return (WALK_NEXT); -} - -/* - * dcmd: iomunits - */ -static int -iomunits(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - _NOTE(ARGUNUSED(addr)) - _NOTE(ARGUNUSED(argv)) - GElf_Sym sym; - - if (iommu_supported() == B_FALSE) - return (DCMD_OK); - - if ((flags & DCMD_ADDRSPEC) || (argc != 0)) { - return (DCMD_USAGE); - } - - if (mdb_lookup_by_name("iommu_states", &sym) == -1) { - mdb_warn("failed to find symbol iommu_states\n"); - return (DCMD_ERR); - } - - addr = (uintptr_t)sym.st_value; - if (mdb_pwalk("list", (mdb_walk_cb_t)print_iommu_addr, NULL, addr)) { - mdb_warn("couldn't walk IOMMU state structures\n"); - return (DCMD_ERR); - } - return (DCMD_OK); -} - - - -/* - * print_domain_state() - * Given an device domain structure, parse and print information - */ -static void -print_domain_state(dmar_domain_state_t *domain) -{ - if (domain == NULL) { - mdb_warn("Internal error: NULL domain pointer passed\n"); - return; - } - - mdb_printf("IOMMU device domain:\n"); - mdb_printf("Domain ID:\t\t%d\n", domain->dm_domain_id); - mdb_printf("Bind IOMMU:\t\t%p\n", (uintptr_t)domain->dm_iommu); - mdb_printf("DVMA vmem:\t\t%p\n", - (uintptr_t)domain->dm_dvma_map); - mdb_printf("Top Level Page Table:\t%p\n", - (uintptr_t)domain->dm_page_table_paddr); - mdb_printf("Identity Mapping:\t\t%s\n", - domain->dm_identity ? 
"YES" : "NO"); -} - -/* - * dcmd: iomudomprt - */ -static int -iomudomprt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - _NOTE(ARGUNUSED(argv)) - dmar_domain_state_t domain; - - if (iommu_supported() == B_FALSE) - return (DCMD_OK); - - if ((argc != 0) || !(flags & DCMD_ADDRSPEC)) - return (DCMD_USAGE); - - if (!DCMD_HDRSPEC(flags)) - mdb_printf("\n"); - - if (mdb_vread(&domain, sizeof (domain), addr) == sizeof (domain)) { - print_domain_state(&domain); - } else { - mdb_warn("failed to read domain at %p\n", addr); - return (DCMD_ERR); - } - - return (DCMD_OK); -} - -/* - * print_domain_addr() - */ -static int -print_domain_addr(uintptr_t addr, dmar_domain_state_t *domp, void *cbdata) -{ - _NOTE(ARGUNUSED(domp)) - _NOTE(ARGUNUSED(cbdata)) - dmar_domain_state_t domain; - - if (iommu_supported() == B_FALSE) - return (WALK_NEXT); - - if (mdb_vread(&domain, sizeof (domain), addr) != sizeof (domain)) { - mdb_warn("failed to read domain at %p\n", addr); - return (WALK_ERR); - } - - mdb_printf("%p\n", addr); - - return (WALK_NEXT); -} - -/* - * dcmd: iomudoms - */ -static int -iomudoms(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - _NOTE(ARGUNUSED(addr)) - _NOTE(ARGUNUSED(argv)) - GElf_Sym sym; - - if (iommu_supported() == B_FALSE) - return (DCMD_OK); - - if ((flags & DCMD_ADDRSPEC) || (argc != 0)) { - return (DCMD_USAGE); - } - - if (mdb_lookup_by_name("domain_states", &sym) == -1) { - mdb_warn("failed to find symbol domain_states\n"); - return (DCMD_ERR); - } - - addr = (uintptr_t)sym.st_value; - if (mdb_pwalk("list", (mdb_walk_cb_t)print_domain_addr, NULL, addr)) - return (DCMD_ERR); - return (DCMD_OK); -} - -/* - * print_rmrr_info() - */ -static void -print_rmrr_info(rmrr_info_t *rmrr) -{ - mdb_printf("Reserved Memory Region Reporting:\n"); - mdb_printf(" Segment:\t%d\n", rmrr->ri_segment); - mdb_printf(" BaseAddr:\t%p\n", (uintptr_t)rmrr->ri_baseaddr); - mdb_printf(" LimiAddr:\t%p\n", (uintptr_t)rmrr->ri_limiaddr); -} - -/* - * print_rmrr_addr() - * list walk callback for list_rmrr - */ -static int -print_rmrr_addr(uintptr_t addr, rmrr_info_t *rp, void *cbdata) -{ - _NOTE(ARGUNUSED(rp)) - _NOTE(ARGUNUSED(cbdata)) - rmrr_info_t rmrr; - - if (mdb_vread(&rmrr, sizeof (rmrr), addr) != sizeof (rmrr)) { - mdb_warn("failed to read RMRR structure at %p\n", addr); - return (WALK_ERR); - } - - mdb_printf("%p\n", addr); - - return (WALK_NEXT); -} - -/* - * dcmd: iomurmrrs - */ -static int -iomurmrrs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - _NOTE(ARGUNUSED(addr)) - _NOTE(ARGUNUSED(argv)) - GElf_Sym sym; - - if (iommu_supported() == B_FALSE) - return (DCMD_OK); - - if ((flags & DCMD_ADDRSPEC) || (argc != 0)) { - return (DCMD_USAGE); - } - - if (mdb_lookup_by_name("rmrr_states", &sym) == -1) { - mdb_warn("failed to find symbol rmrr_states\n"); - return (DCMD_ERR); - } - - addr = (uintptr_t)sym.st_value; - if (mdb_pwalk("list", (mdb_walk_cb_t)print_rmrr_addr, NULL, addr)) - return (DCMD_ERR); - return (DCMD_OK); -} - -/* - * dcmd: iomurmrrprt: Given an RMRR address print the RMRR. 
- */ -static int -iomurmrrprt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - _NOTE(ARGUNUSED(argv)) - uintptr_t dev_list_addr; - rmrr_info_t rmrr; - - if (iommu_supported() == B_FALSE) - return (DCMD_OK); - - if (!(flags & DCMD_ADDRSPEC) || (argc != 0)) { - return (DCMD_USAGE); - } - - if (mdb_vread(&rmrr, sizeof (rmrr), addr) != sizeof (rmrr)) { - mdb_warn("failed to read RMRR structure at %p\n", addr); - return (DCMD_ERR); - } - - dev_list_addr = addr + offsetof(rmrr_info_t, ri_dev_list); - print_rmrr_info(&rmrr); - print_device_scope(" DevScope:\t", dev_list_addr); - - return (DCMD_OK); -} - -/* - * iova_level_to_offset() - * Given an iova and page table level, return the corresponding offset - */ -static int -iova_level_to_offset(uintptr_t iova, int level) -{ - int start, offset; - - start = (level - 1) * IOMMU_LEVEL_STRIDE + IOMMU_PAGE_SHIFT; - offset = (iova >> start) & IOMMU_LEVEL_OFFSET; - - return (offset); -} - -/* - * iovtp_read_table_entry() - */ -static int -iovtp_read_table_entry(uint64_t ptaddr, size_t offset, - void *ent_buf, size_t ent_size) -{ - if (mdb_pread(ent_buf, ent_size, ptaddr + offset * ent_size) - != ent_size) { - return (B_FALSE); - } else { - return (B_TRUE); - } -} - -/* - * dcmd: iomuvtop - */ -static int -iomuvtop(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - iommu_private_t private; - dmar_domain_state_t domain; - struct dev_info dinfo; - intel_iommu_state_t iommu; - int i, level, offset; - uintptr_t iova; - uint64_t ptaddr, ptentr; - int bus, devfn; - - struct root_context_entry { - uint64_t asr; - uint64_t pro; - } rc_entry; - - if (iommu_supported() == B_FALSE) - return (DCMD_OK); - - if (!(flags & DCMD_ADDRSPEC) || (argc != 1)) { - return (DCMD_USAGE); - } - - iova = (argv[0].a_type == MDB_TYPE_IMMEDIATE) ? 
- (uintptr_t)argv[0].a_un.a_val : - (uintptr_t)mdb_strtoull(argv->a_un.a_str); - - /* read iommu private */ - if ((mdb_vread(&dinfo, sizeof (dinfo), addr) != sizeof (dinfo)) || - (dinfo.devi_iommu_private == NULL) || - (mdb_vread(&private, sizeof (private), - (uintptr_t)dinfo.devi_iommu_private) != sizeof (private))) { - mdb_warn("failed to read iommu private structure for " - "devinfo node at address %p\n", addr); - return (DCMD_ERR); - } - - bus = private.idp_bus; - devfn = private.idp_devfn; - - /* read domain */ - if (private.idp_intel_domain == NULL) { - mdb_printf("IOMMU domain for this device has not yet been " - "allocated.\nNo mapped physical address for this vaddr\n"); - return (DCMD_OK); - } - - if (mdb_vread(&domain, sizeof (domain), - (uintptr_t)private.idp_intel_domain) - != sizeof (domain)) { - mdb_warn("failed to read domain structure at %p\n", - (uintptr_t)private.idp_intel_domain); - return (DCMD_ERR); - } - - /* read iommu */ - if (mdb_vread(&iommu, sizeof (iommu), (uintptr_t)domain.dm_iommu) - != sizeof (iommu)) { - mdb_warn("failed to read iommu structure at %p\n", - (uintptr_t)domain.dm_iommu); - return (DCMD_ERR); - } - - mdb_printf("Level\tPageTableAddress\tOffset\tPageTableEntry\n"); - - /* walk and print root context tabls */ - ptaddr = iommu.iu_root_entry_paddr; - if (iovtp_read_table_entry(ptaddr, bus, &rc_entry, sizeof (rc_entry)) - == B_FALSE) { - mdb_warn("failed to read root table entry for bus %x " - "at %p\n", bus, (uintptr_t)ptaddr); - return (DCMD_ERR); - } - mdb_printf("Root\t%p\t\t%x\tlow :%p\n", (uintptr_t)ptaddr, - bus, (uintptr_t)rc_entry.asr); - mdb_printf("Root\t%p\t\t%x\thigh:%p\n", (uintptr_t)ptaddr, - bus, (uintptr_t)rc_entry.pro); - - ptaddr = rc_entry.asr & IOMMU_PAGE_MASK; - if (iovtp_read_table_entry(ptaddr, devfn, &rc_entry, sizeof (rc_entry)) - == B_FALSE) { - mdb_warn("failed to read context table entry for " - "device-function %x at %p\n", devfn, (uintptr_t)ptaddr); - return (DCMD_ERR); - } - mdb_printf("Context\t%p\t\t%x\tlow :%p\n", (uintptr_t)ptaddr, - devfn, (uintptr_t)rc_entry.asr); - mdb_printf("Context\t%p\t\t%x\thigh:%p\n", (uintptr_t)ptaddr, - devfn, (uintptr_t)rc_entry.pro); - - /* walk and print page tables */ - ptaddr = rc_entry.asr & IOMMU_PAGE_MASK; - - /* - * Toppest level page table address should be the same - * as that stored in domain structure - */ - if (ptaddr != domain.dm_page_table_paddr) { - mdb_warn("The top level page table retrieved from context" - " table doesn't match that from the domain structure." - " Aborting PA lookup.\n"); - return (DCMD_ERR); - } - - level = iommu.iu_level; - for (i = level; i > 0; i--) { - if (!ptaddr) { - mdb_printf("\nNULL page table entry encountered at " - " page table level %d. 
Aborting PA lookup.\n", i); - return (DCMD_OK); - } - offset = iova_level_to_offset(iova, i); - if (iovtp_read_table_entry(ptaddr, offset, &ptentr, - sizeof (ptentr)) == B_FALSE) { - mdb_warn("failed to read page table entry " - "(level %d) at %p\n", i, (uintptr_t)ptaddr); - return (DCMD_ERR); - } - mdb_printf("%x\t%p\t\t%x\t%p\n", i, (uintptr_t)ptaddr, - offset, (uintptr_t)ptentr); - ptaddr = ptentr & IOMMU_PAGE_MASK; - } - - return (DCMD_OK); -} - -typedef struct bdf_cb_data { - int dc_seg; - int dc_bus; - int dc_devfunc; - int dc_match; -} bdf_cb_data_t; - -/* - * match_bdf() - * call back function that matches BDF - */ -static int -match_bdf(uintptr_t addr, struct dev_info *dev, bdf_cb_data_t *cbdata) -{ - _NOTE(ARGUNUSED(addr)) - /* if there is iommu private, get it */ - if (dev->devi_iommu_private != NULL) { - iommu_private_t private; - if (mdb_vread((void*)&private, sizeof (private), - (uintptr_t)dev->devi_iommu_private) != sizeof (private)) { - mdb_warn("failed to read iommu private at %p\n", - (uintptr_t)dev->devi_iommu_private); - return (WALK_ERR); - } - - if (private.idp_seg == cbdata->dc_seg && - private.idp_bus == cbdata->dc_bus && - private.idp_devfn == cbdata->dc_devfunc) { - if (cbdata->dc_match == 0) { - mdb_printf("%p\n", addr); - cbdata->dc_match = 1; - } else { - mdb_warn("More than one devinfo node matches " - "a single pci device. Aborting devinfo " - "lookup\n"); - return (WALK_ERR); - } - } - } - - return (WALK_NEXT); -} - -/* - * dcmd: bdf2devinfo - */ -static int -bdf2devinfo(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - _NOTE(ARGUNUSED(addr)) - bdf_cb_data_t cbdata; - uint_t i, bdf[4]; - - if (iommu_supported() == B_FALSE) - return (DCMD_OK); - - if ((flags & DCMD_ADDRSPEC) || (argc != 4)) { - return (DCMD_USAGE); - } - - for (i = 0; i < 4; i++) { - bdf[i] = (argv[i].a_type == MDB_TYPE_IMMEDIATE) ? 
- (int)argv[i].a_un.a_val : - (int)mdb_strtoull(argv[i].a_un.a_str); - } - - if ((bdf[0] != 0) || (bdf[1] > 255) || (bdf[2] > 31) || (bdf[3] > 7)) { - mdb_warn("invalid pci segment, bus, device, function" - "tuple (%x, %x, %x, %x)\n", bdf[0], bdf[1], bdf[2], bdf[3]); - return (DCMD_USAGE); - } - - - cbdata.dc_seg = bdf[0]; - cbdata.dc_bus = bdf[1]; - cbdata.dc_devfunc = bdf[2] << 3 | bdf[3]; - cbdata.dc_match = 0; - - if (mdb_readvar(&addr, "top_devinfo") == -1) { - mdb_warn("failed to read 'top_devinfo'\n"); - return (DCMD_ERR); - } - - if (mdb_pwalk("devinfo", - (mdb_walk_cb_t)match_bdf, &cbdata, addr)) { - mdb_warn("couldn't walk devinfo tree\n"); - return (DCMD_ERR); - } - - if (cbdata.dc_match == 0) - mdb_printf("No devinfo node found for %x:%x:%x:%x\n", - bdf[0], bdf[1], bdf[2], bdf[3]); - - return (DCMD_OK); -} - -/* - * dcmd: iomudip2dom - */ -static int -iomudip2dom(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) -{ - _NOTE(ARGUNUSED(argv)) - struct dev_info dinfo; - iommu_private_t private; - - if (iommu_supported() == B_FALSE) - return (DCMD_OK); - - if (!(flags & DCMD_ADDRSPEC) || (argc != 0)) { - return (DCMD_USAGE); - } - - /* read iommu private */ - if ((mdb_vread(&dinfo, sizeof (dinfo), addr) != sizeof (dinfo)) || - (dinfo.devi_iommu_private == NULL) || - (mdb_vread(&private, sizeof (private), - (uintptr_t)dinfo.devi_iommu_private) != sizeof (private))) { - mdb_warn("failed to read iommu private structure for " - "devinfo node at %p\n", addr); - return (DCMD_ERR); - } - - /* read domain */ - if (private.idp_intel_domain != NULL) { - mdb_printf("%p\n", (uintptr_t)private.idp_intel_domain); - } else { - mdb_printf("No domain dedicated for this device\n"); - } - - return (DCMD_OK); -} - -static const mdb_dcmd_t dcmds[] = { - { "iomunits", NULL, - "list addresses of software state structure for all IOMMUs", - iomunits }, - { "iomuprt", "?", - "given an IOMMU's state structure address, print its contents", - iomuprt}, - { "iomudoms", NULL, - "list addresses of all IOMMU domain software structures", - iomudoms }, - { "iomudomprt", "?", - "given an IOMMU's domain struct address, print its contents", - iomudomprt }, - { "iomurmrrs", NULL, - "list addresses of all Intel IOMMU RMRR software structures", - iomurmrrs }, - { "iomurmrrprt", NULL, - "given an IOMMU RMRR structure address, print its contents", - iomurmrrprt }, - { "iomuvtop", "?<iova>", - "print physical address of an IO virtual address", - iomuvtop, iomuvtop_help }, - { "bdf2devinfo", "[segment] [bus] [dev] [func]", - "given its pci segment/bus/dev/func, print the devinfo node", - bdf2devinfo }, - { "iomudip2dom", "?", - "given a devinfo node, print the address of its IOMMU domain", - iomudip2dom }, - { NULL } -}; - -static const mdb_walker_t walkers[] = { - { NULL } -}; - -static const mdb_modinfo_t modinfo = { - MDB_API_VERSION, dcmds, walkers -}; - -const mdb_modinfo_t * -_mdb_init(void) -{ - GElf_Sym sym; - - /* check to see if kernel supports iommu */ - if (mdb_lookup_by_name("intel_iommu_support", &sym) != -1) { - if (mdb_vread(&iommu_support, sizeof (boolean_t), - (uintptr_t)sym.st_value) != sizeof (boolean_t)) { - iommu_support = B_FALSE; - } - } - - return (&modinfo); -}
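
For reference, the page-table walk in the deleted ::iomuvtop dcmd reduces to simple per-level offset arithmetic. The sketch below isolates it as a standalone C program; the three constants are assumptions matching VT-d's 4 KB pages and 9-bit-per-level stride, since their real definitions lived in the (likewise deleted) intel_iommu.h.

	#include <stdio.h>
	#include <stdint.h>

	#define IOMMU_PAGE_SHIFT	12	/* assumed: 4 KB IOMMU pages */
	#define IOMMU_LEVEL_STRIDE	9	/* assumed: 512-entry tables */
	#define IOMMU_LEVEL_OFFSET	0x1ff	/* assumed: 9-bit index mask */

	/* same arithmetic as the deleted iova_level_to_offset() */
	static int
	iova_level_to_offset(uintptr_t iova, int level)
	{
		int start = (level - 1) * IOMMU_LEVEL_STRIDE + IOMMU_PAGE_SHIFT;

		return ((iova >> start) & IOMMU_LEVEL_OFFSET);
	}

	int
	main(void)
	{
		uintptr_t iova = 0xfebff000;	/* arbitrary sample DVMA address */
		int level;

		/* a 4-level walk indexes 9 bits at a time, top level first */
		for (level = 4; level > 0; level--)
			printf("level %d: offset %x\n", level,
			    iova_level_to_offset(iova, level));
		return (0);
	}
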
--- a/usr/src/cmd/mdb/intel/amd64/Makefile Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/cmd/mdb/intel/amd64/Makefile Sat Jan 30 18:23:16 2010 -0800 @@ -19,13 +19,13 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # include ../../Makefile.common -MODULES = $(COMMON_MODULES_PROC) $(COMMON_MODULES_KVM) uhci rootnex +MODULES = $(COMMON_MODULES_PROC) $(COMMON_MODULES_KVM) uhci $(CLOSED_BUILD)MODULES += \ $(CLOSED_COMMON_MODULES_KVM:%=$(CLOSED)/cmd/mdb/intel/amd64/%)
--- a/usr/src/cmd/mdb/intel/ia32/Makefile Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/cmd/mdb/intel/ia32/Makefile Sat Jan 30 18:23:16 2010 -0800 @@ -19,14 +19,14 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # include ../../Makefile.common MODULES = $(COMMON_MODULES_PROC) $(COMMON_MODULES_PROC_32BIT) \ - $(COMMON_MODULES_KVM) uhci rootnex + $(COMMON_MODULES_KVM) uhci $(CLOSED_BUILD)MODULES += \ $(CLOSED_COMMON_MODULES_KVM:%=$(CLOSED)/cmd/mdb/intel/ia32/%)
--- a/usr/src/cmd/mdb/intel/ia32/rootnex/Makefile Sat Jan 30 15:04:39 2010 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. - -# Copyright (c) 2009, Intel Corporation. -# All rights reserved. - -MODULE = rootnex.so -MDBTGT = kvm - -MODSRCS = intel_iommu.c - -include ../../../../Makefile.cmd -include ../../Makefile.ia32 -include ../../../Makefile.module - -CPPFLAGS += -I$(SRC)/uts/i86pc
--- a/usr/src/pkgdefs/SUNWmdb/prototype_i386 Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/pkgdefs/SUNWmdb/prototype_i386 Sat Jan 30 18:23:16 2010 -0800 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -81,7 +81,6 @@ f none usr/lib/mdb/kvm/amd64/nfs.so 555 root sys f none usr/lib/mdb/kvm/amd64/ptm.so 555 root sys f none usr/lib/mdb/kvm/amd64/random.so 555 root sys -f none usr/lib/mdb/kvm/amd64/rootnex.so 555 root sys f none usr/lib/mdb/kvm/amd64/s1394.so 555 root sys f none usr/lib/mdb/kvm/amd64/sata.so 555 root sys f none usr/lib/mdb/kvm/amd64/scsi_vhci.so 555 root sys @@ -118,7 +117,6 @@ f none usr/lib/mdb/kvm/nfs.so 555 root sys f none usr/lib/mdb/kvm/ptm.so 555 root sys f none usr/lib/mdb/kvm/random.so 555 root sys -f none usr/lib/mdb/kvm/rootnex.so 555 root sys f none usr/lib/mdb/kvm/s1394.so 555 root sys f none usr/lib/mdb/kvm/sata.so 555 root sys f none usr/lib/mdb/kvm/scsi_vhci.so 555 root sys
--- a/usr/src/pkgdefs/SUNWmdbr/prototype_i386 Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/pkgdefs/SUNWmdbr/prototype_i386 Sat Jan 30 18:23:16 2010 -0800 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -50,7 +50,6 @@ f none kernel/kmdb/amd64/nfs 555 root sys f none kernel/kmdb/amd64/ptm 555 root sys f none kernel/kmdb/amd64/random 555 root sys -f none kernel/kmdb/amd64/rootnex 555 root sys f none kernel/kmdb/amd64/s1394 555 root sys f none kernel/kmdb/amd64/sata 555 root sys f none kernel/kmdb/amd64/scsi_vhci 555 root sys @@ -86,7 +85,6 @@ f none kernel/kmdb/nfs 555 root sys f none kernel/kmdb/ptm 555 root sys f none kernel/kmdb/random 555 root sys -f none kernel/kmdb/rootnex 555 root sys f none kernel/kmdb/s1394 555 root sys f none kernel/kmdb/sata 555 root sys f none kernel/kmdb/scsi_vhci 555 root sys
--- a/usr/src/uts/common/os/devcfg.c Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/common/os/devcfg.c Sat Jan 30 18:23:16 2010 -0800 @@ -57,12 +57,12 @@ #include <sys/sunldi_impl.h> #include <sys/bootprops.h> - -#if defined(__i386) || defined(__amd64) -#if !defined(__xpv) +#if defined(__amd64) && !defined(__xpv) #include <sys/iommulib.h> #endif -#endif + +/* XXX remove before putback */ +boolean_t ddi_err_panic = B_TRUE; #ifdef DEBUG int ddidebug = DDI_AUDIT; @@ -399,10 +399,6 @@ { struct dev_info *devi = DEVI(dip); struct devi_nodeid *elem; -#if defined(__x86) && !defined(__xpv) - gfx_entry_t *gfxp; - extern void *gfx_devinfo_list; -#endif ASSERT(devi->devi_ref == 0); ASSERT(devi->devi_addr == NULL); @@ -410,16 +406,6 @@ ASSERT(devi->devi_child == NULL); ASSERT(devi->devi_hp_hdlp == NULL); -#if defined(__x86) && !defined(__xpv) - for (gfxp = gfx_devinfo_list; gfxp; gfxp = gfxp->g_next) { - if (gfxp->g_dip == dip) { - gfxp->g_dip = NULL; - while (gfxp->g_ref) - ; - } - } - membar_producer(); -#endif /* free devi_addr_buf allocated by ddi_set_name_addr() */ if (devi->devi_addr_buf) kmem_free(devi->devi_addr_buf, 2 * MAXNAMELEN); @@ -1348,14 +1334,12 @@ DEVI_CLR_NEED_RESET(dip); mutex_exit(&(DEVI(dip)->devi_lock)); -#if defined(__i386) || defined(__amd64) -#if !defined(__xpv) +#if defined(__amd64) && !defined(__xpv) /* * Close any iommulib mediated linkage to an IOMMU */ iommulib_nex_close(dip); #endif -#endif /* destroy the taskq */ if (DEVI(dip)->devi_taskq) { @@ -8565,3 +8549,110 @@ if (MDI_PHCI(dip)) mdi_phci_retire_finalize(dip, phci_only); } + +void +ddi_err(ddi_err_t ade, dev_info_t *rdip, const char *fmt, ...) +{ + va_list ap; + char strbuf[256]; + char *buf; + size_t buflen, tlen; + int ce; + int de; + const char *fmtbad = "Invalid arguments to ddi_err()"; + + de = DER_CONT; + strbuf[1] = '\0'; + + switch (ade) { + case DER_CONS: + strbuf[0] = '^'; + break; + case DER_LOG: + strbuf[0] = '!'; + break; + case DER_VERB: + strbuf[0] = '?'; + break; + default: + strbuf[0] = '\0'; + de = ade; + break; + } + + tlen = strlen(strbuf); + buf = strbuf + tlen; + buflen = sizeof (strbuf) - tlen; + + if (rdip && ddi_get_instance(rdip) == -1) { + (void) snprintf(buf, buflen, "%s: ", + ddi_driver_name(rdip)); + } else if (rdip) { + (void) snprintf(buf, buflen, "%s%d: ", + ddi_driver_name(rdip), ddi_get_instance(rdip)); + } + + tlen = strlen(strbuf); + buf = strbuf + tlen; + buflen = sizeof (strbuf) - tlen; + + va_start(ap, fmt); + switch (de) { + case DER_CONT: + (void) vsnprintf(buf, buflen, fmt, ap); + if (ade != DER_CONT) { + (void) strlcat(strbuf, "\n", sizeof (strbuf)); + } + ce = CE_CONT; + break; + case DER_NOTE: + (void) vsnprintf(buf, buflen, fmt, ap); + ce = CE_NOTE; + break; + case DER_WARN: + (void) vsnprintf(buf, buflen, fmt, ap); + ce = CE_WARN; + break; + case DER_MODE: + (void) vsnprintf(buf, buflen, fmt, ap); + if (ddi_err_panic == B_TRUE) { + ce = CE_PANIC; + } else { + ce = CE_WARN; + } + break; + case DER_DEBUG: + (void) snprintf(buf, buflen, "DEBUG: "); + tlen = strlen("DEBUG: "); + (void) vsnprintf(buf + tlen, buflen - tlen, fmt, ap); + ce = CE_CONT; + break; + case DER_PANIC: + (void) vsnprintf(buf, buflen, fmt, ap); + ce = CE_PANIC; + break; + case DER_INVALID: + default: + (void) snprintf(buf, buflen, fmtbad); + tlen = strlen(fmtbad); + (void) vsnprintf(buf + tlen, buflen - tlen, fmt, ap); + ce = CE_PANIC; + break; + } + va_end(ap); + + cmn_err(ce, strbuf); +} + +/*ARGSUSED*/ +void +ddi_mem_update(uint64_t addr, uint64_t size) +{ +#if defined(__x86) && !defined(__xpv) + 
extern void immu_physmem_update(uint64_t addr, uint64_t size); + immu_physmem_update(addr, size); +#else + /*LINTED*/ + ; +#endif +}
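
The new ddi_err() added above wraps cmn_err(9F): DER_CONS, DER_LOG and DER_VERB prepend the standard '^' (console only), '!' (log only) and '?' (verbose boot) routing prefixes, DER_MODE panics or warns depending on ddi_err_panic, and the remaining levels map directly onto cmn_err() severities. A minimal sketch of driver usage; xx_setup() and its failure path are illustrative only.

	#include <sys/sunddi.h>

	static int
	xx_setup(dev_info_t *rdip, int err)
	{
		/* DER_LOG gets the '!' prefix: message goes to the log only */
		ddi_err(DER_LOG, rdip, "IMMU: setting up unit %d", err);

		if (err != 0) {
			/* panics if ddi_err_panic == B_TRUE, else CE_WARN */
			ddi_err(DER_MODE, rdip, "setup failed: %d", err);
			return (DDI_FAILURE);
		}
		return (DDI_SUCCESS);
	}
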
--- a/usr/src/uts/common/os/mem_config.c Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/common/os/mem_config.c Sat Jan 30 18:23:16 2010 -0800 @@ -562,6 +562,12 @@ if (nlgrps == 1) lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0); + /* + * Inform DDI of update + */ + ddi_mem_update((uint64_t)(pt_base) << PAGESHIFT, + (uint64_t)(tpgs) << PAGESHIFT); + delspan_unreserve(pt_base, tpgs); return (KPHYSM_OK); /* Successfully added system memory */
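
The hook above converts the delspan's page-frame base and page count into a byte address and byte length before calling ddi_mem_update(). A standalone sketch of that conversion, with made-up pt_base/tpgs values and PAGESHIFT assumed to be 12 (x86 4 KB base pages):

	#include <stdio.h>
	#include <stdint.h>

	#define PAGESHIFT	12	/* assumed: x86 4 KB base pages */

	int
	main(void)
	{
		uint64_t pt_base = 0x80000;	/* hypothetical first page frame */
		uint64_t tpgs = 0x4000;		/* hypothetical page count */

		/* same shift as the mem_config.c call site above */
		printf("ddi_mem_update(0x%llx, 0x%llx)\n",
		    (unsigned long long)(pt_base << PAGESHIFT),
		    (unsigned long long)(tpgs << PAGESHIFT));
		return (0);
	}
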
--- a/usr/src/uts/common/sys/ddi_impldefs.h Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/common/sys/ddi_impldefs.h Sat Jan 30 18:23:16 2010 -0800 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -257,8 +257,8 @@ struct i_ddi_prop_dyn *devi_prop_dyn_driver; /* prop_op */ struct i_ddi_prop_dyn *devi_prop_dyn_parent; /* bus_prop_op */ - /* For intel iommu support */ - void *devi_iommu_private; + /* For x86 (Intel and AMD) IOMMU support */ + void *devi_iommu; /* IOMMU handle */ iommulib_handle_t devi_iommulib_handle; @@ -596,12 +596,16 @@ #define DEVI_RETIRING 0x00000200 /* being evaluated for retire */ #define DEVI_R_CONSTRAINT 0x00000400 /* constraints have been applied */ #define DEVI_R_BLOCKED 0x00000800 /* constraints block retire */ -#define DEVI_CT_NOP 0x00001000 /* NOP contract event occurred */ +#define DEVI_CT_NOP 0x00001000 /* NOP contract event occurred */ +#define DEVI_PCI_DEVICE 0x00002000 /* dip is PCI */ #define DEVI_BUSY_CHANGING(dip) (DEVI(dip)->devi_flags & DEVI_BUSY) #define DEVI_BUSY_OWNED(dip) (DEVI_BUSY_CHANGING(dip) && \ ((DEVI(dip))->devi_busy_thread == curthread)) +#define DEVI_IS_PCI(dip) (DEVI(dip)->devi_flags & DEVI_PCI_DEVICE) +#define DEVI_SET_PCI(dip) (DEVI(dip)->devi_flags |= (DEVI_PCI_DEVICE)) + char *i_ddi_devi_class(dev_info_t *); int i_ddi_set_devi_class(dev_info_t *, char *, int);
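
DEVI_SET_PCI()/DEVI_IS_PCI() just set and test the new DEVI_PCI_DEVICE bit in devi_flags. A hedged sketch of how an enumeration path might use them; xx_tag_child() is hypothetical, and a real caller would hold devi_lock around the flag update.

	#include <sys/ddi_impldefs.h>

	static void
	xx_tag_child(dev_info_t *cdip)
	{
		/* mark this node as a PCI device ... */
		DEVI_SET_PCI(cdip);

		/* ... so IOMMU setup can restrict itself to PCI dips */
		if (DEVI_IS_PCI(cdip)) {
			/* eligible for VT-d domain/context setup */
		}
	}
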
--- a/usr/src/uts/common/sys/ddidmareq.h Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/common/sys/ddidmareq.h Sat Jan 30 18:23:16 2010 -0800 @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_DDIDMAREQ_H #define _SYS_DDIDMAREQ_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -639,6 +637,13 @@ */ #define DDI_DMA_INUSE -9 + +/* + * DVMA disabled or not supported. use physical DMA + */ +#define DDI_DMA_USE_PHYSICAL -10 + + /* * In order for the access to a memory object to be consistent * between a device and a CPU, the function ddi_dma_sync(9F)
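
DDI_DMA_USE_PHYSICAL (-10) is not an error in the usual sense: it tells the caller that DVMA is disabled or unsupported for the device and that the physical-address scatter/gather list should be used as-is. A hedged sketch of a bind path honoring it; xx_dvma_map() stands in for the real DVMA entry point and is illustrative only.

	#include <sys/ddidmareq.h>

	static int
	xx_bind_handle(dev_info_t *rdip, struct ddi_dma_req *req)
	{
		int e;

		e = xx_dvma_map(rdip, req);	/* hypothetical DVMA mapper */
		if (e == DDI_DMA_USE_PHYSICAL) {
			/* no remapping for this device: keep physical SGL */
			return (DDI_DMA_MAPPED);
		}
		return (e);
	}
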
--- a/usr/src/uts/common/sys/sunddi.h Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/common/sys/sunddi.h Sat Jan 30 18:23:16 2010 -0800 @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -398,6 +398,26 @@ #define DDI_MODEL_NATIVE DATAMODEL_NATIVE #define DDI_MODEL_NONE DATAMODEL_NONE +/* + * Defines for ddi_err(). + */ +typedef enum ddi_err { + DER_INVALID = 0, /* must be 0 */ + DER_CONT = 1, + DER_CONS, + DER_LOG, + DER_VERB, + DER_NOTE, + DER_WARN, + DER_PANIC, + DER_MODE, + DER_DEBUG +} ddi_err_t; + +/* if set to B_TRUE, DER_MODE is equivalent to DER_PANIC */ +extern boolean_t ddi_err_panic; +extern void ddi_err(ddi_err_t de, dev_info_t *rdip, const char *fmt, ...); + extern char *ddi_strdup(const char *str, int flag); extern char *strdup(const char *str); extern void strfree(char *str); @@ -2225,6 +2245,9 @@ ddi_cb_handle_t *ret_hdlp); int ddi_cb_unregister(ddi_cb_handle_t hdl); +/* Notify DDI of memory added */ +void ddi_mem_update(uint64_t addr, uint64_t size); + #endif /* _KERNEL */ #ifdef __cplusplus
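
Because ddi_err_panic is a plain global (B_TRUE by default, per the "XXX remove before putback" hunk in devcfg.c above), the DER_MODE behavior can be flipped at runtime from kmdb or mdb -k. A sketch, writing the 4-byte boolean_t:

	> ddi_err_panic/W 0
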
--- a/usr/src/uts/i86pc/Makefile.files Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/i86pc/Makefile.files Sat Jan 30 18:23:16 2010 -0800 @@ -20,7 +20,7 @@ # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # This Makefile defines file modules in the directory uts/i86pc @@ -204,7 +204,9 @@ acpidev_resource.o \ acpidev_util.o -ROOTNEX_OBJS += rootnex.o iommu_rscs.o dmar_acpi.o intel_iommu.o +ROOTNEX_OBJS += rootnex.o immu.o immu_dmar.o immu_dvma.o \ + immu_intrmap.o immu_qinv.o immu_regs.o + TZMON_OBJS += tzmon.o UPPC_OBJS += uppc.o psm_common.o XSVC_OBJS += xsvc.o
--- a/usr/src/uts/i86pc/io/dmar_acpi.c Sat Jan 30 15:04:39 2010 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,829 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Portions Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright (c) 2009, Intel Corporation. - * All rights reserved. - */ - - -#include <sys/debug.h> -#include <sys/sysmacros.h> -#include <sys/types.h> -#include <sys/kmem.h> -#include <sys/sunddi.h> -#include <sys/list.h> -#include <sys/pci.h> -#include <sys/pci_cfgspace.h> -#include <sys/pci_impl.h> -#include <sys/sunndi.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/bootconf.h> -#include <sys/int_fmtio.h> -#include <sys/dmar_acpi.h> -#include <sys/smbios.h> -#include <sys/iommulib.h> - -/* - * the following pci manipulate function pinter - * are defined in pci_cfgspace.h - */ -#define pci_getb (*pci_getb_func) - -/* - * define for debug - */ -int intel_dmar_acpi_debug = 0; -#define dcmn_err if (intel_dmar_acpi_debug) cmn_err - -/* - * define for printing blacklist ID - */ -int intel_iommu_blacklist_id; - -/* - * global varables - */ -boolean_t intel_iommu_support; -intel_dmar_info_t *dmar_info; - -/* - * global variables to save source id and drhd info for ioapic - * to support interrupt remapping - */ -list_t ioapic_drhd_infos; - -/* - * internal variables - */ -static void *dmart; - -/* - * helper functions to release the allocated resources - * when failed - */ -static void -release_dev_scope(list_t *lp) -{ - pci_dev_scope_t *devs; - - if (list_is_empty(lp)) - return; - - while ((devs = list_head(lp)) != NULL) { - list_remove(lp, devs); - kmem_free(devs, sizeof (pci_dev_scope_t)); - } -} - -static void -release_drhd_info(void) -{ - drhd_info_t *drhd; - list_t *lp; - int i; - - for (i = 0; i < DMAR_MAX_SEGMENT; i++) { - lp = &dmar_info->dmari_drhd[i]; - if (list_is_empty(lp)) - break; - - while ((drhd = list_head(lp)) != NULL) { - list_remove(lp, drhd); - - /* - * release the device scope - */ - release_dev_scope(&drhd->di_dev_list); - list_destroy(&drhd->di_dev_list); - kmem_free(drhd, sizeof (drhd_info_t)); - } - } -} - -static void -release_rmrr_info(void) -{ - rmrr_info_t *rmrr; - list_t *lp; - int i; - - for (i = 0; i < DMAR_MAX_SEGMENT; i++) { - lp = &dmar_info->dmari_rmrr[i]; - if (list_is_empty(lp)) - break; - - while ((rmrr = list_head(lp)) != NULL) { - list_remove(lp, rmrr); - release_dev_scope(&rmrr->ri_dev_list); - list_destroy(&rmrr->ri_dev_list); - kmem_free(rmrr, sizeof (rmrr_info_t)); - } - } -} - -/* - * intel_iommu_release_dmar_info() - * global function, which is called to release dmar_info - * when the dmar_intel_iommu_supportinfo is not - * needed any more. 
- */ -void -intel_iommu_release_dmar_info(void) -{ - int i; - - intel_iommu_support = B_FALSE; - release_drhd_info(); - release_rmrr_info(); - - /* - * destroy the drhd and rmrr list - */ - for (i = 0; i < DMAR_MAX_SEGMENT; i++) { - list_destroy(&dmar_info->dmari_drhd[i]); - list_destroy(&dmar_info->dmari_rmrr[i]); - } - - kmem_free(dmar_info, sizeof (intel_dmar_info_t)); -} - -/* - * create_dmar_devi() - * - * create the dev_info node in the device tree, - * the info node is a nuxus child of the root - * nexus - */ -static void -create_dmar_devi(void) -{ - dev_info_t *dip; - drhd_info_t *drhd; - struct regspec reg; - struct ddi_parent_private_data *pdptr; - char nodename[64]; - int i, j; - - for (i = 0; i < DMAR_MAX_SEGMENT; i++) { - - /* - * ignore the empty list - */ - if (list_is_empty(&dmar_info->dmari_drhd[i])) - break; - - /* - * alloc dev_info per drhd unit - */ - j = 0; - for_each_in_list(&dmar_info->dmari_drhd[i], drhd) { - (void) snprintf(nodename, sizeof (nodename), - "dmar%d,%d", drhd->di_segment, j++); - ndi_devi_alloc_sleep(ddi_root_node(), nodename, - DEVI_SID_NODEID, &dip); - drhd->di_dip = dip; - reg.regspec_bustype = 0; - reg.regspec_addr = drhd->di_reg_base; - reg.regspec_size = IOMMU_REG_SIZE; - - /* - * update the reg properties - * - * reg property will be used for register - * set access - * - * refer to the bus_map of root nexus driver - * I/O or memory mapping: - * - * <bustype=0, addr=x, len=x>: memory - * <bustype=1, addr=x, len=x>: i/o - * <bustype>1, addr=0, len=x>: x86-compatibility i/o - */ - (void) ndi_prop_update_int_array(DDI_DEV_T_NONE, - dip, "reg", (int *)®, - sizeof (struct regspec) / sizeof (int)); - - pdptr = (struct ddi_parent_private_data *) - kmem_zalloc(sizeof (struct ddi_parent_private_data) - + sizeof (struct regspec), KM_SLEEP); - pdptr->par_nreg = 1; - pdptr->par_reg = (struct regspec *)(pdptr + 1); - pdptr->par_reg->regspec_bustype = 0; - pdptr->par_reg->regspec_addr = drhd->di_reg_base; - pdptr->par_reg->regspec_size = IOMMU_REG_SIZE; - ddi_set_parent_data(dip, pdptr); - } - } -} - -/* - * parse_dmar_dev_scope() - * parse the device scope attached to drhd or rmrr - */ -static int -parse_dmar_dev_scope(dmar_acpi_dev_scope_t *scope, pci_dev_scope_t **devs) -{ - int depth; - int bus, dev, func; - pci_dev_scope_t *entry; - - struct path_to_dev { - uint8_t device; - uint8_t function; - } *path; - - path = (struct path_to_dev *)(scope + 1); - depth = (scope->ds_length - 6)/2; - bus = scope->ds_sbusnum; - dev = path->device; - func = path->function; - - while (--depth) { - path++; - bus = pci_getb(bus, dev, func, PCI_BCNF_SECBUS); - dev = path->device; - func = path->function; - } - - entry = (pci_dev_scope_t *)kmem_zalloc( - sizeof (pci_dev_scope_t), KM_SLEEP); - entry->pds_bus = bus; - entry->pds_dev = dev; - entry->pds_func = func; - entry->pds_type = scope->ds_type; - - *devs = entry; - return (PARSE_DMAR_SUCCESS); -} - -/* - * parse_dmar_rmrr() - * parse the rmrr units in dmar table - */ -static int -parse_dmar_rmrr(dmar_acpi_unit_head_t *head) -{ - dmar_acpi_rmrr_t *rmrr; - rmrr_info_t *rinfo; - dmar_acpi_dev_scope_t *scope; - pci_dev_scope_t *devs; - - rmrr = (dmar_acpi_rmrr_t *)head; - ASSERT(head->uh_type == DMAR_UNIT_TYPE_RMRR); - ASSERT(rmrr->rm_segment <= DMAR_MAX_SEGMENT); - - /* - * for each rmrr, limiaddr must > baseaddr - */ - if (rmrr->rm_baseaddr >= rmrr->rm_limiaddr) { - cmn_err(CE_NOTE, "Invalid BIOS RMRR: Disabling Intel IOMMU"); - cmn_err(CE_WARN, "!invalid rmrr," - " baseaddr = 0x%" PRIx64 - ", limiaddr = 0x%" PRIx64 "", - 
rmrr->rm_baseaddr, rmrr->rm_limiaddr); - return (PARSE_DMAR_FAIL); - } - - /* - * allocate and setup the device info structure - */ - rinfo = (rmrr_info_t *)kmem_zalloc(sizeof (rmrr_info_t), - KM_SLEEP); - rinfo->ri_segment = rmrr->rm_segment; - rinfo->ri_baseaddr = rmrr->rm_baseaddr; - rinfo->ri_limiaddr = rmrr->rm_limiaddr; - list_create(&rinfo->ri_dev_list, sizeof (pci_dev_scope_t), - offsetof(pci_dev_scope_t, node)); - - /* - * parse the device scope - */ - scope = (dmar_acpi_dev_scope_t *)(rmrr + 1); - while ((unsigned long)scope < ((unsigned long)rmrr + head->uh_length)) { - if (parse_dmar_dev_scope(scope, &devs) - != PARSE_DMAR_SUCCESS) { - return (PARSE_DMAR_FAIL); - } - - list_insert_tail(&rinfo->ri_dev_list, devs); - scope = (dmar_acpi_dev_scope_t *)((unsigned long)scope - + scope->ds_length); - } - - /* - * save this info structure - */ - list_insert_tail(&dmar_info->dmari_rmrr[rinfo->ri_segment], rinfo); - return (PARSE_DMAR_SUCCESS); -} - -/* - * parse_dmar_drhd() - * parse the drhd uints in dmar table - */ -static int -parse_dmar_drhd(dmar_acpi_unit_head_t *head) -{ - dmar_acpi_drhd_t *drhd; - drhd_info_t *dinfo; - dmar_acpi_dev_scope_t *scope; - list_t *lp; - pci_dev_scope_t *devs; - ioapic_drhd_info_t *ioapic_dinfo; - - drhd = (dmar_acpi_drhd_t *)head; - ASSERT(head->uh_type == DMAR_UNIT_TYPE_DRHD); - - /* - * assert the segment boundary - */ - ASSERT(drhd->dr_segment <= DMAR_MAX_SEGMENT); - - /* - * allocate and setup the info structure - */ - dinfo = (drhd_info_t *)kmem_zalloc(sizeof (drhd_info_t), KM_SLEEP); - dinfo->di_segment = drhd->dr_segment; - dinfo->di_reg_base = drhd->dr_baseaddr; - dinfo->di_include_all = (drhd->dr_flags & INCLUDE_PCI_ALL) ? - B_TRUE : B_FALSE; - list_create(&dinfo->di_dev_list, sizeof (pci_dev_scope_t), - offsetof(pci_dev_scope_t, node)); - - /* - * parse the device scope - */ - scope = (dmar_acpi_dev_scope_t *)(drhd + 1); - while ((unsigned long)scope < ((unsigned long)drhd + - head->uh_length)) { - - if (parse_dmar_dev_scope(scope, &devs) - != PARSE_DMAR_SUCCESS) { - return (PARSE_DMAR_FAIL); - } - /* get ioapic source id for interrupt remapping */ - if (devs->pds_type == DEV_SCOPE_IOAPIC) { - ioapic_dinfo = kmem_zalloc - (sizeof (ioapic_drhd_info_t), KM_SLEEP); - - ioapic_dinfo->ioapic_id = scope->ds_enumid; - ioapic_dinfo->sid = - (devs->pds_bus << 8) | - (devs->pds_dev << 3) | - (devs->pds_func); - ioapic_dinfo->drhd = dinfo; - list_insert_tail(&ioapic_drhd_infos, ioapic_dinfo); - } - - list_insert_tail(&dinfo->di_dev_list, devs); - scope = (dmar_acpi_dev_scope_t *)((unsigned long)scope + - scope->ds_length); - } - - lp = &dmar_info->dmari_drhd[dinfo->di_segment]; - list_insert_tail(lp, dinfo); - return (PARSE_DMAR_SUCCESS); -} - -#define OEMID_OFF 10 -#define OEMID_LEN 6 -#define OEM_TBLID_OFF 16 -#define OEM_TBLID_LEN 8 -#define OEMREV_OFF 24 -#define OEMREV_LEN 4 - -static int -dmar_blacklisted(caddr_t dmart) -{ - char oemid[OEMID_LEN + 1] = {0}; - char oem_tblid[OEM_TBLID_LEN + 1] = {0}; - char oemrev[OEMREV_LEN + 1] = {0}; - const char *mfgr = "?"; - const char *product = "?"; - const char *version = "?"; - smbios_info_t smbios_info; - smbios_system_t smbios_sys; - id_t id; - char **blacklist; - int i; - uint_t n; - - (void) strncpy(oemid, dmart + OEMID_OFF, OEMID_LEN); - (void) strncpy(oem_tblid, dmart + OEM_TBLID_OFF, OEM_TBLID_LEN); - (void) strncpy(oemrev, dmart + OEMREV_OFF, OEMREV_LEN); - - iommulib_smbios = smbios_open(NULL, SMB_VERSION, ksmbios_flags, - NULL); - if (iommulib_smbios && - (id = 
smbios_info_system(iommulib_smbios, &smbios_sys)) - != SMB_ERR && - smbios_info_common(iommulib_smbios, id, &smbios_info) - != SMB_ERR) { - mfgr = smbios_info.smbi_manufacturer; - product = smbios_info.smbi_product; - version = smbios_info.smbi_version; - } - - if (intel_iommu_blacklist_id) { - cmn_err(CE_NOTE, "SMBIOS ID:"); - cmn_err(CE_NOTE, "Manufacturer = <%s>", mfgr); - cmn_err(CE_NOTE, "Product = <%s>", product); - cmn_err(CE_NOTE, "Version = <%s>", version); - cmn_err(CE_NOTE, "DMAR ID:"); - cmn_err(CE_NOTE, "oemid = <%s>", oemid); - cmn_err(CE_NOTE, "oemtblid = <%s>", oem_tblid); - cmn_err(CE_NOTE, "oemrev = <%s>", oemrev); - } - - /* - * Fake up a dev_t since searching global prop list needs it - */ - if (ddi_prop_lookup_string_array( - makedevice(ddi_name_to_major("rootnex"), 0), ddi_root_node(), - DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, - "intel-iommu-blacklist", &blacklist, &n) != DDI_PROP_SUCCESS) { - /* No blacklist */ - return (0); - } - - if (n < 4 || n % 4 != 0) { - cmn_err(CE_WARN, - "invalid Intel IOMMU blacklist: not a multiple of four"); - ddi_prop_free(blacklist); - return (0); - } - - for (i = 0; i < n; i += 4) { - if (strcmp(blacklist[i], "SMBIOS") == 0 && - strcmp(blacklist[i+1], mfgr) == 0 && - (blacklist[i+2][0] == '\0' || - strcmp(blacklist[i+2], product) == 0) && - (blacklist[i+3][0] == '\0' || - strcmp(blacklist[i+3], version) == 0)) { - ddi_prop_free(blacklist); - return (1); - } - if (strcmp(blacklist[i], "DMAR") == 0 && - strcmp(blacklist[i+1], oemid) == 0 && - (blacklist[i+2][0] == '\0' || - strcmp(blacklist[i+2], oem_tblid) == 0) && - (blacklist[i+3][0] == '\0' || - strcmp(blacklist[i+3], oemrev) == 0)) { - ddi_prop_free(blacklist); - return (1); - } - } - - ddi_prop_free(blacklist); - - return (0); -} - -/* - * parse_dmar() - * parse the dmar table - */ -static int -parse_dmar(void) -{ - dmar_acpi_head_t *dmar_head; - dmar_acpi_unit_head_t *unit_head; - drhd_info_t *drhd; - int i; - - dmar_head = (dmar_acpi_head_t *)dmart; - - /* - * do a sanity check - */ - if (!dmar_head || strncmp(dmar_head->dh_sig, "DMAR", 4)) { - dcmn_err(CE_CONT, "wrong DMAR signature: %c%c%c%c", - dmar_head->dh_sig[0], dmar_head->dh_sig[1], - dmar_head->dh_sig[2], dmar_head->dh_sig[3]); - return (PARSE_DMAR_FAIL); - } - - if (dmar_blacklisted(dmart)) { - cmn_err(CE_NOTE, "Intel IOMMU is blacklisted on this platform"); - return (PARSE_DMAR_FAIL); - } - - dmar_info->dmari_haw = dmar_head->dh_haw + 1; - dmar_info->dmari_intr_remap = dmar_head->dh_flags & 0x1 ? 
- B_TRUE : B_FALSE; - - /* - * parse each unit - * only DRHD and RMRR are parsed, others are ignored - */ - unit_head = (dmar_acpi_unit_head_t *)(dmar_head + 1); - while ((unsigned long)unit_head < (unsigned long)dmar_head + - dmar_head->dh_len) { - switch (unit_head->uh_type) { - case DMAR_UNIT_TYPE_DRHD: - if (parse_dmar_drhd(unit_head) != - PARSE_DMAR_SUCCESS) { - - /* - * iommu_detect_parse() will release - * all drhd info structure, just - * return false here - */ - return (PARSE_DMAR_FAIL); - } - break; - case DMAR_UNIT_TYPE_RMRR: - if (parse_dmar_rmrr(unit_head) != - PARSE_DMAR_SUCCESS) - return (PARSE_DMAR_FAIL); - break; - default: - cmn_err(CE_CONT, "!DMAR ACPI table: " - "unit type %d ignored\n", unit_head->uh_type); - } - unit_head = (dmar_acpi_unit_head_t *) - ((unsigned long)unit_head + - unit_head->uh_length); - } - -#ifdef DEBUG - /* - * make sure the include_all drhd is the - * last drhd in the list, this is only for - * debug - */ - for (i = 0; i < DMAR_MAX_SEGMENT; i++) { - if (list_is_empty(&dmar_info->dmari_drhd[i])) - break; - - for_each_in_list(&dmar_info->dmari_drhd[i], drhd) { - if (drhd->di_include_all && - list_next(&dmar_info->dmari_drhd[i], drhd) - != NULL) { - list_remove(&dmar_info->dmari_drhd[i], drhd); - list_insert_tail(&dmar_info->dmari_drhd[i], - drhd); - dcmn_err(CE_CONT, - "include_all drhd is adjusted\n"); - } - } - } -#endif - - return (PARSE_DMAR_SUCCESS); -} - -/* - * detect_dmar() - * detect the dmar acpi table - */ -static boolean_t -detect_dmar(void) -{ - int len; - char *intel_iommu; - char *enable; - - /* - * if "intel-iommu = no" boot property is set, - * ignore intel iommu - */ - if ((len = do_bsys_getproplen(NULL, "intel-iommu")) > 0) { - intel_iommu = kmem_alloc(len, KM_SLEEP); - (void) do_bsys_getprop(NULL, "intel-iommu", intel_iommu); - if (strcmp(intel_iommu, "no") == 0) { - dcmn_err(CE_CONT, "\"intel-iommu=no\" was set\n"); - kmem_free(intel_iommu, len); - return (B_FALSE); - } - kmem_free(intel_iommu, len); - } - - /* - * Check rootnex.conf for enable/disable IOMMU - * Fake up a dev_t since searching global prop list needs it - */ - if (ddi_prop_lookup_string( - makedevice(ddi_name_to_major("rootnex"), 0), ddi_root_node(), - DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, - "intel-iommu", &enable) == DDI_PROP_SUCCESS) { - if (strcmp(enable, "false") == 0 || strcmp(enable, "no") == 0) { - dcmn_err(CE_CONT, - "\"intel-iommu=no\" set in rootnex.conf\n"); - ddi_prop_free(enable); - return (B_FALSE); - } - ddi_prop_free(enable); - } - - /* - * get dmar-table from system properties - */ - if ((len = do_bsys_getproplen(NULL, DMAR_TABLE_PROPNAME)) <= 0) { - dcmn_err(CE_CONT, "dmar-table getprop failed\n"); - return (B_FALSE); - } - dcmn_err(CE_CONT, "dmar-table length = %d\n", len); - dmart = kmem_alloc(len, KM_SLEEP); - (void) do_bsys_getprop(NULL, DMAR_TABLE_PROPNAME, dmart); - - return (B_TRUE); -} - -/* - * print dmar_info for debug - */ -static void -print_dmar_info(void) -{ - drhd_info_t *drhd; - rmrr_info_t *rmrr; - pci_dev_scope_t *dev; - int i; - - /* print the title */ - cmn_err(CE_CONT, "dmar_info->:\n"); - cmn_err(CE_CONT, "\thaw = %d\n", dmar_info->dmari_haw); - cmn_err(CE_CONT, "\tintr_remap = %d\n", - dmar_info->dmari_intr_remap ? 
1 : 0); - - /* print drhd info list */ - cmn_err(CE_CONT, "\ndrhd list:\n"); - for (i = 0; i < DMAR_MAX_SEGMENT; i++) { - if (list_is_empty(&dmar_info->dmari_drhd[i])) - break; - for (drhd = list_head(&dmar_info->dmari_drhd[i]); - drhd != NULL; drhd = list_next(&dmar_info->dmari_drhd[i], - drhd)) { - cmn_err(CE_CONT, "\n\tsegment = %d\n", - drhd->di_segment); - cmn_err(CE_CONT, "\treg_base = 0x%" PRIx64 "\n", - drhd->di_reg_base); - cmn_err(CE_CONT, "\tinclude_all = %s\n", - drhd->di_include_all ? "yes" : "no"); - cmn_err(CE_CONT, "\tdip = 0x%p\n", - (void *)drhd->di_dip); - cmn_err(CE_CONT, "\tdevice list:\n"); - for (dev = list_head(&drhd->di_dev_list); - dev != NULL; dev = list_next(&drhd->di_dev_list, - dev)) { - cmn_err(CE_CONT, "\n\t\tbus = %d\n", - dev->pds_bus); - cmn_err(CE_CONT, "\t\tdev = %d\n", - dev->pds_dev); - cmn_err(CE_CONT, "\t\tfunc = %d\n", - dev->pds_func); - cmn_err(CE_CONT, "\t\ttype = %d\n", - dev->pds_type); - } - } - } - - /* print rmrr info list */ - cmn_err(CE_CONT, "\nrmrr list:\n"); - for (i = 0; i < DMAR_MAX_SEGMENT; i++) { - if (list_is_empty(&dmar_info->dmari_rmrr[i])) - break; - for (rmrr = list_head(&dmar_info->dmari_rmrr[i]); - rmrr != NULL; rmrr = list_next(&dmar_info->dmari_rmrr[i], - rmrr)) { - cmn_err(CE_CONT, "\n\tsegment = %d\n", - rmrr->ri_segment); - cmn_err(CE_CONT, "\tbaseaddr = 0x%" PRIx64 "\n", - rmrr->ri_baseaddr); - cmn_err(CE_CONT, "\tlimiaddr = 0x%" PRIx64 "\n", - rmrr->ri_limiaddr); - cmn_err(CE_CONT, "\tdevice list:\n"); - for (dev = list_head(&rmrr->ri_dev_list); - dev != NULL; - dev = list_next(&rmrr->ri_dev_list, dev)) { - cmn_err(CE_CONT, "\n\t\tbus = %d\n", - dev->pds_bus); - cmn_err(CE_CONT, "\t\tdev = %d\n", - dev->pds_dev); - cmn_err(CE_CONT, "\t\tfunc = %d\n", - dev->pds_func); - cmn_err(CE_CONT, "\t\ttype = %d\n", - dev->pds_type); - } - } - } -} - -/* - * intel_iommu_probe_and_parse() - * called from rootnex driver - */ -void -intel_iommu_probe_and_parse(void) -{ - int i, len; - char *opt; - - dmar_info = NULL; - - /* - * retrieve the print-dmar-acpi boot option - */ - if ((len = do_bsys_getproplen(NULL, "print-dmar-acpi")) > 0) { - opt = kmem_alloc(len, KM_SLEEP); - (void) do_bsys_getprop(NULL, "print-dmar-acpi", opt); - if (strcmp(opt, "yes") == 0 || - strcmp(opt, "true") == 0) { - intel_dmar_acpi_debug = 1; - cmn_err(CE_CONT, "\"print-dmar-acpi=true\" was set\n"); - } else if (strcmp(opt, "no") == 0 || - strcmp(opt, "false") == 0) { - intel_dmar_acpi_debug = 0; - cmn_err(CE_CONT, "\"print-dmar-acpi=false\" was set\n"); - } - kmem_free(opt, len); - } - - /* - * retrieve the print-iommu-blacklist-id boot option - */ - if ((len = do_bsys_getproplen(NULL, "print-iommu-blacklist-id")) > 0) { - opt = kmem_alloc(len, KM_SLEEP); - (void) do_bsys_getprop(NULL, "print-iommu-blacklist-id", opt); - if (strcmp(opt, "yes") == 0 || - strcmp(opt, "true") == 0) { - intel_iommu_blacklist_id = 1; - } else if (strcmp(opt, "no") == 0 || - strcmp(opt, "false") == 0) { - intel_iommu_blacklist_id = 0; - } - kmem_free(opt, len); - } - - - dcmn_err(CE_CONT, "intel iommu detect start\n"); - - if (detect_dmar() == B_FALSE) { - dcmn_err(CE_CONT, "no intel iommu detected\n"); - return; - } - - /* - * the platform has intel iommu, setup globals - */ - intel_iommu_support = B_TRUE; - dmar_info = kmem_zalloc(sizeof (intel_dmar_info_t), - KM_SLEEP); - for (i = 0; i < DMAR_MAX_SEGMENT; i++) { - list_create(&(dmar_info->dmari_drhd[i]), sizeof (drhd_info_t), - offsetof(drhd_info_t, node)); - list_create(&(dmar_info->dmari_rmrr[i]), sizeof (rmrr_info_t), - 
offsetof(rmrr_info_t, node)); - } - - /* create ioapic - drhd map info for interrupt remapping */ - list_create(&ioapic_drhd_infos, sizeof (ioapic_drhd_info_t), - offsetof(ioapic_drhd_info_t, node)); - - /* - * parse dmar acpi table - */ - if (parse_dmar() != PARSE_DMAR_SUCCESS) { - intel_iommu_release_dmar_info(); - dcmn_err(CE_CONT, "DMAR parse failed\n"); - return; - } - - /* - * create dev_info structure per hrhd - * and prepare it for binding driver - */ - create_dmar_devi(); - - /* - * print the dmar info if the debug - * is set - */ - if (intel_dmar_acpi_debug) - print_dmar_info(); -}
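
The blacklist match that the deleted dmar_blacklisted() performed over the rootnex "intel-iommu-blacklist" property walks the string array in groups of four: a key ("SMBIOS" or "DMAR") followed by three identifiers, where an empty string acts as a wildcard. A standalone C sketch of the SMBIOS case; the sample table in main() is made up.

	#include <stdio.h>
	#include <string.h>

	/* mirrors the 4-tuple scan in the deleted dmar_blacklisted() */
	static int
	blacklisted(char **bl, unsigned n, const char *mfg,
	    const char *prod, const char *ver)
	{
		unsigned i;

		if (n < 4 || n % 4 != 0)
			return (0);

		for (i = 0; i < n; i += 4) {
			if (strcmp(bl[i], "SMBIOS") == 0 &&
			    strcmp(bl[i + 1], mfg) == 0 &&
			    (bl[i + 2][0] == '\0' ||
			    strcmp(bl[i + 2], prod) == 0) &&
			    (bl[i + 3][0] == '\0' ||
			    strcmp(bl[i + 3], ver) == 0))
				return (1);
		}
		return (0);
	}

	int
	main(void)
	{
		/* hypothetical entry: any version of this product matches */
		char *bl[] = { "SMBIOS", "ACME Corp", "Portable X1", "" };

		printf("%d\n",
		    blacklisted(bl, 4, "ACME Corp", "Portable X1", "1.0"));
		return (0);
	}
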
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/i86pc/io/immu.c Sat Jan 30 18:23:16 2010 -0800 @@ -0,0 +1,1033 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Portions Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2009, Intel Corporation. + * All rights reserved. + */ + +/* + * Intel IOMMU implementation + * This file contains Intel IOMMU code exported + * to the rest of the system and code that deals + * with the Intel IOMMU as a whole. + */ + +#include <sys/conf.h> +#include <sys/modctl.h> +#include <sys/pci.h> +#include <sys/pci_impl.h> +#include <sys/sysmacros.h> +#include <sys/ddi.h> +#include <sys/ddidmareq.h> +#include <sys/ddi_impldefs.h> +#include <sys/ddifm.h> +#include <sys/sunndi.h> +#include <sys/debug.h> +#include <sys/fm/protocol.h> +#include <sys/note.h> +#include <sys/apic.h> +#include <vm/hat_i86.h> +#include <sys/smp_impldefs.h> +#include <sys/spl.h> +#include <sys/archsystm.h> +#include <sys/x86_archext.h> +#include <sys/rootnex.h> +#include <sys/avl.h> +#include <sys/bootconf.h> +#include <sys/bootinfo.h> +#include <sys/atomic.h> +#include <sys/immu.h> + +/* ########################### Globals and tunables ######################## */ +/* + * Global switches (boolean) that can be toggled either via boot options + * or via /etc/system or kmdb + */ + +/* Various features */ +boolean_t immu_enable = B_TRUE; +boolean_t immu_dvma_enable = B_TRUE; + +/* accessed in other files so not static */ +boolean_t immu_gfxdvma_enable = B_TRUE; +boolean_t immu_intrmap_enable = B_FALSE; +boolean_t immu_qinv_enable = B_FALSE; + +/* various quirks that need working around */ + +/* XXX We always map page 0 read/write for now */ +boolean_t immu_quirk_usbpage0 = B_TRUE; +boolean_t immu_quirk_usbrmrr = B_TRUE; +boolean_t immu_quirk_usbfullpa; +boolean_t immu_quirk_mobile4; + +boolean_t immu_mmio_safe = B_TRUE; + +/* debug messages */ +boolean_t immu_dmar_print; + +/* ############ END OPTIONS section ################ */ + +/* + * Global used internally by Intel IOMMU code + */ +dev_info_t *root_devinfo; +kmutex_t immu_lock; +list_t immu_list; +boolean_t immu_setup; +boolean_t immu_running; +boolean_t immu_quiesced; + +/* ######################## END Globals and tunables ###################### */ +/* Globals used only in this file */ +static char **black_array; +static uint_t nblacks; +/* ###################### Utility routines ############################# */ + +/* + * Check if the device has mobile 4 chipset + */ +static int +check_mobile4(dev_info_t *dip, void *arg) +{ + _NOTE(ARGUNUSED(arg)); + int vendor, device; + int *ip = (int *)arg; + + ASSERT(arg); + + vendor = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 
DDI_PROP_DONTPASS, + "vendor-id", -1); + device = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, + "device-id", -1); + + if (vendor == 0x8086 && device == 0x2a40) { + *ip = B_TRUE; + ddi_err(DER_NOTE, dip, "IMMU: Mobile 4 chipset detected. " + "Force setting IOMMU write buffer"); + return (DDI_WALK_TERMINATE); + } else { + return (DDI_WALK_CONTINUE); + } +} + +static void +map_bios_rsvd_mem(dev_info_t *dip) +{ + struct memlist *mp; + int e; + + memlist_read_lock(); + + mp = bios_rsvd; + while (mp != NULL) { + memrng_t mrng = {0}; + + ddi_err(DER_LOG, dip, "IMMU: Mapping BIOS rsvd range " + "[0x%" PRIx64 " - 0x%"PRIx64 "]\n", mp->ml_address, + mp->ml_address + mp->ml_size); + + mrng.mrng_start = IMMU_ROUNDOWN(mp->ml_address); + mrng.mrng_npages = IMMU_ROUNDUP(mp->ml_size) / IMMU_PAGESIZE; + + e = immu_dvma_map(NULL, NULL, &mrng, 0, dip, IMMU_FLAGS_MEMRNG); + ASSERT(e == DDI_DMA_MAPPED || e == DDI_DMA_USE_PHYSICAL); + + mp = mp->ml_next; + } + + memlist_read_unlock(); +} + +/* + * Check if the device is a USB controller + */ +/*ARGSUSED*/ +static void +check_usb(dev_info_t *dip, void *arg) +{ + const char *drv = ddi_driver_name(dip); + + if (drv == NULL || + (strcmp(drv, "uhci") != 0 && strcmp(drv, "ohci") != 0 && + strcmp(drv, "ehci") != 0)) { + return; + } + + /* This must come first since it does unity mapping */ + if (immu_quirk_usbfullpa == B_TRUE) { + int e; + ddi_err(DER_NOTE, dip, "Applying USB FULL PA quirk"); + e = immu_dvma_map(NULL, NULL, NULL, 0, dip, IMMU_FLAGS_UNITY); + /* for unity mode, map will return USE_PHYSICAL */ + ASSERT(e == DDI_DMA_USE_PHYSICAL); + } + + if (immu_quirk_usbrmrr == B_TRUE) { + ddi_err(DER_LOG, dip, "Applying USB RMRR quirk"); + map_bios_rsvd_mem(dip); + } +} + +/* + * Check if the device is an LPC device + */ +/*ARGSUSED*/ +static void +check_lpc(dev_info_t *dip, void *arg) +{ + immu_devi_t *immu_devi; + + immu_devi = immu_devi_get(dip); + ASSERT(immu_devi); + if (immu_devi->imd_lpc == B_TRUE) { + ddi_err(DER_LOG, dip, "IMMU: Found LPC device"); + /* This will put the immu_devi on the LPC "specials" list */ + (void) immu_dvma_get_immu(dip, IMMU_FLAGS_SLEEP); + } +} + +/* + * Check if the device is a GFX device + */ +/*ARGSUSED*/ +static void +check_gfx(dev_info_t *dip, void *arg) +{ + immu_devi_t *immu_devi; + int e; + + immu_devi = immu_devi_get(dip); + ASSERT(immu_devi); + if (immu_devi->imd_display == B_TRUE) { + ddi_err(DER_LOG, dip, "IMMU: Found GFX device"); + /* This will put the immu_devi on the GFX "specials" list */ + (void) immu_dvma_get_immu(dip, IMMU_FLAGS_SLEEP); + e = immu_dvma_map(NULL, NULL, NULL, 0, dip, IMMU_FLAGS_UNITY); + /* for unity mode, map will return USE_PHYSICAL */ + ASSERT(e == DDI_DMA_USE_PHYSICAL); + } +} + +static void +walk_tree(int (*f)(dev_info_t *, void *), void *arg) +{ + int count; + + ndi_devi_enter(root_devinfo, &count); + ddi_walk_devs(ddi_get_child(root_devinfo), f, arg); + ndi_devi_exit(root_devinfo, count); +} + +static int +check_pre_setup_quirks(dev_info_t *dip, void *arg) +{ + /* just 1 check right now */ + return (check_mobile4(dip, arg)); +} + +static int +check_pre_startup_quirks(dev_info_t *dip, void *arg) +{ + if (immu_devi_set(dip, IMMU_FLAGS_SLEEP) != DDI_SUCCESS) { + ddi_err(DER_PANIC, dip, "Failed to get immu_devi"); + } + + check_gfx(dip, arg); + + check_lpc(dip, arg); + + check_usb(dip, arg); + + return (DDI_WALK_CONTINUE); +} + +static void +pre_setup_quirks(void) +{ + walk_tree(check_pre_setup_quirks, &immu_quirk_mobile4); +} + +static void +pre_startup_quirks(void) +{ + 
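/* set up immu_devi for every devinfo node and apply the GFX, LPC and USB quirks */ + 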
walk_tree(check_pre_startup_quirks, NULL); + + immu_dmar_rmrr_map(); +} + +/* + * get_bootopt() + * check a boot option (always a boolean) + */ +static void +get_bootopt(char *bopt, boolean_t *kvar) +{ + char *val = NULL; + + ASSERT(bopt); + ASSERT(kvar); + + /* + * All boot options set at the GRUB menu become + * properties on the rootnex. + */ + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, root_devinfo, + DDI_PROP_DONTPASS, bopt, &val) == DDI_SUCCESS) { + ASSERT(val); + if (strcmp(val, "true") == 0) { + *kvar = B_TRUE; + } else if (strcmp(val, "false") == 0) { + *kvar = B_FALSE; + } else { + ddi_err(DER_WARN, NULL, "boot option %s=\"%s\" ", + "is not set to true or false. Ignoring option.", + bopt, val); + } + ddi_prop_free(val); + } +} + +static void +read_boot_options(void) +{ + /* enable/disable options */ + get_bootopt("immu-enable", &immu_enable); + get_bootopt("immu-dvma-enable", &immu_dvma_enable); + get_bootopt("immu-gfxdvma-enable", &immu_gfxdvma_enable); + get_bootopt("immu-intrmap-enable", &immu_intrmap_enable); + get_bootopt("immu-qinv-enable", &immu_qinv_enable); + get_bootopt("immu-mmio-safe", &immu_mmio_safe); + + /* workaround switches */ + get_bootopt("immu-quirk-usbpage0", &immu_quirk_usbpage0); + get_bootopt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa); + get_bootopt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr); + + /* debug printing */ + get_bootopt("immu-dmar-print", &immu_dmar_print); +} + +/* + * Note, this will not catch hardware not enumerated + * in early boot + */ +static boolean_t +blacklisted_driver(void) +{ + char **strptr; + int i; + major_t maj; + + ASSERT((black_array == NULL) ^ (nblacks != 0)); + + /* need at least 2 strings */ + if (nblacks < 2) { + return (B_FALSE); + } + + strptr = black_array; + for (i = 0; nblacks - i > 1; i++) { + if (strcmp(*strptr++, "DRIVER") == 0) { + if ((maj = ddi_name_to_major(*strptr++)) + != DDI_MAJOR_T_NONE) { + /* is there hardware bound to this drvr */ + if (devnamesp[maj].dn_head != NULL) { + return (B_TRUE); + } + } + i += 1; /* for loop adds 1, so add only 1 here */ + } + } + + return (B_FALSE); +} + +static boolean_t +blacklisted_smbios(void) +{ + id_t smid; + smbios_hdl_t *smhdl; + smbios_info_t sminf; + smbios_system_t smsys; + char *mfg, *product, *version; + char **strptr; + int i; + + ASSERT((black_array == NULL) ^ (nblacks != 0)); + + /* need at least 4 strings for this setting */ + if (nblacks < 4) { + return (B_FALSE); + } + + smhdl = smbios_open(NULL, SMB_VERSION, ksmbios_flags, NULL); + if (smhdl == NULL || + (smid = smbios_info_system(smhdl, &smsys)) == SMB_ERR || + smbios_info_common(smhdl, smid, &sminf) == SMB_ERR) { + return (B_FALSE); + } + + mfg = (char *)sminf.smbi_manufacturer; + product = (char *)sminf.smbi_product; + version = (char *)sminf.smbi_version; + + ddi_err(DER_CONT, NULL, "?System SMBIOS information:\n"); + ddi_err(DER_CONT, NULL, "?Manufacturer = <%s>\n", mfg); + ddi_err(DER_CONT, NULL, "?Product = <%s>\n", product); + ddi_err(DER_CONT, NULL, "?Version = <%s>\n", version); + + strptr = black_array; + for (i = 0; nblacks - i > 3; i++) { + if (strcmp(*strptr++, "SMBIOS") == 0) { + if (strcmp(*strptr++, mfg) == 0 && + ((char *)strptr == '\0' || + strcmp(*strptr++, product) == 0) && + ((char *)strptr == '\0' || + strcmp(*strptr++, version) == 0)) { + return (B_TRUE); + } + i += 3; + } + } + + return (B_FALSE); +} + +static boolean_t +blacklisted_acpi(void) +{ + ASSERT((black_array == NULL) ^ (nblacks != 0)); + if (nblacks == 0) { + return (B_FALSE); + } + + return 
(immu_dmar_blacklisted(black_array, nblacks)); +} + +/* + * Check if system is blacklisted by the Intel IOMMU driver + * i.e. should Intel IOMMU be disabled on this system + * Currently a system can be blacklisted based on the + * following criteria: + * + * 1. DMAR ACPI table information. + * This information includes things like + * manufacturer and revision number. If rootnex.conf + * has matching info set in its blacklist property + * then Intel IOMMU will be disabled + * + * 2. SMBIOS information + * + * 3. Driver installed - useful if a particular + * driver or hardware is toxic if Intel IOMMU + * is turned on. + */ + +static void +blacklist_setup(void) +{ + char **string_array; + uint_t nstrings; + + /* + * Check the rootnex.conf blacklist property. + * Fake up a dev_t since searching the global + * property list needs it + */ + if (ddi_prop_lookup_string_array( + makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo, + DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, "immu-blacklist", + &string_array, &nstrings) != DDI_PROP_SUCCESS) { + return; + } + + /* smallest blacklist criteria works with multiples of 2 */ + if (nstrings % 2 != 0) { + ddi_err(DER_WARN, NULL, "Invalid IOMMU blacklist " + "rootnex.conf: number of strings must be a " + "multiple of 2"); + ddi_prop_free(string_array); + return; + } + + black_array = string_array; + nblacks = nstrings; +} + +static void +blacklist_destroy(void) +{ + if (black_array) { + ddi_prop_free(black_array); + black_array = NULL; + nblacks = 0; + } + + ASSERT(black_array == NULL); + ASSERT(nblacks == 0); +} + + +/* + * Now set all the fields in the order they are defined + * We do this only as a defensive-coding practice, it is + * not a correctness issue. + */ +static void * +immu_state_alloc(int seg, void *dmar_unit) +{ + immu_t *immu; + + dmar_unit = immu_dmar_walk_units(seg, dmar_unit); + if (dmar_unit == NULL) { + /* No more IOMMUs in this segment */ + return (NULL); + } + + immu = kmem_zalloc(sizeof (immu_t), KM_SLEEP); + + mutex_init(&(immu->immu_lock), NULL, MUTEX_DRIVER, NULL); + + mutex_enter(&(immu->immu_lock)); + + immu->immu_dmar_unit = dmar_unit; + immu->immu_name = ddi_strdup(immu_dmar_unit_name(dmar_unit), + KM_SLEEP); + immu->immu_dip = immu_dmar_unit_dip(dmar_unit); + + /* + * the immu_intr_lock mutex is grabbed by the IOMMU + * unit's interrupt handler so we need to use an + * interrupt cookie for the mutex + */ + mutex_init(&(immu->immu_intr_lock), NULL, MUTEX_DRIVER, + (void *)ipltospl(IMMU_INTR_IPL)); + + /* IOMMU regs related */ + mutex_init(&(immu->immu_regs_lock), NULL, MUTEX_DEFAULT, NULL); + + /* DVMA related */ + immu->immu_dvma_coherent = B_FALSE; + + /* DVMA context related */ + rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL); + + /* DVMA domain related */ + list_create(&(immu->immu_domain_list), sizeof (domain_t), + offsetof(domain_t, dom_immu_node)); + + /* DVMA special device lists */ + immu->immu_dvma_gfx_only = B_FALSE; + list_create(&(immu->immu_dvma_lpc_list), sizeof (immu_devi_t), + offsetof(immu_devi_t, imd_spc_node)); + list_create(&(immu->immu_dvma_gfx_list), sizeof (immu_devi_t), + offsetof(immu_devi_t, imd_spc_node)); + + /* interrupt remapping related */ + mutex_init(&(immu->immu_intrmap_lock), NULL, MUTEX_DEFAULT, NULL); + + /* qinv related */ + mutex_init(&(immu->immu_qinv_lock), NULL, MUTEX_DEFAULT, NULL); + + /* + * insert this immu unit into the system-wide list + */ + list_insert_tail(&immu_list, immu); + + mutex_exit(&(immu->immu_lock)); + + ddi_err(DER_LOG, immu->immu_dip, "IMMU: unit 
setup"); + + immu_dmar_set_immu(dmar_unit, immu); + + return (dmar_unit); +} + +static void +immu_subsystems_setup(void) +{ + int seg; + void *unit_hdl; + + ddi_err(DER_VERB, NULL, + "Creating state structures for Intel IOMMU units\n"); + + ASSERT(immu_setup == B_FALSE); + ASSERT(immu_running == B_FALSE); + + mutex_init(&immu_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&immu_list, sizeof (immu_t), offsetof(immu_t, immu_node)); + + mutex_enter(&immu_lock); + + unit_hdl = NULL; + for (seg = 0; seg < IMMU_MAXSEG; seg++) { + while (unit_hdl = immu_state_alloc(seg, unit_hdl)) { + ; + } + } + + immu_regs_setup(&immu_list); /* subsequent code needs this first */ + immu_dvma_setup(&immu_list); + immu_intrmap_setup(&immu_list); + immu_qinv_setup(&immu_list); + + mutex_exit(&immu_lock); +} + +/* + * immu_subsystems_startup() + * startup all units that were setup + */ +static void +immu_subsystems_startup(void) +{ + immu_t *immu; + + mutex_enter(&immu_lock); + + ASSERT(immu_setup == B_TRUE); + ASSERT(immu_running == B_FALSE); + + immu_dmar_startup(); + + immu = list_head(&immu_list); + for (; immu; immu = list_next(&immu_list, immu)) { + + mutex_enter(&(immu->immu_lock)); + + immu_intr_register(immu); + immu_dvma_startup(immu); + immu_intrmap_startup(immu); + immu_qinv_startup(immu); + + /* + * Set IOMMU unit's regs to do + * the actual startup. This will + * set immu->immu_running field + * if the unit is successfully + * started + */ + immu_regs_startup(immu); + + mutex_exit(&(immu->immu_lock)); + } + + mutex_exit(&immu_lock); +} + +/* ################## Intel IOMMU internal interfaces ###################### */ + +/* + * Internal interfaces for IOMMU code (i.e. not exported to rootnex + * or rest of system) + */ + +/* + * ddip can be NULL, in which case we walk up until we find the root dip + * NOTE: We never visit the root dip since its not a hardware node + */ +int +immu_walk_ancestor( + dev_info_t *rdip, + dev_info_t *ddip, + int (*func)(dev_info_t *, void *arg), + void *arg, + int *lvlp, + immu_flags_t immu_flags) +{ + dev_info_t *pdip; + int level; + int error = DDI_SUCCESS; + + ASSERT(root_devinfo); + ASSERT(rdip); + ASSERT(rdip != root_devinfo); + ASSERT(func); + + /* ddip and immu can be NULL */ + + /* Hold rdip so that branch is not detached */ + ndi_hold_devi(rdip); + for (pdip = rdip, level = 1; pdip && pdip != root_devinfo; + pdip = ddi_get_parent(pdip), level++) { + + if (immu_devi_set(pdip, immu_flags) != DDI_SUCCESS) { + error = DDI_FAILURE; + break; + } + if (func(pdip, arg) == DDI_WALK_TERMINATE) { + break; + } + if (immu_flags & IMMU_FLAGS_DONTPASS) { + break; + } + if (pdip == ddip) { + break; + } + } + + ndi_rele_devi(rdip); + + if (lvlp) + *lvlp = level; + + return (error); +} + +/* ######################## Intel IOMMU entry points ####################### */ +/* + * immu_init() + * called from rootnex_attach(). setup but don't startup the Intel IOMMU + * This is the first function called in Intel IOMMU code + */ +void +immu_init(void) +{ + char *phony_reg = "A thing of beauty is a joy forever"; + + /* Set some global shorthands that are needed by all of IOMMU code */ + ASSERT(root_devinfo == NULL); + root_devinfo = ddi_root_node(); + + /* + * Intel IOMMU only supported only if MMU(CPU) page size is == + * IOMMU pages size. + */ + /*LINTED*/ + if (MMU_PAGESIZE != IMMU_PAGESIZE) { + ddi_err(DER_WARN, NULL, + "MMU page size (%d) is not equal to\n" + "IOMMU page size (%d). " + "Disabling Intel IOMMU. 
", + MMU_PAGESIZE, IMMU_PAGESIZE); + immu_enable = B_FALSE; + return; + } + + /* + * retrieve the Intel IOMMU boot options. + * Do this before parsing immu ACPI table + * as a boot option could potentially affect + * ACPI parsing. + */ + ddi_err(DER_CONT, NULL, "?Reading Intel IOMMU boot options\n"); + read_boot_options(); + + /* + * Check the IOMMU enable boot-option first. + * This is so that we can skip parsing the ACPI table + * if necessary because that may cause problems in + * systems with buggy BIOS or ACPI tables + */ + if (immu_enable == B_FALSE) { + return; + } + + /* + * Next, check if the system even has an Intel IOMMU + * We use the presence or absence of the IOMMU ACPI + * table to detect Intel IOMMU. + */ + if (immu_dmar_setup() != DDI_SUCCESS) { + immu_enable = B_FALSE; + return; + } + + /* + * Check blacklists + */ + blacklist_setup(); + + if (blacklisted_smbios() == B_TRUE) { + blacklist_destroy(); + immu_enable = B_FALSE; + return; + } + + if (blacklisted_driver() == B_TRUE) { + blacklist_destroy(); + immu_enable = B_FALSE; + return; + } + + /* + * Read the "raw" DMAR ACPI table to get information + * and convert into a form we can use. + */ + if (immu_dmar_parse() != DDI_SUCCESS) { + blacklist_destroy(); + immu_enable = B_FALSE; + return; + } + + /* + * now that we have processed the ACPI table + * check if we need to blacklist this system + * based on ACPI info + */ + if (blacklisted_acpi() == B_TRUE) { + immu_dmar_destroy(); + blacklist_destroy(); + immu_enable = B_FALSE; + return; + } + + blacklist_destroy(); + + /* + * Check if system has HW quirks. + */ + pre_setup_quirks(); + + /* Now do the rest of the setup */ + immu_subsystems_setup(); + + /* + * Now that the IMMU is setup, create a phony + * reg prop so that suspend/resume works + */ + if (ddi_prop_update_byte_array(DDI_DEV_T_NONE, root_devinfo, "reg", + (uchar_t *)phony_reg, strlen(phony_reg) + 1) != DDI_PROP_SUCCESS) { + ddi_err(DER_PANIC, NULL, "Failed to create reg prop for " + "rootnex node"); + /*NOTREACHED*/ + } + + immu_setup = B_TRUE; +} + +/* + * immu_startup() + * called directly by boot code to startup + * all units of the IOMMU + */ +void +immu_startup(void) +{ + /* + * If IOMMU is disabled, do nothing + */ + if (immu_enable == B_FALSE) { + return; + } + + if (immu_setup == B_FALSE) { + ddi_err(DER_WARN, NULL, "Intel IOMMU not setup, " + "skipping IOMU startup"); + return; + } + + pre_startup_quirks(); + + ddi_err(DER_CONT, NULL, + "?Starting Intel IOMMU (dmar) units...\n"); + + immu_subsystems_startup(); + + immu_running = B_TRUE; +} + +/* + * immu_map_sgl() + * called from rootnex_coredma_bindhdl() when Intel + * IOMMU is enabled to build DVMA cookies and map them. 
+ */ +int +immu_map_sgl(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, + int prealloc_count, dev_info_t *rdip) +{ + if (immu_running == B_FALSE) { + return (DDI_DMA_USE_PHYSICAL); + } + + return (immu_dvma_map(hp, dmareq, NULL, prealloc_count, rdip, + IMMU_FLAGS_DMAHDL)); +} + +/* + * immu_unmap_sgl() + * called from rootnex_coredma_unbindhdl(), to unmap DVMA + * cookies and free them + */ +int +immu_unmap_sgl(ddi_dma_impl_t *hp, dev_info_t *rdip) +{ + if (immu_running == B_FALSE) { + return (DDI_DMA_USE_PHYSICAL); + } + + return (immu_dvma_unmap(hp, rdip)); +} + +/* + * Hook to notify IOMMU code of device tree changes + */ +void +immu_device_tree_changed(void) +{ + if (immu_setup == B_FALSE) { + return; + } + + ddi_err(DER_WARN, NULL, "Intel IOMMU currently " + "does not use device tree updates"); +} + +/* + * Hook to notify IOMMU code of memory changes + */ +void +immu_physmem_update(uint64_t addr, uint64_t size) +{ + if (immu_setup == B_FALSE) { + return; + } + immu_dvma_physmem_update(addr, size); +} + +/* + * immu_quiesce() + * quiesce all units that are running + */ +int +immu_quiesce(void) +{ + immu_t *immu; + int ret = DDI_SUCCESS; + + mutex_enter(&immu_lock); + + if (immu_running == B_FALSE) { + mutex_exit(&immu_lock); + return (DDI_SUCCESS); + } + + ASSERT(immu_setup == B_TRUE); + + immu = list_head(&immu_list); + for (; immu; immu = list_next(&immu_list, immu)) { + + /* if immu is not running, we don't quiesce */ + if (immu->immu_regs_running == B_FALSE) + continue; + + /* flush caches */ + rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER); + immu_regs_context_flush(immu, 0, 0, 0, CONTEXT_GLOBAL); + rw_exit(&(immu->immu_ctx_rwlock)); + immu_regs_iotlb_flush(immu, 0, 0, 0, 0, IOTLB_GLOBAL); + immu_regs_wbf_flush(immu); + + mutex_enter(&(immu->immu_lock)); + + /* + * Set IOMMU unit's regs to do + * the actual shutdown. + */ + immu_regs_shutdown(immu); + immu_regs_suspend(immu); + + /* if immu is still running, we failed */ + if (immu->immu_regs_running == B_TRUE) + ret = DDI_FAILURE; + else + immu->immu_regs_quiesced = B_TRUE; + + mutex_exit(&(immu->immu_lock)); + } + mutex_exit(&immu_lock); + + if (ret == DDI_SUCCESS) { + immu_running = B_FALSE; + immu_quiesced = B_TRUE; + } + + return (ret); +} + +/* + * immu_unquiesce() + * unquiesce all units + */ +int +immu_unquiesce(void) +{ + immu_t *immu; + int ret = DDI_SUCCESS; + + mutex_enter(&immu_lock); + + if (immu_quiesced == B_FALSE) { + mutex_exit(&immu_lock); + return (DDI_SUCCESS); + } + + ASSERT(immu_setup == B_TRUE); + ASSERT(immu_running == B_FALSE); + + immu = list_head(&immu_list); + for (; immu; immu = list_next(&immu_list, immu)) { + + mutex_enter(&(immu->immu_lock)); + + /* if immu was not quiesced, i.e. it was not running before */ + if (immu->immu_regs_quiesced == B_FALSE) { + mutex_exit(&(immu->immu_lock)); + continue; + } + + if (immu_regs_resume(immu) != DDI_SUCCESS) { + ret = DDI_FAILURE; + mutex_exit(&(immu->immu_lock)); + continue; + } + + /* flush caches before unquiesce */ + rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER); + immu_regs_context_flush(immu, 0, 0, 0, CONTEXT_GLOBAL); + rw_exit(&(immu->immu_ctx_rwlock)); + immu_regs_iotlb_flush(immu, 0, 0, 0, 0, IOTLB_GLOBAL); + + /* + * Set IOMMU unit's regs to do + * the actual startup. 
This will + * set immu->immu_regs_running field + * if the unit is successfully + * started + */ + immu_regs_startup(immu); + + if (immu->immu_regs_running == B_FALSE) { + ret = DDI_FAILURE; + } else { + immu_quiesced = B_FALSE; + immu_running = B_TRUE; + immu->immu_regs_quiesced = B_FALSE; + } + + mutex_exit(&(immu->immu_lock)); + } + + mutex_exit(&immu_lock); + + return (ret); +} + +/* ############## END Intel IOMMU entry points ################## */
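A minimal illustration of how the switches and the blacklist above can be set; the driver and SMBIOS strings here are hypothetical placeholders. The immu-* options become rootnex properties when passed as boot properties (for example on the GRUB kernel line), and the blacklist is the "immu-blacklist" string array in rootnex.conf, consumed as "DRIVER" pairs by blacklisted_driver() and as "SMBIOS"/"DMAR" quadruples by blacklisted_smbios() and immu_dmar_blacklisted():

	# GRUB menu.lst kernel line: disable the IOMMU, or enable DMAR debug output
	kernel$ /platform/i86pc/kernel/$ISADIR/unix -B immu-enable=false
	kernel$ /platform/i86pc/kernel/$ISADIR/unix -B immu-dmar-print=true

	# /kernel/drv/rootnex.conf: hypothetical blacklist entries
	immu-blacklist= "DRIVER", "myhba",
	    "SMBIOS", "Acme Inc.", "Widget 9000", "1.0";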
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/i86pc/io/immu_dmar.c Sat Jan 30 18:23:16 2010 -0800 @@ -0,0 +1,1289 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Portions Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2009, Intel Corporation. + * All rights reserved. + */ + + +#include <sys/debug.h> +#include <sys/sysmacros.h> +#include <sys/types.h> +#include <sys/kmem.h> +#include <sys/sunddi.h> +#include <sys/list.h> +#include <sys/pci.h> +#include <sys/pci_cfgspace.h> +#include <sys/pci_impl.h> +#include <sys/sunndi.h> +#include <sys/ksynch.h> +#include <sys/cmn_err.h> +#include <sys/bootconf.h> +#include <sys/int_fmtio.h> +#include <sys/smbios.h> +#include <sys/acpi/acpi.h> +#include <sys/acpica.h> +#include <sys/iommulib.h> +#include <sys/immu.h> + +static void dmar_table_destroy(dmar_table_t *tbl); + +/* + * internal global variables + */ +static char *dmar_raw; /* raw DMAR ACPI table */ +static dmar_table_t *dmar_table; /* converted form of DMAR table */ + +/* + * global variables exported outside this file + */ +boolean_t dmar_print = B_FALSE; +kmutex_t ioapic_drhd_lock; +list_t ioapic_drhd_list; + +/* ######################################################################### */ + +/* + * helper functions to read the "raw" DMAR table + */ + +static uint8_t +get_uint8(char *cp) +{ + uint8_t val = *((uint8_t *)cp); + return (val); +} + +static uint16_t +get_uint16(char *cp) +{ + uint16_t val = *((uint16_t *)cp); + return (val); +} + +static uint32_t +get_uint32(char *cp) +{ + uint32_t val = *((uint32_t *)cp); + return (val); +} + +static uint64_t +get_uint64(char *cp) +{ + uint64_t val = *((uint64_t *)cp); + return (val); +} + +static char * +get_str(char *cp, uint_t len) +{ + char *str = kmem_alloc(len + 1, KM_SLEEP); + + (void) strlcpy(str, cp, len + 1); + + return (str); +} + +static void +scope_list_free(list_t *scope_list) +{ + scope_t *scope; + + if (list_is_empty(scope_list)) { + list_destroy(scope_list); + return; + } + + while ((scope = list_remove_head(scope_list)) != NULL) { + kmem_free(scope, sizeof (scope_t)); + } + + ASSERT(list_is_empty(scope_list)); + list_destroy(scope_list); +} + +static void +drhd_list_destroy(list_t *drhd_list) +{ + drhd_t *drhd; + + ASSERT(drhd_list); + + if (list_is_empty(drhd_list)) { + list_destroy(drhd_list); + return; + } + + while ((drhd = list_remove_head(drhd_list)) != NULL) { + scope_list_free(&(drhd->dr_scope_list)); + kmem_free(drhd, sizeof (drhd_t)); + } + + ASSERT(list_is_empty(drhd_list)); + list_destroy(drhd_list); +} + +static void +rmrr_list_destroy(list_t *rmrr_list) +{ + rmrr_t *rmrr; + + ASSERT(rmrr_list); + + if (list_is_empty(rmrr_list)) { + 
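/* nothing was parsed into this list; just tear down the empty list */ + 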
list_destroy(rmrr_list); + return; + } + + while ((rmrr = list_remove_head(rmrr_list)) != NULL) { + scope_list_free(&(rmrr->rm_scope_list)); + kmem_free(rmrr, sizeof (rmrr_t)); + } + + ASSERT(list_is_empty(rmrr_list)); + list_destroy(rmrr_list); +} + +/* + * parse_scope() + * parse a scope structure in the "raw" table + */ +static scope_t * +parse_scope(char *shead) +{ + scope_t *scope; + char *phead; + int bus, dev, func; + uint8_t startbus; + uint8_t len; + int depth; + + ASSERT(shead); + + scope = kmem_zalloc(sizeof (scope_t), KM_SLEEP); + scope->scp_type = get_uint8(&shead[0]); + scope->scp_enumid = get_uint8(&shead[4]); + + len = get_uint8(&shead[1]); + startbus = get_uint8(&shead[5]); + depth = (len - 6)/2; + ASSERT(depth >= 1); + + phead = &shead[6]; + + bus = startbus; + dev = get_uint8(phead++); + func = get_uint8(phead++); + + for (depth--; depth > 0; depth--) { + bus = pci_getb_func(bus, dev, func, PCI_BCNF_SECBUS); + dev = get_uint8(phead++); + func = get_uint8(phead++); + } + + ASSERT(bus >= 0 && bus < 256); + ASSERT(dev >= 0 && dev < 32); + ASSERT(func >= 0 && func < 8); + + /* ok we got the device BDF */ + scope->scp_bus = bus; + scope->scp_dev = dev; + scope->scp_func = func; + + return (scope); +} + + +/* setup the ioapic_drhd structure */ +static void +ioapic_drhd_setup(void) +{ + mutex_init(&(ioapic_drhd_lock), NULL, MUTEX_DEFAULT, NULL); + + mutex_enter(&(ioapic_drhd_lock)); + list_create(&(ioapic_drhd_list), sizeof (ioapic_drhd_t), + offsetof(ioapic_drhd_t, ioapic_node)); + mutex_exit(&(ioapic_drhd_lock)); +} + +/* get ioapic source id for interrupt remapping */ +static void +ioapic_drhd_insert(scope_t *scope, drhd_t *drhd) +{ + ioapic_drhd_t *idt; + + idt = kmem_zalloc(sizeof (ioapic_drhd_t), KM_SLEEP); + idt->ioapic_ioapicid = scope->scp_enumid; + idt->ioapic_sid = ((scope->scp_bus << 8) | (scope->scp_dev << 3) | + (scope->scp_func)); + idt->ioapic_drhd = drhd; + + mutex_enter(&ioapic_drhd_lock); + list_insert_tail(&ioapic_drhd_list, idt); + mutex_exit(&ioapic_drhd_lock); +} + +static ioapic_drhd_t * +ioapic_drhd_lookup(int ioapicid) +{ + ioapic_drhd_t *idt; + + mutex_enter(&ioapic_drhd_lock); + idt = list_head(&ioapic_drhd_list); + for (; idt; idt = list_next(&ioapic_drhd_list, idt)) { + if (idt->ioapic_ioapicid == ioapicid) { + break; + } + } + mutex_exit(&ioapic_drhd_lock); + + return (idt); +} + +static void +ioapic_drhd_destroy(void) +{ + ioapic_drhd_t *idt; + + mutex_enter(&ioapic_drhd_lock); + while (idt = list_remove_head(&ioapic_drhd_list)) { + kmem_free(idt, sizeof (ioapic_drhd_t)); + } + list_destroy(&ioapic_drhd_list); + mutex_exit(&(ioapic_drhd_lock)); + + mutex_destroy(&(ioapic_drhd_lock)); +} + +/* + * parse_drhd() + * parse the drhd units in the dmar table + */ +static int +parse_drhd(char *uhead, dmar_table_t *tbl) +{ + drhd_t *drhd; + int seg; + int len; + char *shead; + scope_t *scope; + + ASSERT(uhead); + ASSERT(tbl); + ASSERT(get_uint16(&uhead[0]) == DMAR_DRHD); + + seg = get_uint16(&uhead[6]); + if (seg < 0 || seg >= IMMU_MAXSEG) { + ddi_err(DER_WARN, NULL, "invalid segment# <%d>" + "in DRHD unit in ACPI DMAR table", seg); + return (DDI_FAILURE); + } + + drhd = kmem_zalloc(sizeof (drhd_t), KM_SLEEP); + mutex_init(&(drhd->dr_lock), NULL, MUTEX_DEFAULT, NULL); + list_create(&(drhd->dr_scope_list), sizeof (scope_t), + offsetof(scope_t, scp_node)); + + len = get_uint16(&uhead[2]); + drhd->dr_include_all = + (get_uint8(&uhead[4]) & DMAR_INCLUDE_ALL) ? 
B_TRUE : B_FALSE; + drhd->dr_seg = seg; + drhd->dr_regs = get_uint64(&uhead[8]); + + /* + * parse each scope. + */ + shead = &uhead[16]; + while (shead < &uhead[len - 1]) { + scope = parse_scope(shead); + if (scope == NULL) { + return (DDI_FAILURE); + } + + if (scope->scp_type == DMAR_IOAPIC) { + ioapic_drhd_insert(scope, drhd); + } + + list_insert_tail(&(drhd->dr_scope_list), scope); + shead += get_uint8(&shead[1]); + } + + list_insert_tail(&(tbl->tbl_drhd_list[drhd->dr_seg]), drhd); + + return (DDI_SUCCESS); +} + +/* + * parse_rmrr() + * parse the rmrr units in dmar table + */ +static int +parse_rmrr(char *uhead, dmar_table_t *tbl) +{ + rmrr_t *rmrr; + int seg; + int len; + char *shead; + scope_t *scope; + + ASSERT(uhead); + ASSERT(tbl); + ASSERT(get_uint16(&uhead[0]) == DMAR_RMRR); + + seg = get_uint16(&uhead[6]); + if (seg < 0 || seg >= IMMU_MAXSEG) { + ddi_err(DER_WARN, NULL, "invalid segment# <%d>" + "in RMRR unit in ACPI DMAR table", seg); + return (DDI_FAILURE); + } + + rmrr = kmem_zalloc(sizeof (rmrr_t), KM_SLEEP); + mutex_init(&(rmrr->rm_lock), NULL, MUTEX_DEFAULT, NULL); + list_create(&(rmrr->rm_scope_list), sizeof (scope_t), + offsetof(scope_t, scp_node)); + + /* RMRR region is [base,limit] */ + len = get_uint16(&uhead[2]); + rmrr->rm_seg = get_uint16(&uhead[6]); + rmrr->rm_base = get_uint64(&uhead[8]); + rmrr->rm_limit = get_uint64(&uhead[16]); + + if (rmrr->rm_base > rmrr->rm_limit) { + ddi_err(DER_WARN, NULL, "IMMU: BIOS bug detected: " + "RMRR: base (%lx) > limit (%lx)", + rmrr->rm_base, rmrr->rm_limit); + list_destroy(&(rmrr->rm_scope_list)); + mutex_destroy(&(rmrr->rm_lock)); + kmem_free(rmrr, sizeof (rmrr_t)); + return (DDI_SUCCESS); + } + + /* + * parse each scope in RMRR + */ + shead = &uhead[24]; + while (shead < &uhead[len - 1]) { + scope = parse_scope(shead); + if (scope == NULL) { + return (DDI_FAILURE); + } + list_insert_tail(&(rmrr->rm_scope_list), scope); + shead += get_uint8(&shead[1]); + } + + list_insert_tail(&(tbl->tbl_rmrr_list[rmrr->rm_seg]), rmrr); + + return (DDI_SUCCESS); +} + +#define TBL_OEM_ID_SZ (6) +#define TBL_OEM_TBLID_SZ (8) + +/* + * parse the "raw" DMAR table and convert it + * into a useful form. + */ +static int +dmar_parse(dmar_table_t **tblpp, char *raw) +{ + char *uhead; + dmar_table_t *tbl; + int i; + char *unmstr; + + ASSERT(raw); + ASSERT(tblpp); + + *tblpp = NULL; + + /* + * do a sanity check. make sure the raw table + * has the right signature + */ + if (raw[0] != 'D' || raw[1] != 'M' || + raw[2] != 'A' || raw[3] != 'R') { + ddi_err(DER_WARN, NULL, "IOMMU ACPI " + "signature != \"DMAR\""); + return (DDI_FAILURE); + } + + /* + * the platform has intel iommu, create processed ACPI struct + */ + tbl = kmem_zalloc(sizeof (dmar_table_t), KM_SLEEP); + mutex_init(&(tbl->tbl_lock), NULL, MUTEX_DEFAULT, NULL); + + tbl->tbl_raw = raw; + + /* + * Note we explicitly show offsets for clarity + */ + tbl->tbl_rawlen = get_uint32(&raw[4]); + + /* XXX TO DO verify checksum of table */ + tbl->tbl_oem_id = get_str(&raw[10], TBL_OEM_ID_SZ); + tbl->tbl_oem_tblid = get_str(&raw[16], TBL_OEM_TBLID_SZ); + tbl->tbl_oem_rev = get_uint32(&raw[24]); + tbl->tbl_haw = get_uint8(&raw[36]) + 1; + tbl->tbl_intrmap = (get_uint8(&raw[37]) & DMAR_INTRMAP_SUPPORT) + ? 
B_TRUE : B_FALSE; + + /* create lists for DRHD and RMRR */ + for (i = 0; i < IMMU_MAXSEG; i++) { + list_create(&(tbl->tbl_drhd_list[i]), sizeof (drhd_t), + offsetof(drhd_t, dr_node)); + list_create(&(tbl->tbl_rmrr_list[i]), sizeof (rmrr_t), + offsetof(rmrr_t, rm_node)); + } + + ioapic_drhd_setup(); + + /* + * parse each unit. Currently only DRHD and RMRR types + * are parsed. We ignore all other types of units. + */ + uhead = &raw[48]; + while (uhead < &raw[tbl->tbl_rawlen - 1]) { + unmstr = NULL; + switch (get_uint16(uhead)) { + case DMAR_DRHD: + if (parse_drhd(uhead, tbl) != DDI_SUCCESS) { + goto failed; + } + break; + case DMAR_RMRR: + if (parse_rmrr(uhead, tbl) != DDI_SUCCESS) { + goto failed; + } + break; + case DMAR_ATSR: + unmstr = "ATSR"; + break; + case DMAR_RHSA: + unmstr = "RHSA"; + break; + default: + unmstr = "unknown unit type"; + break; + } + if (unmstr) { + ddi_err(DER_NOTE, NULL, "DMAR ACPI table: " + "skipping unsupported unit type %s", unmstr); + } + uhead += get_uint16(&uhead[2]); + } + + *tblpp = tbl; + return (DDI_SUCCESS); + +failed: + dmar_table_destroy(tbl); + return (DDI_FAILURE); +} + +static char * +scope_type(int devtype) +{ + char *typestr; + + switch (devtype) { + case DMAR_ENDPOINT: + typestr = "endpoint-device"; + break; + case DMAR_SUBTREE: + typestr = "subtree-device"; + break; + case DMAR_IOAPIC: + typestr = "IOAPIC"; + break; + case DMAR_HPET: + typestr = "HPET"; + break; + default: + typestr = "Unknown device"; + break; + } + + return (typestr); +} + +static void +print_scope_list(list_t *scope_list) +{ + scope_t *scope; + + if (list_is_empty(scope_list)) + return; + + ddi_err(DER_CONT, NULL, "\tdevice list:\n"); + + for (scope = list_head(scope_list); scope; + scope = list_next(scope_list, scope)) { + ddi_err(DER_CONT, NULL, "\t\ttype = %s\n", + scope_type(scope->scp_type)); + ddi_err(DER_CONT, NULL, "\n\t\tbus = %d\n", + scope->scp_bus); + ddi_err(DER_CONT, NULL, "\t\tdev = %d\n", + scope->scp_dev); + ddi_err(DER_CONT, NULL, "\t\tfunc = %d\n", + scope->scp_func); + } +} + +static void +print_drhd_list(list_t *drhd_list) +{ + drhd_t *drhd; + + if (list_is_empty(drhd_list)) + return; + + ddi_err(DER_CONT, NULL, "\ndrhd list:\n"); + + for (drhd = list_head(drhd_list); drhd; + drhd = list_next(drhd_list, drhd)) { + + ddi_err(DER_CONT, NULL, "\n\tsegment = %d\n", + drhd->dr_seg); + ddi_err(DER_CONT, NULL, "\treg_base = 0x%" PRIx64 "\n", + drhd->dr_regs); + ddi_err(DER_CONT, NULL, "\tinclude_all = %s\n", + drhd->dr_include_all == B_TRUE ? "TRUE" : "FALSE"); + ddi_err(DER_CONT, NULL, "\tdip = 0x%p\n", + (void *)drhd->dr_dip); + + print_scope_list(&(drhd->dr_scope_list)); + } +} + + +static void +print_rmrr_list(list_t *rmrr_list) +{ + rmrr_t *rmrr; + + if (list_is_empty(rmrr_list)) + return; + + ddi_err(DER_CONT, NULL, "\nrmrr list:\n"); + + for (rmrr = list_head(rmrr_list); rmrr; + rmrr = list_next(rmrr_list, rmrr)) { + + ddi_err(DER_CONT, NULL, "\n\tsegment = %d\n", + rmrr->rm_seg); + ddi_err(DER_CONT, NULL, "\tbase = 0x%lx\n", + rmrr->rm_base); + ddi_err(DER_CONT, NULL, "\tlimit = 0x%lx\n", + rmrr->rm_limit); + + print_scope_list(&(rmrr->rm_scope_list)); + } +} + +/* + * print DMAR table + */ +static void +dmar_table_print(dmar_table_t *tbl) +{ + int i; + + if (dmar_print == B_FALSE) { + return; + } + + /* print the title */ + ddi_err(DER_CONT, NULL, "#### Start of dmar_table ####\n"); + ddi_err(DER_CONT, NULL, "\thaw = %d\n", tbl->tbl_haw); + ddi_err(DER_CONT, NULL, "\tintr_remap = %s\n", + tbl->tbl_intrmap == B_TRUE ? 
"<true>" : "<false>"); + + /* print drhd list */ + for (i = 0; i < IMMU_MAXSEG; i++) { + print_drhd_list(&(tbl->tbl_drhd_list[i])); + } + + + /* print rmrr list */ + for (i = 0; i < IMMU_MAXSEG; i++) { + print_rmrr_list(&(tbl->tbl_rmrr_list[i])); + } + + ddi_err(DER_CONT, NULL, "#### END of dmar_table ####\n"); +} + +static void +drhd_devi_create(drhd_t *drhd, char *name) +{ + struct ddi_parent_private_data *pdptr; + struct regspec reg; + dev_info_t *dip; + + ndi_devi_alloc_sleep(root_devinfo, name, + DEVI_SID_NODEID, &dip); + + drhd->dr_dip = dip; + + reg.regspec_bustype = 0; + reg.regspec_addr = drhd->dr_regs; + reg.regspec_size = IMMU_REGSZ; + + /* + * update the reg properties + * + * reg property will be used for register + * set access + * + * refer to the bus_map of root nexus driver + * I/O or memory mapping: + * + * <bustype=0, addr=x, len=x>: memory + * <bustype=1, addr=x, len=x>: i/o + * <bustype>1, addr=0, len=x>: x86-compatibility i/o + */ + (void) ndi_prop_update_int_array(DDI_DEV_T_NONE, + dip, "reg", (int *)®, + sizeof (struct regspec) / sizeof (int)); + + + pdptr = kmem_zalloc(sizeof (struct ddi_parent_private_data) + + sizeof (struct regspec), KM_SLEEP); + pdptr->par_nreg = 1; + pdptr->par_reg = (struct regspec *)(pdptr + 1); + pdptr->par_reg->regspec_bustype = 0; + pdptr->par_reg->regspec_addr = drhd->dr_regs; + pdptr->par_reg->regspec_size = IMMU_REGSZ; + ddi_set_parent_data(dip, pdptr); +} + +/* + * dmar_devinfos_create() + * + * create the dev_info node in the device tree, + * the info node is a nuxus child of the root + * nexus + */ +static void +dmar_devinfos_create(dmar_table_t *tbl) +{ + list_t *drhd_list; + drhd_t *drhd; + char name[IMMU_MAXNAMELEN]; + int i, unit; + + for (i = 0; i < IMMU_MAXSEG; i++) { + + drhd_list = &(tbl->tbl_drhd_list[i]); + + if (list_is_empty(drhd_list)) + continue; + + drhd = list_head(drhd_list); + for (unit = 0; drhd; + drhd = list_next(drhd_list, drhd), unit++) { + (void) snprintf(name, sizeof (name), + "drhd%d,%d", i, unit); + drhd_devi_create(drhd, name); + } + } +} + +static void +drhd_devi_destroy(drhd_t *drhd) +{ + dev_info_t *dip; + int count; + + dip = drhd->dr_dip; + ASSERT(dip); + + ndi_devi_enter(root_devinfo, &count); + if (ndi_devi_offline(dip, NDI_DEVI_REMOVE) != DDI_SUCCESS) { + ddi_err(DER_WARN, dip, "Failed to destroy"); + } + ndi_devi_exit(root_devinfo, count); + drhd->dr_dip = NULL; +} + +/* + * dmar_devi_destroy() + * + * destroy dev_info nodes for all drhd units + */ +static void +dmar_devi_destroy(dmar_table_t *tbl) +{ + drhd_t *drhd; + list_t *drhd_list; + int i; + + for (i = 0; i < IMMU_MAXSEG; i++) { + drhd_list = &(tbl->tbl_drhd_list[i]); + if (list_is_empty(drhd_list)) + continue; + + drhd = list_head(drhd_list); + for (; drhd; drhd = list_next(drhd_list, drhd)) { + drhd_devi_destroy(drhd); + } + } +} + +static int +match_bdf(dev_info_t *ddip, void *arg) +{ + immu_arg_t *imarg = (immu_arg_t *)arg; + immu_devi_t *immu_devi; + + ASSERT(ddip); + ASSERT(imarg); + ASSERT(imarg->ima_seg == 0); + ASSERT(imarg->ima_bus >= 0); + ASSERT(imarg->ima_devfunc >= 0); + ASSERT(imarg->ima_ddip == NULL); + + /* rdip can be NULL */ + + mutex_enter(&(DEVI(ddip)->devi_lock)); + + immu_devi = IMMU_DEVI(ddip); + ASSERT(immu_devi); + + if (immu_devi->imd_seg == imarg->ima_seg && + immu_devi->imd_bus == imarg->ima_bus && + immu_devi->imd_devfunc == imarg->ima_devfunc) { + imarg->ima_ddip = ddip; + } + + mutex_exit(&(DEVI(ddip)->devi_lock)); + + return (imarg->ima_ddip ? 
DDI_WALK_TERMINATE : DDI_WALK_CONTINUE); +} +static void +dmar_table_destroy(dmar_table_t *tbl) +{ + int i; + + ASSERT(tbl); + + /* destroy lists for DRHD and RMRR */ + for (i = 0; i < IMMU_MAXSEG; i++) { + rmrr_list_destroy(&(tbl->tbl_rmrr_list[i])); + drhd_list_destroy(&(tbl->tbl_drhd_list[i])); + } + + /* free strings */ + kmem_free(tbl->tbl_oem_tblid, TBL_OEM_TBLID_SZ + 1); + kmem_free(tbl->tbl_oem_id, TBL_OEM_ID_SZ + 1); + tbl->tbl_raw = NULL; /* raw ACPI table doesn't have to be freed */ + mutex_destroy(&(tbl->tbl_lock)); + kmem_free(tbl, sizeof (dmar_table_t)); +} + +/* + * ######################################################################### + * Functions exported by immu_dmar.c + * This file deals with reading and processing the DMAR ACPI table + * ######################################################################### + */ + +/* + * immu_dmar_setup() + * Check if the system has a DMAR ACPI table. If yes, the system + * has Intel IOMMU hardware + */ +int +immu_dmar_setup(void) +{ + if (AcpiGetTable("DMAR", 1, (ACPI_TABLE_HEADER **)&dmar_raw) != AE_OK) { + ddi_err(DER_LOG, NULL, + "No DMAR ACPI table. No Intel IOMMU present\n"); + dmar_raw = NULL; + return (DDI_FAILURE); + } + ASSERT(dmar_raw); + return (DDI_SUCCESS); +} + +/* + * immu_dmar_parse() + * Called by immu.c to parse and convert "raw" ACPI DMAR table + */ +int +immu_dmar_parse(void) +{ + dmar_table_t *tbl = NULL; + + /* we should already have found the "raw" table */ + ASSERT(dmar_raw); + + ddi_err(DER_CONT, NULL, "?Processing DMAR ACPI table\n"); + + dmar_table = NULL; + + /* + * parse DMAR ACPI table + */ + if (dmar_parse(&tbl, dmar_raw) != DDI_SUCCESS) { + ASSERT(tbl == NULL); + return (DDI_FAILURE); + } + + ASSERT(tbl); + + /* + * create one devinfo for every drhd unit + * in the DMAR table + */ + dmar_devinfos_create(tbl); + + /* + * print the dmar table if the debug option is set + */ + dmar_table_print(tbl); + + dmar_table = tbl; + + return (DDI_SUCCESS); +} + +void +immu_dmar_startup(void) +{ + /* nothing to do */ +} + +void +immu_dmar_shutdown(void) +{ + /* nothing to do */ +} + +void +immu_dmar_destroy(void) +{ + dmar_devi_destroy(dmar_table); + dmar_table_destroy(dmar_table); + ioapic_drhd_destroy(); + dmar_table = NULL; + dmar_raw = NULL; +} + +boolean_t +immu_dmar_blacklisted(char **strptr, uint_t nstrs) +{ + dmar_table_t *tbl = dmar_table; + int i; + char oem_rev[IMMU_MAXNAMELEN]; + + ASSERT(tbl); + + ASSERT((strptr == NULL) ^ (nstrs != 0)); + + /* + * Must be a minimum of 4 + */ + if (nstrs < 4) { + return (B_FALSE); + } + + ddi_err(DER_CONT, NULL, "?System DMAR ACPI table information:\n"); + ddi_err(DER_CONT, NULL, "?OEM-ID = <%s>\n", tbl->tbl_oem_id); + ddi_err(DER_CONT, NULL, "?Table-ID = <%s>\n", tbl->tbl_oem_tblid); + (void) snprintf(oem_rev, sizeof (oem_rev), "%d", tbl->tbl_oem_rev); + ddi_err(DER_CONT, NULL, "?Revision = <%s>\n", oem_rev); + + for (i = 0; nstrs - i >= 4; i++) { + if (strcmp(*strptr++, "DMAR") == 0) { + if (strcmp(*strptr++, tbl->tbl_oem_id) == 0 && + ((char *)strptr == '\0' || + strcmp(*strptr++, tbl->tbl_oem_tblid) == 0) && + ((char *)strptr == '\0' || + strcmp(*strptr++, oem_rev) == 0)) { + return (B_TRUE); + } + i += 3; /* for loop adds 1 as well, so only 3 here */ + } + } + return (B_FALSE); +} + +void +immu_dmar_rmrr_map(void) +{ + int seg; + int e; + int count; + dev_info_t *rdip; + scope_t *scope; + rmrr_t *rmrr; + dmar_table_t *tbl; + + ASSERT(dmar_table); + + tbl = dmar_table; + + /* called during boot, when kernel is single threaded. 
No lock */ + + /* + * for each segment, walk the rmrr list looking for an exact match + */ + for (seg = 0; seg < IMMU_MAXSEG; seg++) { + rmrr = list_head(&(tbl->tbl_rmrr_list)[seg]); + for (; rmrr; rmrr = list_next(&(tbl->tbl_rmrr_list)[seg], + rmrr)) { + + /* + * try to match BDF *exactly* to a device scope. + */ + scope = list_head(&(rmrr->rm_scope_list)); + for (; scope; + scope = list_next(&(rmrr->rm_scope_list), scope)) { + immu_arg_t imarg = {0}; + memrng_t mrng = {0}; + + /* PCI endpoint devices only */ + if (scope->scp_type != DMAR_ENDPOINT) + continue; + + imarg.ima_seg = seg; + imarg.ima_bus = scope->scp_bus; + imarg.ima_devfunc = + IMMU_PCI_DEVFUNC(scope->scp_dev, + scope->scp_func); + imarg.ima_ddip = NULL; + imarg.ima_rdip = NULL; + + ASSERT(root_devinfo); + /* XXX should be optimized */ + ndi_devi_enter(root_devinfo, &count); + ddi_walk_devs(ddi_get_child(root_devinfo), + match_bdf, &imarg); + ndi_devi_exit(root_devinfo, count); + + if (imarg.ima_ddip == NULL) { + ddi_err(DER_WARN, NULL, + "No dip found for " + "bus=0x%x, dev=0x%x, func= 0x%x", + scope->scp_bus, scope->scp_dev, + scope->scp_func); + continue; + } + + rdip = imarg.ima_ddip; + /* + * This address must be in the BIOS reserved + * map + */ + if (!address_in_memlist(bios_rsvd, + (uint64_t)rmrr->rm_base, rmrr->rm_limit - + rmrr->rm_base + 1)) { + ddi_err(DER_WARN, rdip, "RMRR range " + " [0x%" PRIx64 " - 0x%" PRIx64 "]" + " is not in BIOS reserved map", + rmrr->rm_base, rmrr->rm_limit); + } + + /* XXX could be more efficient */ + memlist_read_lock(); + if (address_in_memlist(phys_install, + (uint64_t)rmrr->rm_base, rmrr->rm_limit - + rmrr->rm_base + 1)) { + ddi_err(DER_WARN, rdip, "RMRR range " + " [0x%" PRIx64 " - 0x%" PRIx64 "]" + " is in physinstall map", + rmrr->rm_base, rmrr->rm_limit); + } + memlist_read_unlock(); + + + ddi_err(DER_LOG, rdip, + "IMMU: Mapping RMRR range " + "[0x%" PRIx64 " - 0x%"PRIx64 "]", + rmrr->rm_base, rmrr->rm_limit); + + mrng.mrng_start = + IMMU_ROUNDOWN((uintptr_t)rmrr->rm_base); + mrng.mrng_npages = + IMMU_ROUNDUP((uintptr_t)rmrr->rm_limit - + (uintptr_t)rmrr->rm_base + 1) / + IMMU_PAGESIZE; + e = immu_dvma_map(NULL, NULL, &mrng, 0, rdip, + IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | + IMMU_FLAGS_MEMRNG); + /* + * dip may have unity domain or xlate domain + * If the former, PHYSICAL is returned else + * MAPPED is returned. + */ + ASSERT(e == DDI_DMA_MAPPED || + e == DDI_DMA_USE_PHYSICAL); + } + } + } + +} + +immu_t * +immu_dmar_get_immu(dev_info_t *rdip) +{ + int seg; + int tlevel; + int level; + drhd_t *drhd; + drhd_t *tdrhd; + scope_t *scope; + dmar_table_t *tbl; + + ASSERT(dmar_table); + + tbl = dmar_table; + + mutex_enter(&(tbl->tbl_lock)); + + /* + * for each segment, walk the drhd list looking for an exact match + */ + for (seg = 0; seg < IMMU_MAXSEG; seg++) { + drhd = list_head(&(tbl->tbl_drhd_list)[seg]); + for (; drhd; drhd = list_next(&(tbl->tbl_drhd_list)[seg], + drhd)) { + + /* + * we are currently searching for exact matches so + * skip "include all" (catchall) and subtree matches + */ + if (drhd->dr_include_all == B_TRUE) + continue; + + /* + * try to match BDF *exactly* to a device scope. 
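+ * An exact match means this DRHD names the device's own + * bus/dev/func in its scope list, not just an ancestor bridge.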
+ */ + scope = list_head(&(drhd->dr_scope_list)); + for (; scope; + scope = list_next(&(drhd->dr_scope_list), scope)) { + immu_arg_t imarg = {0}; + + /* PCI endpoint devices only */ + if (scope->scp_type != DMAR_ENDPOINT) + continue; + + imarg.ima_seg = seg; + imarg.ima_bus = scope->scp_bus; + imarg.ima_devfunc = + IMMU_PCI_DEVFUNC(scope->scp_dev, + scope->scp_func); + imarg.ima_ddip = NULL; + imarg.ima_rdip = rdip; + level = 0; + if (immu_walk_ancestor(rdip, NULL, match_bdf, + &imarg, &level, IMMU_FLAGS_DONTPASS) + != DDI_SUCCESS) { + /* skip - nothing else we can do */ + continue; + } + + /* Should have walked only 1 level i.e. rdip */ + ASSERT(level == 1); + + if (imarg.ima_ddip) { + ASSERT(imarg.ima_ddip == rdip); + goto found; + } + } + } + } + + /* + * walk the drhd list looking for subtree match + * i.e. is the device a descendant of a devscope BDF. + * We want the lowest subtree. + */ + tdrhd = NULL; + tlevel = 0; + for (seg = 0; seg < IMMU_MAXSEG; seg++) { + drhd = list_head(&(tbl->tbl_drhd_list)[seg]); + for (; drhd; drhd = list_next(&(tbl->tbl_drhd_list)[seg], + drhd)) { + + /* looking for subtree match */ + if (drhd->dr_include_all == B_TRUE) + continue; + + /* + * try to match the device scope + */ + scope = list_head(&(drhd->dr_scope_list)); + for (; scope; + scope = list_next(&(drhd->dr_scope_list), scope)) { + immu_arg_t imarg = {0}; + + /* PCI subtree only */ + if (scope->scp_type != DMAR_SUBTREE) + continue; + + imarg.ima_seg = seg; + imarg.ima_bus = scope->scp_bus; + imarg.ima_devfunc = + IMMU_PCI_DEVFUNC(scope->scp_dev, + scope->scp_func); + + imarg.ima_ddip = NULL; + imarg.ima_rdip = rdip; + level = 0; + if (immu_walk_ancestor(rdip, NULL, match_bdf, + &imarg, &level, 0) != DDI_SUCCESS) { + /* skip - nothing else we can do */ + continue; + } + + /* should have walked 1 level i.e. rdip */ + ASSERT(level > 0); + + /* look for lowest ancestor matching drhd */ + if (imarg.ima_ddip && (tdrhd == NULL || + level < tlevel)) { + tdrhd = drhd; + tlevel = level; + } + } + } + } + + if ((drhd = tdrhd) != NULL) { + goto found; + } + + for (seg = 0; seg < IMMU_MAXSEG; seg++) { + drhd = list_head(&(tbl->tbl_drhd_list[seg])); + for (; drhd; drhd = list_next(&(tbl->tbl_drhd_list)[seg], + drhd)) { + /* Look for include all */ + if (drhd->dr_include_all == B_TRUE) { + break; + } + } + } + + /*FALLTHRU*/ + +found: + mutex_exit(&(tbl->tbl_lock)); + + /* + * No drhd (dmar unit) found for this device in the ACPI DMAR tables. + * This may happen with buggy versions of BIOSes. Just warn instead + * of panic as we don't want whole system to go down because of one + * device. 
+ */ + if (drhd == NULL) { + ddi_err(DER_WARN, rdip, "can't find Intel IOMMU unit for " + "device in ACPI DMAR table."); + return (NULL); + } + + return (drhd->dr_immu); +} + +char * +immu_dmar_unit_name(void *dmar_unit) +{ + drhd_t *drhd = (drhd_t *)dmar_unit; + + ASSERT(drhd->dr_dip); + return (ddi_node_name(drhd->dr_dip)); +} + +dev_info_t * +immu_dmar_unit_dip(void *dmar_unit) +{ + drhd_t *drhd = (drhd_t *)dmar_unit; + return (drhd->dr_dip); +} + +void * +immu_dmar_walk_units(int seg, void *dmar_unit) +{ + list_t *drhd_list; + drhd_t *drhd = (drhd_t *)dmar_unit; + + drhd_list = &(dmar_table->tbl_drhd_list[seg]); + + if (drhd == NULL) { + return ((void *)list_head(drhd_list)); + } else { + return ((void *)list_next(drhd_list, drhd)); + } +} + +void +immu_dmar_set_immu(void *dmar_unit, immu_t *immu) +{ + drhd_t *drhd = (drhd_t *)dmar_unit; + + ASSERT(drhd); + ASSERT(immu); + + drhd->dr_immu = immu; +} + +boolean_t +immu_dmar_intrmap_supported(void) +{ + ASSERT(dmar_table); + return (dmar_table->tbl_intrmap); +} + +/* for a given ioapicid, find the source id and immu */ +uint16_t +immu_dmar_ioapic_sid(int ioapicid) +{ + ioapic_drhd_t *idt; + + idt = ioapic_drhd_lookup(ioapicid); + if (idt == NULL) { + ddi_err(DER_PANIC, NULL, "cannot determine source-id for " + "IOAPIC (id = %d)", ioapicid); + /*NOTREACHED*/ + } + + return (idt->ioapic_sid); +} + +/* for a given ioapicid, find the source id and immu */ +immu_t * +immu_dmar_ioapic_immu(int ioapicid) +{ + ioapic_drhd_t *idt; + + idt = ioapic_drhd_lookup(ioapicid); + if (idt) { + return (idt->ioapic_drhd ? idt->ioapic_drhd->dr_immu : NULL); + } + return (NULL); +}
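A standalone sketch, not changeset code: ioapic_drhd_insert() above packs a device scope's BDF into the 16-bit interrupt-remapping source-id that immu_dmar_ioapic_sid() later hands out. The values below are arbitrary, chosen only to show the encoding and its inverse:

	/* sid layout, per ioapic_drhd_insert(): bus[15:8] dev[7:3] func[2:0] */
	#include <stdio.h>

	int
	main(void)
	{
		unsigned bus = 0x0f, dev = 0x1d, func = 0x7;
		unsigned sid = (bus << 8) | (dev << 3) | func;

		(void) printf("sid = 0x%04x\n", sid);	/* prints sid = 0x0fef */
		(void) printf("bus = 0x%02x dev = 0x%02x func = 0x%x\n",
		    (sid >> 8) & 0xff, (sid >> 3) & 0x1f, sid & 0x7);
		return (0);
	}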
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/i86pc/io/immu_dvma.c Sat Jan 30 18:23:16 2010 -0800 @@ -0,0 +1,3190 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Portions Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2009, Intel Corporation. + * All rights reserved. + */ + +/* + * DVMA code + * This file contains Intel IOMMU code that deals with DVMA + * i.e. DMA remapping. + */ + +#include <sys/sysmacros.h> +#include <sys/pcie.h> +#include <sys/pci_cfgspace.h> +#include <vm/hat_i86.h> +#include <sys/memlist.h> +#include <sys/acpi/acpi.h> +#include <sys/acpica.h> +#include <sys/modhash.h> +#include <sys/immu.h> + +#undef TEST + +/* + * Macros based on PCI spec + */ +#define IMMU_PCI_REV2CLASS(r) ((r) >> 8) /* classcode from revid */ +#define IMMU_PCI_CLASS2BASE(c) ((c) >> 16) /* baseclass from classcode */ +#define IMMU_PCI_CLASS2SUB(c) (((c) >> 8) & 0xff); /* classcode */ + +#define IMMU_CONTIG_PADDR(d, p) \ + ((d).dck_paddr && ((d).dck_paddr + IMMU_PAGESIZE) == (p)) + +typedef struct dvma_arg { + immu_t *dva_immu; + dev_info_t *dva_rdip; + dev_info_t *dva_ddip; + domain_t *dva_domain; + int dva_level; + immu_flags_t dva_flags; + list_t *dva_list; + int dva_error; +} dvma_arg_t; + +static domain_t *domain_create(immu_t *immu, dev_info_t *ddip, + dev_info_t *rdip, immu_flags_t immu_flags); +static immu_devi_t *create_immu_devi(dev_info_t *rdip, int bus, + int dev, int func, immu_flags_t immu_flags); +static void destroy_immu_devi(immu_devi_t *immu_devi); +static void dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma, + uint64_t spaddr, uint64_t npages, dev_info_t *rdip, + immu_flags_t immu_flags); +extern struct memlist *phys_install; + + + +/* static Globals */ + +/* + * Used to setup DMA objects (memory regions) + * for DMA reads by IOMMU units + */ +static ddi_dma_attr_t immu_dma_attr = { + DMA_ATTR_V0, + 0U, + 0xffffffffU, + 0xffffffffU, + MMU_PAGESIZE, /* MMU page aligned */ + 0x1, + 0x1, + 0xffffffffU, + 0xffffffffU, + 1, + 4, + 0 +}; + +static ddi_device_acc_attr_t immu_acc_attr = { + DDI_DEVICE_ATTR_V0, + DDI_NEVERSWAP_ACC, + DDI_STRICTORDER_ACC +}; + + +/* globals private to this file */ +static kmutex_t immu_domain_lock; +static list_t immu_unity_domain_list; +static list_t immu_xlate_domain_list; + +/* structure used to store idx into each level of the page tables */ +typedef struct xlate { + int xlt_level; + uint_t xlt_idx; + pgtable_t *xlt_pgtable; +} xlate_t; + +/* 0 is reserved by Vt-d spec. 
Solaris reserves 1 */ +#define IMMU_UNITY_DID 1 + +static mod_hash_t *bdf_domain_hash; + +static domain_t * +bdf_domain_lookup(immu_devi_t *immu_devi) +{ + domain_t *domain; + int16_t seg = immu_devi->imd_seg; + int16_t bus = immu_devi->imd_bus; + int16_t devfunc = immu_devi->imd_devfunc; + uintptr_t bdf = (seg << 16 | bus << 8 | devfunc); + + if (seg < 0 || bus < 0 || devfunc < 0) { + return (NULL); + } + + domain = NULL; + if (mod_hash_find(bdf_domain_hash, + (void *)bdf, (void *)&domain) == 0) { + ASSERT(domain); + ASSERT(domain->dom_did > 0); + return (domain); + } else { + return (NULL); + } +} + +static void +bdf_domain_insert(immu_devi_t *immu_devi, domain_t *domain) +{ + int16_t seg = immu_devi->imd_seg; + int16_t bus = immu_devi->imd_bus; + int16_t devfunc = immu_devi->imd_devfunc; + uintptr_t bdf = (seg << 16 | bus << 8 | devfunc); + int r; + + if (seg < 0 || bus < 0 || devfunc < 0) { + return; + } + + r = mod_hash_insert(bdf_domain_hash, (void *)bdf, (void *)domain); + ASSERT(r != MH_ERR_DUPLICATE); + ASSERT(r == 0); +} + +static int +match_lpc(dev_info_t *pdip, void *arg) +{ + immu_devi_t *immu_devi; + dvma_arg_t *dvap = (dvma_arg_t *)arg; + + ASSERT(dvap->dva_error == DDI_FAILURE); + ASSERT(dvap->dva_ddip == NULL); + ASSERT(dvap->dva_list); + + if (list_is_empty(dvap->dva_list)) { + return (DDI_WALK_TERMINATE); + } + + immu_devi = list_head(dvap->dva_list); + for (; immu_devi; immu_devi = list_next(dvap->dva_list, + immu_devi)) { + ASSERT(immu_devi->imd_dip); + if (immu_devi->imd_dip == pdip) { + dvap->dva_ddip = pdip; + dvap->dva_error = DDI_SUCCESS; + return (DDI_WALK_TERMINATE); + } + } + + return (DDI_WALK_CONTINUE); +} + +static void +immu_devi_set_spclist(dev_info_t *dip, immu_t *immu) +{ + list_t *spclist = NULL; + immu_devi_t *immu_devi; + + ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_lock))); + + immu_devi = IMMU_DEVI(dip); + if (immu_devi->imd_display == B_TRUE) { + spclist = &(immu->immu_dvma_gfx_list); + } else if (immu_devi->imd_lpc == B_TRUE) { + spclist = &(immu->immu_dvma_lpc_list); + } + + if (spclist) { + mutex_enter(&(immu->immu_lock)); + list_insert_head(spclist, immu_devi); + mutex_exit(&(immu->immu_lock)); + } +} + +/* + * Set the immu_devi struct in the immu_devi field of a devinfo node + */ +int +immu_devi_set(dev_info_t *dip, immu_flags_t immu_flags) +{ + int bus, dev, func; + immu_devi_t *new_imd; + immu_devi_t *immu_devi; + + ASSERT(root_devinfo); + ASSERT(dip); + ASSERT(dip != root_devinfo); + + immu_devi = immu_devi_get(dip); + if (immu_devi != NULL) { + return (DDI_SUCCESS); + } + + bus = dev = func = -1; + + /* + * Assume a new immu_devi struct is needed + */ + if (!DEVI_IS_PCI(dip) || acpica_get_bdf(dip, &bus, &dev, &func) != 0) { + /* + * No BDF. Set bus = -1 to indicate this. + * We still need to create a immu_devi struct + * though + */ + bus = -1; + dev = 0; + func = 0; + } + + new_imd = create_immu_devi(dip, bus, dev, func, immu_flags); + if (new_imd == NULL) { + ddi_err(DER_WARN, dip, "Failed to create immu_devi " + "structure"); + return (DDI_FAILURE); + } + + /* + * Check if some other thread allocated a immu_devi while we + * didn't own the lock. 
+ */ + mutex_enter(&(DEVI(dip)->devi_lock)); + if (IMMU_DEVI(dip) == NULL) { + IMMU_DEVI_SET(dip, new_imd); + } else { + destroy_immu_devi(new_imd); + } + mutex_exit(&(DEVI(dip)->devi_lock)); + + return (DDI_SUCCESS); +} + +static dev_info_t * +get_lpc_devinfo(immu_t *immu, dev_info_t *rdip, immu_flags_t immu_flags) +{ + dvma_arg_t dvarg = {0}; + dvarg.dva_list = &(immu->immu_dvma_lpc_list); + dvarg.dva_rdip = rdip; + dvarg.dva_error = DDI_FAILURE; + + if (immu_walk_ancestor(rdip, NULL, match_lpc, + &dvarg, NULL, immu_flags) != DDI_SUCCESS) { + ddi_err(DER_MODE, rdip, "Could not walk ancestors to " + "find lpc_devinfo for ISA device"); + return (NULL); + } + + if (dvarg.dva_error != DDI_SUCCESS || dvarg.dva_ddip == NULL) { + ddi_err(DER_MODE, rdip, "Could not find lpc_devinfo for " + "ISA device"); + return (NULL); + } + + return (dvarg.dva_ddip); +} + +static dev_info_t * +get_gfx_devinfo(dev_info_t *rdip) +{ + immu_t *immu; + immu_devi_t *immu_devi; + list_t *list_gfx; + + /* + * The GFX device may not be on the same IMMU unit as "agpgart" + * so search globally + */ + immu_devi = NULL; + immu = list_head(&immu_list); + for (; immu; immu = list_next(&immu_list, immu)) { + list_gfx = &(immu->immu_dvma_gfx_list); + if (!list_is_empty(list_gfx)) { + immu_devi = list_head(list_gfx); + break; + } + } + + if (immu_devi == NULL) { + ddi_err(DER_WARN, rdip, "IMMU: No GFX device. " + "Cannot redirect agpgart"); + return (NULL); + } + + /* list is not empty; we checked above */ + ASSERT(immu_devi); + ASSERT(immu_devi->imd_dip); + + ddi_err(DER_LOG, rdip, "IMMU: GFX redirect to %s", + ddi_node_name(immu_devi->imd_dip)); + + return (immu_devi->imd_dip); +} + +static immu_flags_t +dma_to_immu_flags(struct ddi_dma_req *dmareq) +{ + immu_flags_t flags = 0; + + if (dmareq->dmar_fp == DDI_DMA_SLEEP) { + flags |= IMMU_FLAGS_SLEEP; + } else { + flags |= IMMU_FLAGS_NOSLEEP; + } + + /* + * Read and write flags need to be reversed. + * DMA_READ means read from device and write + * to memory. So DMA read means DVMA write. + */ + if (dmareq->dmar_flags & DDI_DMA_READ) + flags |= IMMU_FLAGS_WRITE; + + if (dmareq->dmar_flags & DDI_DMA_WRITE) + flags |= IMMU_FLAGS_READ; + +#ifdef BUGGY_DRIVERS + /* + * Some buggy drivers specify neither READ nor WRITE + * For such drivers set both read and write permissions + */ + if ((dmareq->dmar_flags & (DDI_DMA_READ | DDI_DMA_WRITE)) == 0) { + flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); + } +#endif + + return (flags); +} + +/* + * pgtable_alloc() + * alloc an IOMMU pgtable structure. + * This same struct is used for root and context tables as well. + * This routine allocs the following: + * - a pgtable_t struct + * - a HW page which holds PTEs/entries which is accessed by HW + * so we set up DMA for this page + * - a SW page which is only for our bookkeeping + * (for example to hold pointers to the next level pgtable). + * So a simple kmem_alloc suffices + */ +static pgtable_t * +pgtable_alloc(immu_t *immu, domain_t *domain, immu_flags_t immu_flags) +{ + size_t actual_size = 0; + pgtable_t *pgtable; + int (*dmafp)(caddr_t); + caddr_t vaddr; + int kmflags; + + /* TO DO cache freed pgtables as it is expensive to create them */ + ASSERT(immu); + + kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? + KM_NOSLEEP : KM_SLEEP; + + dmafp = (immu_flags & IMMU_FLAGS_NOSLEEP) ? 
+ DDI_DMA_DONTWAIT : DDI_DMA_SLEEP; + + pgtable = kmem_zalloc(sizeof (pgtable_t), kmflags); + if (pgtable == NULL) { + return (NULL); + } + + pgtable->swpg_next_array = kmem_zalloc(IMMU_PAGESIZE, kmflags); + if (pgtable->swpg_next_array == NULL) { + kmem_free(pgtable, sizeof (pgtable_t)); + return (NULL); + } + + ASSERT(root_devinfo); + if (ddi_dma_alloc_handle(root_devinfo, &immu_dma_attr, + dmafp, NULL, &pgtable->hwpg_dmahdl) != DDI_SUCCESS) { + kmem_free(pgtable->swpg_next_array, IMMU_PAGESIZE); + kmem_free(pgtable, sizeof (pgtable_t)); + return (NULL); + } + + if (ddi_dma_mem_alloc(pgtable->hwpg_dmahdl, IMMU_PAGESIZE, + &immu_acc_attr, DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED, + dmafp, NULL, &vaddr, &actual_size, + &pgtable->hwpg_memhdl) != DDI_SUCCESS) { + ddi_dma_free_handle(&pgtable->hwpg_dmahdl); + kmem_free((void *)(pgtable->swpg_next_array), + IMMU_PAGESIZE); + kmem_free(pgtable, sizeof (pgtable_t)); + return (NULL); + } + + /* + * Memory allocation failure. Maybe a temporary condition + * so return error rather than panic, so we can try again + */ + if (actual_size < IMMU_PAGESIZE) { + ddi_dma_mem_free(&pgtable->hwpg_memhdl); + ddi_dma_free_handle(&pgtable->hwpg_dmahdl); + kmem_free((void *)(pgtable->swpg_next_array), + IMMU_PAGESIZE); + kmem_free(pgtable, sizeof (pgtable_t)); + return (NULL); + } + + pgtable->hwpg_paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr)); + pgtable->hwpg_vaddr = vaddr; + + bzero(pgtable->hwpg_vaddr, IMMU_PAGESIZE); + + /* Use immu directly as domain may be NULL, cant use dom_immu field */ + immu_regs_cpu_flush(immu, pgtable->hwpg_vaddr, IMMU_PAGESIZE); + + rw_init(&(pgtable->swpg_rwlock), NULL, RW_DEFAULT, NULL); + + if (domain) { + rw_enter(&(domain->dom_pgtable_rwlock), RW_WRITER); + list_insert_head(&(domain->dom_pglist), pgtable); + rw_exit(&(domain->dom_pgtable_rwlock)); + } + + return (pgtable); +} + +static void +pgtable_free(immu_t *immu, pgtable_t *pgtable, domain_t *domain) +{ + ASSERT(immu); + ASSERT(pgtable); + + if (domain) { + rw_enter(&(domain->dom_pgtable_rwlock), RW_WRITER); + list_remove(&(domain->dom_pglist), pgtable); + rw_exit(&(domain->dom_pgtable_rwlock)); + } + + /* destroy will panic if lock is held. 
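+	 * By now the pgtable is off the domain list, so no other
+	 * thread can still be holding the lock.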
*/ + rw_destroy(&(pgtable->swpg_rwlock)); + + /* Zero out the HW page being freed to catch errors */ + bzero(pgtable->hwpg_vaddr, IMMU_PAGESIZE); + immu_regs_cpu_flush(immu, pgtable->hwpg_vaddr, IMMU_PAGESIZE); + ddi_dma_mem_free(&pgtable->hwpg_memhdl); + ddi_dma_free_handle(&pgtable->hwpg_dmahdl); + /* don't zero out the soft pages for debugging */ + if (pgtable->swpg_next_array) + kmem_free((void *)(pgtable->swpg_next_array), IMMU_PAGESIZE); + kmem_free(pgtable, sizeof (pgtable_t)); +} + +/* + * Function to identify a display device from the PCI class code + */ +static boolean_t +device_is_display(uint_t classcode) +{ + static uint_t disp_classes[] = { + 0x000100, + 0x030000, + 0x030001 + }; + int i, nclasses = sizeof (disp_classes) / sizeof (uint_t); + + for (i = 0; i < nclasses; i++) { + if (classcode == disp_classes[i]) + return (B_TRUE); + } + return (B_FALSE); +} + +/* + * Function that determines if device is PCIEX and/or PCIEX bridge + */ +static boolean_t +device_is_pciex( + uchar_t bus, uchar_t dev, uchar_t func, boolean_t *is_pcib) +{ + ushort_t cap; + ushort_t capsp; + ushort_t cap_count = PCI_CAP_MAX_PTR; + ushort_t status; + boolean_t is_pciex = B_FALSE; + + *is_pcib = B_FALSE; + + status = pci_getw_func(bus, dev, func, PCI_CONF_STAT); + if (!(status & PCI_STAT_CAP)) + return (B_FALSE); + + capsp = pci_getb_func(bus, dev, func, PCI_CONF_CAP_PTR); + while (cap_count-- && capsp >= PCI_CAP_PTR_OFF) { + capsp &= PCI_CAP_PTR_MASK; + cap = pci_getb_func(bus, dev, func, capsp); + + if (cap == PCI_CAP_ID_PCI_E) { + status = pci_getw_func(bus, dev, func, capsp + 2); + /* + * See section 7.8.2 of PCI-Express Base Spec v1.0a + * for Device/Port Type. + * PCIE_PCIECAP_DEV_TYPE_PCIE2PCI implies that the + * device is a PCIE2PCI bridge + */ + *is_pcib = + ((status & PCIE_PCIECAP_DEV_TYPE_MASK) == + PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) ? B_TRUE : B_FALSE; + is_pciex = B_TRUE; + } + + capsp = (*pci_getb_func)(bus, dev, func, + capsp + PCI_CAP_NEXT_PTR); + } + + return (is_pciex); +} + + +/* + * immu_dvma_get_immu() + * get the immu unit structure for a dev_info node + */ +immu_t * +immu_dvma_get_immu(dev_info_t *dip, immu_flags_t immu_flags) +{ + immu_devi_t *immu_devi; + immu_t *immu; + + /* + * check if immu unit was already found earlier. + * If yes, then it will be stashed in immu_devi struct. + */ + immu_devi = immu_devi_get(dip); + if (immu_devi == NULL) { + if (immu_devi_set(dip, immu_flags) != DDI_SUCCESS) { + /* + * May fail because of low memory. 
Ideally we would return an
+			 * error so the driver could retry later, but
+			 * for now this is treated as fatal.
+			 */
+			ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
+			    "No immu_devi structure");
+			/*NOTREACHED*/
+		}
+		immu_devi = immu_devi_get(dip);
+		ASSERT(immu_devi);
+	}
+
+	mutex_enter(&(DEVI(dip)->devi_lock));
+	if (immu_devi->imd_immu) {
+		immu = immu_devi->imd_immu;
+		mutex_exit(&(DEVI(dip)->devi_lock));
+		return (immu);
+	}
+	mutex_exit(&(DEVI(dip)->devi_lock));
+
+	immu = immu_dmar_get_immu(dip);
+	if (immu == NULL) {
+		ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
+		    "Cannot find immu_t for device");
+		/*NOTREACHED*/
+	}
+
+	/*
+	 * Check if some other thread found immu
+	 * while lock was not held
+	 */
+	immu_devi = immu_devi_get(dip);
+	/* immu_devi should be present as we found it earlier */
+	if (immu_devi == NULL) {
+		ddi_err(DER_PANIC, dip,
+		    "immu_dvma_get_immu: No immu_devi structure");
+		/*NOTREACHED*/
+	}
+
+	mutex_enter(&(DEVI(dip)->devi_lock));
+	if (immu_devi->imd_immu == NULL) {
+		/* nobody else set it, so we should do it */
+		immu_devi->imd_immu = immu;
+		immu_devi_set_spclist(dip, immu);
+	} else {
+		/*
+		 * if some other thread got the immu before
+		 * us, it must have found the same one
+		 */
+		if (immu_devi->imd_immu != immu) {
+			ddi_err(DER_PANIC, dip, "Multiple "
+			    "immu units found for device. Expected (%p), "
+			    "actual (%p)", (void *)immu,
+			    (void *)immu_devi->imd_immu);
+			mutex_exit(&(DEVI(dip)->devi_lock));
+			/*NOTREACHED*/
+		}
+	}
+	mutex_exit(&(DEVI(dip)->devi_lock));
+
+	return (immu);
+}
+
+
+/* ############################# IMMU_DEVI code ############################ */
+
+/*
+ * Allocate an immu_devi structure and initialize it
+ */
+static immu_devi_t *
+create_immu_devi(dev_info_t *rdip, int bus, int dev, int func,
+    immu_flags_t immu_flags)
+{
+	uchar_t baseclass, subclass;
+	uint_t classcode, revclass;
+	immu_devi_t *immu_devi;
+	boolean_t pciex = B_FALSE;
+	int kmflags;
+	boolean_t is_pcib = B_FALSE;
+
+	/* bus == -1 indicates a non-PCI device (no BDF) */
+	ASSERT(bus == -1 || bus >= 0);
+	ASSERT(dev >= 0);
+	ASSERT(func >= 0);
+
+	kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ?
KM_NOSLEEP : KM_SLEEP; + immu_devi = kmem_zalloc(sizeof (immu_devi_t), kmflags); + if (immu_devi == NULL) { + ddi_err(DER_WARN, rdip, "Failed to allocate memory for " + "Intel IOMMU immu_devi structure"); + return (NULL); + } + immu_devi->imd_dip = rdip; + immu_devi->imd_seg = 0; /* Currently seg can only be 0 */ + immu_devi->imd_bus = bus; + immu_devi->imd_pcib_type = IMMU_PCIB_BAD; + + if (bus == -1) { + immu_devi->imd_pcib_type = IMMU_PCIB_NOBDF; + return (immu_devi); + } + + immu_devi->imd_devfunc = IMMU_PCI_DEVFUNC(dev, func); + immu_devi->imd_sec = 0; + immu_devi->imd_sub = 0; + + revclass = pci_getl_func(bus, dev, func, PCI_CONF_REVID); + + classcode = IMMU_PCI_REV2CLASS(revclass); + baseclass = IMMU_PCI_CLASS2BASE(classcode); + subclass = IMMU_PCI_CLASS2SUB(classcode); + + if (baseclass == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) { + + immu_devi->imd_sec = pci_getb_func(bus, dev, func, + PCI_BCNF_SECBUS); + immu_devi->imd_sub = pci_getb_func(bus, dev, func, + PCI_BCNF_SUBBUS); + + pciex = device_is_pciex(bus, dev, func, &is_pcib); + if (pciex == B_TRUE && is_pcib == B_TRUE) { + immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCI; + } else if (pciex == B_TRUE) { + immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCIE; + } else { + immu_devi->imd_pcib_type = IMMU_PCIB_PCI_PCI; + } + } else { + immu_devi->imd_pcib_type = IMMU_PCIB_ENDPOINT; + } + + /* check for certain special devices */ + immu_devi->imd_display = device_is_display(classcode); + + immu_devi->imd_lpc = ((baseclass == PCI_CLASS_BRIDGE) && + (subclass == PCI_BRIDGE_ISA)) ? B_TRUE : B_FALSE; + + immu_devi->imd_domain = NULL; + + return (immu_devi); +} + +static void +destroy_immu_devi(immu_devi_t *immu_devi) +{ + kmem_free(immu_devi, sizeof (immu_devi_t)); +} + +static domain_t * +immu_devi_domain(dev_info_t *rdip, dev_info_t **ddipp) +{ + immu_devi_t *immu_devi; + domain_t *domain; + dev_info_t *ddip; + + ASSERT(rdip); + ASSERT(ddipp); + + *ddipp = NULL; + + immu_devi = immu_devi_get(rdip); + if (immu_devi == NULL) { + return (NULL); + } + + mutex_enter(&(DEVI(rdip)->devi_lock)); + domain = immu_devi->imd_domain; + ddip = immu_devi->imd_ddip; + mutex_exit(&(DEVI(rdip)->devi_lock)); + + if (domain) { + ASSERT(domain->dom_did > 0); + ASSERT(ddip); + *ddipp = ddip; + } + + return (domain); + +} + +/* ############################# END IMMU_DEVI code ######################## */ +/* ############################# DOMAIN code ############################### */ + +/* + * This routine always succeeds + */ +static int +did_alloc(immu_t *immu, dev_info_t *rdip, + dev_info_t *ddip, immu_flags_t immu_flags) +{ + int did; + + ASSERT(immu); + ASSERT(rdip); + ASSERT(rdip != root_devinfo); + + did = (uintptr_t)vmem_alloc(immu->immu_did_arena, 1, + (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP); + + if (did == 0) { + ASSERT(immu->immu_unity_domain); + ASSERT(immu->immu_unity_domain->dom_did > 0); + ddi_err(DER_WARN, rdip, "device domain-id alloc error" + " domain-device: %s%d. immu unit is %s. 
Using " + "unity domain with domain-id (%d)", + ddi_driver_name(ddip), ddi_get_instance(ddip), + immu->immu_name, immu->immu_unity_domain->dom_did); + did = immu->immu_unity_domain->dom_did; + } + + return (did); +} + +static int +get_branch_domain(dev_info_t *pdip, void *arg) +{ + immu_devi_t *immu_devi; + domain_t *domain; + dev_info_t *ddip; + immu_t *immu; + dvma_arg_t *dvp = (dvma_arg_t *)arg; + + ASSERT(pdip); + ASSERT(dvp); + ASSERT(dvp->dva_rdip); + + /* + * The field dvp->dva_rdip is a work-in-progress + * and gets updated as we walk up the ancestor + * tree. The final ddip is set only when we reach + * the top of the tree. So the dvp->dva_ddip field cannot + * be relied on until we reach the top of the field. + */ + + /* immu_devi may not be set. */ + immu_devi = immu_devi_get(pdip); + if (immu_devi == NULL) { + if (immu_devi_set(pdip, dvp->dva_flags) != DDI_SUCCESS) { + dvp->dva_error = DDI_FAILURE; + return (DDI_WALK_TERMINATE); + } + } + + immu_devi = immu_devi_get(pdip); + ASSERT(immu_devi); + immu = immu_devi->imd_immu; + if (immu == NULL) { + immu = immu_dvma_get_immu(pdip, dvp->dva_flags); + ASSERT(immu); + } + + /* + * If we encounter a PCIE_PCIE bridge *ANCESTOR* we need to + * terminate the walk (since the device under the PCIE bridge + * is a PCIE device and has an independent entry in the + * root/context table) + */ + if (dvp->dva_rdip != pdip && + immu_devi->imd_pcib_type == IMMU_PCIB_PCIE_PCIE) { + return (DDI_WALK_TERMINATE); + } + + /* + * In order to be a domain-dim, it must be a PCI device i.e. + * must have valid BDF. This also eliminates the root complex. + */ + if (immu_devi->imd_pcib_type != IMMU_PCIB_BAD && + immu_devi->imd_pcib_type != IMMU_PCIB_NOBDF) { + ASSERT(immu_devi->imd_bus >= 0); + ASSERT(immu_devi->imd_devfunc >= 0); + dvp->dva_ddip = pdip; + } + + if (immu_devi->imd_display == B_TRUE || + (dvp->dva_flags & IMMU_FLAGS_UNITY)) { + dvp->dva_domain = immu->immu_unity_domain; + /* continue walking to find ddip */ + return (DDI_WALK_CONTINUE); + } + + mutex_enter(&(DEVI(pdip)->devi_lock)); + domain = immu_devi->imd_domain; + ddip = immu_devi->imd_ddip; + mutex_exit(&(DEVI(pdip)->devi_lock)); + + if (domain && ddip) { + /* if domain is set, it must be the same */ + if (dvp->dva_domain) { + ASSERT(domain == dvp->dva_domain); + } + dvp->dva_domain = domain; + dvp->dva_ddip = ddip; + return (DDI_WALK_TERMINATE); + } + + /* immu_devi either has both set or both clear */ + ASSERT(domain == NULL); + ASSERT(ddip == NULL); + + /* Domain may already be set, continue walking so that ddip gets set */ + if (dvp->dva_domain) { + return (DDI_WALK_CONTINUE); + } + + /* domain is not set in either immu_devi or dvp */ + domain = bdf_domain_lookup(immu_devi); + if (domain == NULL) { + return (DDI_WALK_CONTINUE); + } + + /* ok, the BDF hash had a domain for this BDF. */ + + /* Grab lock again to check if something else set immu_devi fields */ + mutex_enter(&(DEVI(pdip)->devi_lock)); + if (immu_devi->imd_domain != NULL) { + ASSERT(immu_devi->imd_domain == domain); + dvp->dva_domain = domain; + } else { + dvp->dva_domain = domain; + } + mutex_exit(&(DEVI(pdip)->devi_lock)); + + /* + * walk upwards until the topmost PCI bridge is found + */ + return (DDI_WALK_CONTINUE); +} + +static void +map_unity_domain(domain_t *domain) +{ + struct memlist *mp; + uint64_t start; + uint64_t npages; + + ASSERT(domain); + ASSERT(domain->dom_did == IMMU_UNITY_DID); + + /* + * We call into routines that grab the lock so we should + * not be called with the lock held. 
This does not matter + * much since, no else has a reference to this domain + */ + ASSERT(!rw_lock_held(&(domain->dom_pgtable_rwlock))); + + /* + * UNITY arenas are a mirror of the physical memory + * installed on the system. + */ + +#ifdef BUGGY_DRIVERS + /* + * Dont skip page0. Some broken HW/FW access it. + */ + dvma_map(domain->dom_immu, domain, 0, 0, 1, NULL, + IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1); +#endif + + memlist_read_lock(); + + mp = phys_install; + + if (mp->ml_address == 0) { + /* since we already mapped page1 above */ + start = IMMU_PAGESIZE; + } else { + start = mp->ml_address; + } + npages = mp->ml_size/IMMU_PAGESIZE + 1; + + dvma_map(domain->dom_immu, domain, start, start, npages, NULL, + IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); + + ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64 + " - 0x%" PRIx64 "]", start, start + mp->ml_size); + + mp = mp->ml_next; + while (mp) { + ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64 + " - 0x%" PRIx64 "]", mp->ml_address, + mp->ml_address + mp->ml_size); + + start = mp->ml_address; + npages = mp->ml_size/IMMU_PAGESIZE + 1; + + dvma_map(domain->dom_immu, domain, start, start, + npages, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); + + mp = mp->ml_next; + } + + mp = bios_rsvd; + while (mp) { + ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64 + " - 0x%" PRIx64 "]", mp->ml_address, + mp->ml_address + mp->ml_size); + + start = mp->ml_address; + npages = mp->ml_size/IMMU_PAGESIZE + 1; + + dvma_map(domain->dom_immu, domain, start, start, + npages, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); + + mp = mp->ml_next; + } + + memlist_read_unlock(); +} + +/* + * create_xlate_arena() + * Create the dvma arena for a domain with translation + * mapping + */ +static void +create_xlate_arena(immu_t *immu, domain_t *domain, + dev_info_t *rdip, immu_flags_t immu_flags) +{ + char *arena_name; + struct memlist *mp; + int vmem_flags; + uint64_t start; + uint_t mgaw; + uint64_t size; + uint64_t maxaddr; + void *vmem_ret; + + arena_name = domain->dom_dvma_arena_name; + + /* Note, don't do sizeof (arena_name) - it is just a pointer */ + (void) snprintf(arena_name, + sizeof (domain->dom_dvma_arena_name), + "%s-domain-%d-xlate-DVMA-arena", immu->immu_name, + domain->dom_did); + + vmem_flags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP; + + /* + * No one else has access to this domain. + * So no domain locks needed + */ + ASSERT(!rw_lock_held(&(domain->dom_pgtable_rwlock))); + + /* Restrict mgaddr (max guest addr) to MGAW */ + mgaw = IMMU_CAP_MGAW(immu->immu_regs_cap); + + /* + * To ensure we avoid ioapic and PCI MMIO ranges we just + * use the physical memory address range of the system as the + * range + * Implementing above causes graphics device to barf on + * Lenovo X301 hence the toggle switch immu_mmio_safe. 
+ */ + maxaddr = ((uint64_t)1 << mgaw); + + if (immu_mmio_safe == B_FALSE) { + + start = MMU_PAGESIZE; + size = maxaddr - start; + + ddi_err(DER_VERB, rdip, + "%s: Creating dvma vmem arena [0x%" PRIx64 + " - 0x%" PRIx64 "]", arena_name, start, start + size); + + ASSERT(domain->dom_dvma_arena == NULL); + + /* + * We always allocate in quanta of IMMU_PAGESIZE + */ + domain->dom_dvma_arena = vmem_create(arena_name, + (void *)(uintptr_t)start, /* start addr */ + size, /* size */ + IMMU_PAGESIZE, /* quantum */ + NULL, /* afunc */ + NULL, /* ffunc */ + NULL, /* source */ + 0, /* qcache_max */ + vmem_flags); + + if (domain->dom_dvma_arena == NULL) { + ddi_err(DER_PANIC, rdip, + "Failed to allocate DVMA arena(%s) " + "for domain ID (%d)", arena_name, domain->dom_did); + /*NOTREACHED*/ + } + + } else { + + memlist_read_lock(); + + mp = phys_install; + + if (mp->ml_address == 0) + start = MMU_PAGESIZE; + else + start = mp->ml_address; + + if (start + mp->ml_size > maxaddr) + size = maxaddr - start; + else + size = mp->ml_size; + + ddi_err(DER_VERB, rdip, + "%s: Creating dvma vmem arena [0x%" PRIx64 + " - 0x%" PRIx64 "]", arena_name, start, start + size); + + ASSERT(domain->dom_dvma_arena == NULL); + + /* + * We always allocate in quanta of IMMU_PAGESIZE + */ + domain->dom_dvma_arena = vmem_create(arena_name, + (void *)(uintptr_t)start, /* start addr */ + size, /* size */ + IMMU_PAGESIZE, /* quantum */ + NULL, /* afunc */ + NULL, /* ffunc */ + NULL, /* source */ + 0, /* qcache_max */ + vmem_flags); + + if (domain->dom_dvma_arena == NULL) { + ddi_err(DER_PANIC, rdip, + "Failed to allocate DVMA arena(%s) " + "for domain ID (%d)", arena_name, domain->dom_did); + /*NOTREACHED*/ + } + + mp = mp->ml_next; + while (mp) { + + if (mp->ml_address == 0) + start = MMU_PAGESIZE; + else + start = mp->ml_address; + + if (start + mp->ml_size > maxaddr) + size = maxaddr - start; + else + size = mp->ml_size; + + ddi_err(DER_VERB, rdip, + "%s: Adding dvma vmem span [0x%" PRIx64 + " - 0x%" PRIx64 "]", arena_name, start, + start + size); + + vmem_ret = vmem_add(domain->dom_dvma_arena, + (void *)(uintptr_t)start, size, vmem_flags); + + if (vmem_ret == NULL) { + ddi_err(DER_PANIC, rdip, + "Failed to allocate DVMA arena(%s) " + "for domain ID (%d)", + arena_name, domain->dom_did); + /*NOTREACHED*/ + } + + mp = mp->ml_next; + } + memlist_read_unlock(); + } +} + +/* ################################### DOMAIN CODE ######################### */ + +/* + * Set the domain and domain-dip for a dip + */ +static void +set_domain( + dev_info_t *dip, + dev_info_t *ddip, + domain_t *domain) +{ + immu_devi_t *immu_devi; + domain_t *fdomain; + dev_info_t *fddip; + + ASSERT(dip); + ASSERT(ddip); + ASSERT(domain); + ASSERT(domain->dom_did > 0); /* must be an initialized domain */ + + immu_devi = immu_devi_get(dip); + ASSERT(immu_devi); + + mutex_enter(&(DEVI(dip)->devi_lock)); + fddip = immu_devi->imd_ddip; + fdomain = immu_devi->imd_domain; + + if (fddip) { + ASSERT(fddip == ddip); + } else { + immu_devi->imd_ddip = ddip; + } + + if (fdomain) { + ASSERT(fdomain == domain); + } else { + immu_devi->imd_domain = domain; + } + mutex_exit(&(DEVI(dip)->devi_lock)); +} + +/* + * device_domain() + * Get domain for a device. The domain may be global in which case it + * is shared between all IOMMU units. Due to potential AGAW differences + * between IOMMU units, such global domains *have to be* UNITY mapping + * domains. Alternatively, the domain may be local to a IOMMU unit. 
+ * Local domains may be shared or private; the scope of sharing is
+ * restricted to devices controlled by the IOMMU unit to which the
+ * domain belongs. If shared, they (currently) have to be UNITY
+ * domains. If private, a domain may be either a UNITY or a
+ * translation (XLATE) domain.
+ */
+static domain_t *
+device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags)
+{
+	dev_info_t *ddip; /* topmost dip in domain i.e. domain owner */
+	dev_info_t *edip; /* effective dip used for finding domain */
+	immu_t *immu;
+	domain_t *domain;
+	dvma_arg_t dvarg = {0};
+	int level;
+
+	ASSERT(rdip);
+
+	*ddipp = NULL;
+
+	/*
+	 * Check if the domain is already set. This is usually true
+	 * if this is not the first DVMA transaction.
+	 */
+	ddip = NULL;
+	domain = immu_devi_domain(rdip, &ddip);
+	if (domain) {
+		ASSERT(domain->dom_did > 0);
+		ASSERT(ddip);
+		*ddipp = ddip;
+		return (domain);
+	}
+
+	immu = immu_dvma_get_immu(rdip, immu_flags);
+	if (immu == NULL) {
+		/*
+		 * Possible that there is no IOMMU unit for this device
+		 * - BIOS bugs are one example.
+		 */
+		return (NULL);
+	}
+
+	/*
+	 * Some devices need to be redirected
+	 */
+	edip = rdip;
+
+	/*
+	 * For ISA devices attached under the LPC bridge
+	 */
+	if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) {
+		edip = get_lpc_devinfo(immu, rdip, immu_flags);
+	}
+
+	/*
+	 * For agpgart, use the real graphics devinfo
+	 */
+	if (strcmp(ddi_node_name(rdip), "agpgart") == 0) {
+		edip = get_gfx_devinfo(rdip);
+	}
+
+	if (edip == NULL) {
+		ddi_err(DER_MODE, rdip, "IMMU redirect failed");
+		return (NULL);
+	}
+
+	dvarg.dva_rdip = edip;
+	dvarg.dva_ddip = NULL;
+	dvarg.dva_domain = NULL;
+	dvarg.dva_flags = immu_flags;
+	level = 0;
+	if (immu_walk_ancestor(edip, NULL, get_branch_domain,
+	    &dvarg, &level, immu_flags) != DDI_SUCCESS) {
+		/*
+		 * Maybe low memory; return an error
+		 * so the driver tries again later.
+		 */
+		return (NULL);
+	}
+
+	/* should have walked at least 1 dip (i.e. edip) */
+	ASSERT(level > 0);
+
+	ddip = dvarg.dva_ddip;		/* must be present */
+	domain = dvarg.dva_domain;	/* may be NULL */
+
+	/*
+	 * We may find the domain during our ancestor walk on any one of
+	 * our ancestor dips. If the domain is found, the domain-dip
+	 * (i.e. ddip) will also be found in the same immu_devi struct.
+	 * The domain-dip is the highest ancestor dip which shares the
+	 * same domain with edip.
+	 * The domain may or may not be found, but the domain dip must
+	 * be found.
+	 */
+	if (ddip == NULL) {
+		ddi_err(DER_MODE, rdip, "Cannot find domain dip for device. "
+		    "Effective dip (%s%d)", ddi_driver_name(edip),
+		    ddi_get_instance(edip));
+		return (NULL);
+	}
+
+	/*
+	 * Did we find a domain ?
+	 */
+	if (domain) {
+		goto found;
+	}
+
+	/* nope, so allocate */
+	domain = domain_create(immu, ddip, rdip, immu_flags);
+	if (domain == NULL) {
+		return (NULL);
+	}
+	ASSERT(domain->dom_did > 0);
+
+	/*FALLTHROUGH*/
+found:
+	/*
+	 * We know *domain* is the right domain, so panic if
+	 * another domain is set for either the request-dip or
+	 * effective dip.
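+	 * set_domain() ASSERTs that any domain/ddip already recorded
+	 * on a dip matches the one being set.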
+ */ + set_domain(ddip, ddip, domain); + set_domain(edip, ddip, domain); + set_domain(rdip, ddip, domain); + + *ddipp = ddip; + return (domain); +} + +static void +create_unity_domain(immu_t *immu) +{ + domain_t *domain; + + /* 0 is reserved by Vt-d */ + /*LINTED*/ + ASSERT(IMMU_UNITY_DID > 0); + + /* domain created during boot and always use sleep flag */ + domain = kmem_zalloc(sizeof (domain_t), KM_SLEEP); + + rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL); + list_create(&(domain->dom_pglist), sizeof (pgtable_t), + offsetof(pgtable_t, swpg_domain_node)); + + domain->dom_did = IMMU_UNITY_DID; + domain->dom_maptype = IMMU_MAPTYPE_UNITY; + + domain->dom_immu = immu; + immu->immu_unity_domain = domain; + + /* + * Setup the domain's initial page table + * should never fail. + */ + domain->dom_pgtable_root = pgtable_alloc(immu, domain, + IMMU_FLAGS_SLEEP); + + ASSERT(domain->dom_pgtable_root); + + map_unity_domain(domain); + + /* + * put it on the system-wide UNITY domain list + */ + mutex_enter(&(immu_domain_lock)); + list_insert_tail(&immu_unity_domain_list, domain); + mutex_exit(&(immu_domain_lock)); +} + +/* + * ddip is the domain-dip - the topmost dip in a domain + * rdip is the requesting-dip - the device which is + * requesting DVMA setup + * if domain is a non-shared domain rdip == ddip + */ +static domain_t * +domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip, + immu_flags_t immu_flags) +{ + int kmflags; + domain_t *domain; + char mod_hash_name[128]; + immu_devi_t *immu_devi; + int did; + + ASSERT(immu); + ASSERT(ddip); + + immu_devi = immu_devi_get(rdip); + + ASSERT(immu_devi); + + /* + * First allocate a domainid. + * This routine will never fail, since if we run out + * of domains the unity domain will be allocated. + */ + did = did_alloc(immu, rdip, ddip, immu_flags); + ASSERT(did > 0); + if (did == IMMU_UNITY_DID) { + /* domain overflow */ + ASSERT(immu->immu_unity_domain); + return (immu->immu_unity_domain); + } + + kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP; + domain = kmem_zalloc(sizeof (domain_t), kmflags); + if (domain == NULL) { + ddi_err(DER_PANIC, rdip, "Failed to alloc DVMA domain " + "structure for device. IOMMU unit: %s", immu->immu_name); + /*NOTREACHED*/ + } + + rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL); + list_create(&(domain->dom_pglist), sizeof (pgtable_t), + offsetof(pgtable_t, swpg_domain_node)); + + (void) snprintf(mod_hash_name, sizeof (mod_hash_name), + "immu%s-domain%d-pava-hash", immu->immu_name, did); + + domain->dom_did = did; + domain->dom_immu = immu; + domain->dom_maptype = IMMU_MAPTYPE_XLATE; + + /* + * Create xlate DVMA arena for this domain. + */ + create_xlate_arena(immu, domain, rdip, immu_flags); + + /* + * Setup the domain's initial page table + */ + domain->dom_pgtable_root = pgtable_alloc(immu, domain, immu_flags); + if (domain->dom_pgtable_root == NULL) { + ddi_err(DER_PANIC, rdip, "Failed to alloc root " + "pgtable for domain (%d). IOMMU unit: %s", + domain->dom_did, immu->immu_name); + /*NOTREACHED*/ + } + + /* + * Since this is a immu unit-specific domain, put it on + * the per-immu domain list. 
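+	 * (It also goes on the system-wide xlate domain list and into
+	 * the BDF hash below, so later lookups for the same BDF reuse
+	 * this domain.)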
+ */ + mutex_enter(&(immu->immu_lock)); + list_insert_head(&immu->immu_domain_list, domain); + mutex_exit(&(immu->immu_lock)); + + /* + * Also put it on the system-wide xlate domain list + */ + mutex_enter(&(immu_domain_lock)); + list_insert_head(&immu_xlate_domain_list, domain); + mutex_exit(&(immu_domain_lock)); + + bdf_domain_insert(immu_devi, domain); + +#ifdef BUGGY_DRIVERS + /* + * Map page0. Some broken HW/FW access it. + */ + dvma_map(domain->dom_immu, domain, 0, 0, 1, NULL, + IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1); +#endif + + return (domain); +} + +/* + * Create domainid arena. + * Domainid 0 is reserved by Vt-d spec and cannot be used by + * system software. + * Domainid 1 is reserved by solaris and used for *all* of the following: + * as the "uninitialized" domain - For devices not yet controlled + * by Solaris + * as the "unity" domain - For devices that will always belong + * to the unity domain + * as the "overflow" domain - Used for any new device after we + * run out of domains + * All of the above domains map into a single domain with + * domainid 1 and UNITY DVMA mapping + * Each IMMU unity has its own unity/uninit/overflow domain + */ +static void +did_init(immu_t *immu) +{ + (void) snprintf(immu->immu_did_arena_name, + sizeof (immu->immu_did_arena_name), + "%s_domainid_arena", immu->immu_name); + + ddi_err(DER_VERB, NULL, "%s: Creating domainid arena %s", + immu->immu_name, immu->immu_did_arena_name); + + immu->immu_did_arena = vmem_create( + immu->immu_did_arena_name, + (void *)(uintptr_t)(IMMU_UNITY_DID + 1), /* start addr */ + immu->immu_max_domains - IMMU_UNITY_DID, + 1, /* quantum */ + NULL, /* afunc */ + NULL, /* ffunc */ + NULL, /* source */ + 0, /* qcache_max */ + VM_SLEEP); + + /* Even with SLEEP flag, vmem_create() can fail */ + if (immu->immu_did_arena == NULL) { + ddi_err(DER_PANIC, NULL, "%s: Failed to create Intel " + "IOMMU domainid allocator: %s", immu->immu_name, + immu->immu_did_arena_name); + } +} + +/* ######################### CONTEXT CODE ################################# */ + +static void +context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table, + int bus, int devfunc) +{ + pgtable_t *context; + pgtable_t *pgtable_root; + pgtable_t *unity_pgtable_root; + hw_rce_t *hw_rent; + hw_rce_t *hw_cent; + hw_rce_t *ctxp; + + ASSERT(rw_write_held(&(immu->immu_ctx_rwlock))); + + ASSERT(immu); + ASSERT(domain); + ASSERT(root_table); + ASSERT(bus >= 0); + ASSERT(devfunc >= 0); + ASSERT(domain->dom_pgtable_root); + + ctxp = (hw_rce_t *)(root_table->swpg_next_array); + context = *(pgtable_t **)(ctxp + bus); + hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr) + bus; + if (ROOT_GET_P(hw_rent)) { + ASSERT(ROOT_GET_CONT(hw_rent) == context->hwpg_paddr); + } else { + ROOT_SET_CONT(hw_rent, context->hwpg_paddr); + ROOT_SET_P(hw_rent); + immu_regs_cpu_flush(immu, (caddr_t)hw_rent, sizeof (hw_rce_t)); + } + hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc; + + pgtable_root = domain->dom_pgtable_root; + unity_pgtable_root = immu->immu_unity_domain->dom_pgtable_root; + if (CONT_GET_AVAIL(hw_cent) == IMMU_CONT_UNINITED) { + ASSERT(CONT_GET_P(hw_cent)); + ASSERT(CONT_GET_DID(hw_cent) == + immu->immu_unity_domain->dom_did); + ASSERT(CONT_GET_AW(hw_cent) == immu->immu_dvma_agaw); + ASSERT(CONT_GET_TTYPE(hw_cent) == TTYPE_XLATE_ONLY); + ASSERT(CONT_GET_ASR(hw_cent) == + unity_pgtable_root->hwpg_paddr); + + /* need to disable context entry before reprogramming it */ + bzero(hw_cent, sizeof (hw_rce_t)); + + /* flush caches */ + 
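+
+		/*
+		 * Order matters: CPU cache first, then the context cache
+		 * for both the old (unity) and new domain-ids, then the
+		 * IOTLB for both, and finally the write buffer.
+		 */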
immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t)); + ASSERT(rw_write_held(&(immu->immu_ctx_rwlock))); + immu_regs_context_flush(immu, 0, 0, + immu->immu_unity_domain->dom_did, CONTEXT_DSI); + immu_regs_context_flush(immu, 0, 0, domain->dom_did, + CONTEXT_DSI); + immu_regs_iotlb_flush(immu, immu->immu_unity_domain->dom_did, + 0, 0, TLB_IVA_WHOLE, IOTLB_DSI); + immu_regs_iotlb_flush(immu, domain->dom_did, 0, 0, + TLB_IVA_WHOLE, IOTLB_DSI); + immu_regs_wbf_flush(immu); + + CONT_SET_AVAIL(hw_cent, IMMU_CONT_INITED); + CONT_SET_DID(hw_cent, domain->dom_did); + CONT_SET_AW(hw_cent, immu->immu_dvma_agaw); + CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr); + /*LINTED*/ + CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY); + CONT_SET_P(hw_cent); + immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t)); + } else { + ASSERT(CONT_GET_AVAIL(hw_cent) == IMMU_CONT_INITED); + ASSERT(CONT_GET_P(hw_cent)); + ASSERT(CONT_GET_DID(hw_cent) == domain->dom_did); + ASSERT(CONT_GET_AW(hw_cent) == immu->immu_dvma_agaw); + ASSERT(CONT_GET_TTYPE(hw_cent) == TTYPE_XLATE_ONLY); + ASSERT(CONT_GET_ASR(hw_cent) == pgtable_root->hwpg_paddr); + } +} + +static pgtable_t * +context_create(immu_t *immu) +{ + int bus; + int devfunc; + pgtable_t *root_table; + pgtable_t *context; + pgtable_t *pgtable_root; + hw_rce_t *ctxp; + hw_rce_t *hw_rent; + hw_rce_t *hw_cent; + + /* Allocate a zeroed root table (4K 256b entries) */ + root_table = pgtable_alloc(immu, NULL, IMMU_FLAGS_SLEEP); + + /* + * Setup context tables for all possible root table entries. + * Start out with unity domains for all entries. + */ + ctxp = (hw_rce_t *)(root_table->swpg_next_array); + hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr); + for (bus = 0; bus < IMMU_ROOT_NUM; bus++, ctxp++, hw_rent++) { + context = pgtable_alloc(immu, NULL, IMMU_FLAGS_SLEEP); + ASSERT(ROOT_GET_P(hw_rent) == 0); + ROOT_SET_P(hw_rent); + ROOT_SET_CONT(hw_rent, context->hwpg_paddr); + hw_cent = (hw_rce_t *)(context->hwpg_vaddr); + for (devfunc = 0; devfunc < IMMU_CONT_NUM; + devfunc++, hw_cent++) { + ASSERT(CONT_GET_P(hw_cent) == 0); + pgtable_root = + immu->immu_unity_domain->dom_pgtable_root; + CONT_SET_DID(hw_cent, + immu->immu_unity_domain->dom_did); + CONT_SET_AW(hw_cent, immu->immu_dvma_agaw); + CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr); + /*LINTED*/ + CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY); + CONT_SET_AVAIL(hw_cent, IMMU_CONT_UNINITED); + CONT_SET_P(hw_cent); + } + immu_regs_cpu_flush(immu, context->hwpg_vaddr, IMMU_PAGESIZE); + *((pgtable_t **)ctxp) = context; + } + immu_regs_cpu_flush(immu, root_table->hwpg_vaddr, IMMU_PAGESIZE); + + return (root_table); +} + +/* + * Called during rootnex attach, so no locks needed + */ +static void +context_init(immu_t *immu) +{ + ASSERT(immu); + ASSERT(immu->immu_ctx_root == NULL); + + rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL); + + immu_regs_wbf_flush(immu); + + immu->immu_ctx_root = context_create(immu); + + immu_regs_set_root_table(immu); + + rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER); + immu_regs_context_flush(immu, 0, 0, 0, CONTEXT_GLOBAL); + rw_exit(&(immu->immu_ctx_rwlock)); + immu_regs_iotlb_flush(immu, 0, 0, 0, 0, IOTLB_GLOBAL); + immu_regs_wbf_flush(immu); +} + + +/* + * Find top pcib + */ +static int +find_top_pcib(dev_info_t *dip, void *arg) +{ + immu_devi_t *immu_devi; + dev_info_t **pcibdipp = (dev_info_t **)arg; + + ASSERT(dip); + + immu_devi = immu_devi_get(dip); + ASSERT(immu_devi); + + if (immu_devi->imd_pcib_type == IMMU_PCIB_PCI_PCI) { + *pcibdipp = dip; + } + + return 
(DDI_WALK_CONTINUE);
+}
+
+static int
+immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
+    dev_info_t *rdip, immu_flags_t immu_flags)
+{
+	immu_devi_t *r_immu_devi;
+	immu_devi_t *d_immu_devi;
+	int r_bus;
+	int d_bus;
+	int r_devfunc;
+	int d_devfunc;
+	immu_pcib_t d_pcib_type;
+	immu_pcib_t r_pcib_type;
+	dev_info_t *pcibdip;
+
+	if (ddip == NULL || rdip == NULL ||
+	    ddip == root_devinfo || rdip == root_devinfo) {
+		ddi_err(DER_MODE, rdip, "immu_context_update: domain-dip or "
+		    "request-dip is NULL or is root devinfo");
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * We need to set the context fields
+	 * based on what type of device rdip and ddip are.
+	 * To do that we need the immu_devi field.
+	 * Set the immu_devi field (if not already set)
+	 */
+	if (immu_devi_set(ddip, immu_flags) == DDI_FAILURE) {
+		ddi_err(DER_MODE, rdip,
+		    "immu_context_update: failed to set immu_devi for ddip");
+		return (DDI_FAILURE);
+	}
+
+	if (immu_devi_set(rdip, immu_flags) == DDI_FAILURE) {
+		ddi_err(DER_MODE, rdip,
+		    "immu_context_update: failed to set immu_devi for rdip");
+		return (DDI_FAILURE);
+	}
+
+	d_immu_devi = immu_devi_get(ddip);
+	r_immu_devi = immu_devi_get(rdip);
+	ASSERT(r_immu_devi);
+	ASSERT(d_immu_devi);
+
+	d_bus = d_immu_devi->imd_bus;
+	d_devfunc = d_immu_devi->imd_devfunc;
+	d_pcib_type = d_immu_devi->imd_pcib_type;
+	r_bus = r_immu_devi->imd_bus;
+	r_devfunc = r_immu_devi->imd_devfunc;
+	r_pcib_type = r_immu_devi->imd_pcib_type;
+
+	ASSERT(d_bus >= 0);
+
+	rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
+	if (rdip == ddip) {
+		ASSERT(d_pcib_type == IMMU_PCIB_ENDPOINT ||
+		    d_pcib_type == IMMU_PCIB_PCIE_PCIE);
+		ASSERT(r_bus >= 0);
+		ASSERT(r_devfunc >= 0);
+		/* rdip is a PCIE device. set context for it only */
+		context_set(immu, domain, immu->immu_ctx_root, r_bus,
+		    r_devfunc);
+#ifdef BUGGY_DRIVERS
+	} else if (r_immu_devi == d_immu_devi) {
+#ifdef TEST
+		ddi_err(DER_WARN, rdip, "Driver bug: devices %p and "
+		    "%p are identical", (void *)rdip, (void *)ddip);
+#endif
+		ASSERT(d_pcib_type == IMMU_PCIB_ENDPOINT);
+		ASSERT(r_bus >= 0);
+		ASSERT(r_devfunc >= 0);
+		/* rdip is a PCIE device. set context for it only */
+		context_set(immu, domain, immu->immu_ctx_root, r_bus,
+		    r_devfunc);
+#endif
+	} else if (d_pcib_type == IMMU_PCIB_PCIE_PCI) {
+		/*
+		 * ddip is a PCIE_PCI bridge. Set context for ddip's
+		 * secondary bus. If rdip is on ddip's secondary
+		 * bus, set context for rdip. Else, set context
+		 * for rdip's PCI bridge on ddip's secondary bus.
+		 */
+		context_set(immu, domain, immu->immu_ctx_root,
+		    d_immu_devi->imd_sec, 0);
+		if (d_immu_devi->imd_sec == r_bus) {
+			context_set(immu, domain, immu->immu_ctx_root,
+			    r_bus, r_devfunc);
+		} else {
+			pcibdip = NULL;
+			if (immu_walk_ancestor(rdip, ddip, find_top_pcib,
+			    &pcibdip, NULL, immu_flags) == DDI_SUCCESS &&
+			    pcibdip != NULL) {
+				ASSERT(pcibdip);
+				r_immu_devi = immu_devi_get(pcibdip);
+				ASSERT(r_immu_devi);
+				ASSERT(r_immu_devi->imd_pcib_type ==
+				    IMMU_PCIB_PCI_PCI);
+				r_bus = r_immu_devi->imd_bus;
+				r_devfunc = r_immu_devi->imd_devfunc;
+				context_set(immu, domain, immu->immu_ctx_root,
+				    r_bus, r_devfunc);
+			} else {
+				ddi_err(DER_PANIC, rdip, "Failed to find PCI "
+				    "bridge for PCI device");
+				/*NOTREACHED*/
+			}
+		}
+	} else if (d_pcib_type == IMMU_PCIB_PCI_PCI) {
+		context_set(immu, domain, immu->immu_ctx_root, d_bus,
+		    d_devfunc);
+	} else if (d_pcib_type == IMMU_PCIB_ENDPOINT) {
+		ASSERT(r_pcib_type == IMMU_PCIB_NOBDF);
+		/*
+		 * ddip is a PCIE device which has a non-PCI device under it
+		 * i.e. it is a PCI-nonPCI bridge.
Example: pciicde-ata + */ + context_set(immu, domain, immu->immu_ctx_root, d_bus, + d_devfunc); + } else { + ddi_err(DER_PANIC, rdip, "unknown device type. Cannot " + "set IMMU context."); + /*NOTREACHED*/ + } + rw_exit(&(immu->immu_ctx_rwlock)); + + /* XXX do we need a membar_producer() here */ + return (DDI_SUCCESS); +} + +/* ##################### END CONTEXT CODE ################################## */ +/* ##################### MAPPING CODE ################################## */ + + +static boolean_t +PDTE_check(immu_t *immu, hw_pdte_t pdte, pgtable_t *next, paddr_t paddr, + dev_info_t *rdip, immu_flags_t immu_flags) +{ + if (immu_flags & IMMU_FLAGS_PAGE1) { + ASSERT(paddr == 0); + } else { + ASSERT((next == NULL) ^ (paddr == 0)); + } + + /* The PDTE must be set i.e. present bit is set */ + if (!PDTE_P(pdte)) { + ddi_err(DER_MODE, rdip, "No present flag"); + return (B_FALSE); + } + + /* + * Just assert to check most significant system software field + * (PDTE_SW4) as it is same as present bit and we + * checked that above + */ + ASSERT(PDTE_SW4(pdte)); + + /* + * TM field should be clear if not reserved. + * non-leaf is always reserved + */ + if (next == NULL && immu_regs_is_TM_reserved(immu) == B_FALSE) { + if (PDTE_TM(pdte)) { + ddi_err(DER_MODE, rdip, "TM flag set"); + return (B_FALSE); + } + } + + /* + * The SW3 field is not used and must be clear + */ + if (PDTE_SW3(pdte)) { + ddi_err(DER_MODE, rdip, "SW3 set"); + return (B_FALSE); + } + + /* + * PFN (for PTE) or next level pgtable-paddr (for PDE) must be set + */ + if (next == NULL) { + ASSERT(paddr % IMMU_PAGESIZE == 0); + if (PDTE_PADDR(pdte) != paddr) { + ddi_err(DER_MODE, rdip, + "PTE paddr mismatch: %lx != %lx", + PDTE_PADDR(pdte), paddr); + return (B_FALSE); + } + } else { + if (PDTE_PADDR(pdte) != next->hwpg_paddr) { + ddi_err(DER_MODE, rdip, + "PDE paddr mismatch: %lx != %lx", + PDTE_PADDR(pdte), next->hwpg_paddr); + return (B_FALSE); + } + } + + /* + * SNP field should be clear if not reserved. 
+ * non-leaf is always reserved + */ + if (next == NULL && immu_regs_is_SNP_reserved(immu) == B_FALSE) { + if (PDTE_SNP(pdte)) { + ddi_err(DER_MODE, rdip, "SNP set"); + return (B_FALSE); + } + } + + /* second field available for system software should be clear */ + if (PDTE_SW2(pdte)) { + ddi_err(DER_MODE, rdip, "SW2 set"); + return (B_FALSE); + } + + /* Super pages field should be clear */ + if (PDTE_SP(pdte)) { + ddi_err(DER_MODE, rdip, "SP set"); + return (B_FALSE); + } + + /* + * least significant field available for + * system software should be clear + */ + if (PDTE_SW1(pdte)) { + ddi_err(DER_MODE, rdip, "SW1 set"); + return (B_FALSE); + } + + if ((immu_flags & IMMU_FLAGS_READ) && !PDTE_READ(pdte)) { + ddi_err(DER_MODE, rdip, "READ not set"); + return (B_FALSE); + } + + if ((immu_flags & IMMU_FLAGS_WRITE) && !PDTE_WRITE(pdte)) { + ddi_err(DER_MODE, rdip, "WRITE not set"); + return (B_FALSE); + } + + return (B_TRUE); +} +/*ARGSUSED*/ +static void +PTE_clear_one(immu_t *immu, domain_t *domain, xlate_t *xlate, uint64_t dvma, + dev_info_t *rdip) +{ + hw_pdte_t *hwp; + pgtable_t *pgtable; + int idx; + hw_pdte_t pte; + + ASSERT(xlate->xlt_level == 1); + + idx = xlate->xlt_idx; + pgtable = xlate->xlt_pgtable; + + ASSERT(dvma % IMMU_PAGESIZE == 0); + ASSERT(pgtable); + ASSERT(idx <= IMMU_PGTABLE_MAXIDX); + + /* + * since we are clearing PTEs, lock the + * page table write mode + */ + rw_enter(&(pgtable->swpg_rwlock), RW_WRITER); + + /* + * We are at the leaf - next level array must be NULL + */ + ASSERT(pgtable->swpg_next_array == NULL); + + hwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx; + + pte = *hwp; + /* Cannot clear a HW PTE that is aleady clear */ + ASSERT(PDTE_P(pte)); + PDTE_CLEAR_P(pte); + *hwp = pte; + + /* flush writes to HW PTE table */ + immu_regs_cpu_flush(immu, (caddr_t)hwp, sizeof (hw_pdte_t)); + + rw_exit(&(xlate->xlt_pgtable->swpg_rwlock)); +} + +/*ARGSUSED*/ +static void +xlate_setup(immu_t *immu, uint64_t dvma, xlate_t *xlate, + int nlevels, dev_info_t *rdip) +{ + int level; + uint64_t offbits; + + /* level 0 is never used. Sanity check */ + ASSERT(xlate->xlt_level == 0); + ASSERT(xlate->xlt_idx == 0); + ASSERT(xlate->xlt_pgtable == NULL); + ASSERT(dvma % IMMU_PAGESIZE == 0); + + /* + * Skip the first 12 bits which is the offset into + * 4K PFN (phys page frame based on IMMU_PAGESIZE) + */ + offbits = dvma >> IMMU_PAGESHIFT; + + /* skip to level 1 i.e. leaf PTE */ + for (level = 1, xlate++; level <= nlevels; level++, xlate++) { + xlate->xlt_level = level; + xlate->xlt_idx = (offbits & IMMU_PGTABLE_LEVEL_MASK); + ASSERT(xlate->xlt_idx <= IMMU_PGTABLE_MAXIDX); + xlate->xlt_pgtable = NULL; + offbits >>= IMMU_PGTABLE_LEVEL_STRIDE; + } +} + +/* + * Read the pgtables + */ +static void +PDE_lookup(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels, + dev_info_t *rdip) +{ + pgtable_t *pgtable; + pgtable_t *next; + hw_pdte_t pde; + uint_t idx; + + /* xlate should be at level 0 */ + ASSERT(xlate->xlt_level == 0); + ASSERT(xlate->xlt_idx == 0); + + /* start with highest level pgtable i.e. 
root */ + xlate += nlevels; + ASSERT(xlate->xlt_level == nlevels); + + if (xlate->xlt_pgtable == NULL) { + xlate->xlt_pgtable = domain->dom_pgtable_root; + } + + for (; xlate->xlt_level > 1; xlate--) { + + idx = xlate->xlt_idx; + pgtable = xlate->xlt_pgtable; + + ASSERT(pgtable); + ASSERT(idx <= IMMU_PGTABLE_MAXIDX); + + if ((xlate - 1)->xlt_pgtable) { + continue; + } + + /* xlate's leafier level is not set, set it now */ + + /* Lock the pgtable in read mode */ + rw_enter(&(pgtable->swpg_rwlock), RW_READER); + + /* + * since we are unmapping, the pgtable should + * already point to a leafier pgtable. + */ + next = *(pgtable->swpg_next_array + idx); + ASSERT(next); + + pde = *((hw_pdte_t *)(pgtable->hwpg_vaddr) + idx); + + ASSERT(PDTE_check(immu, pde, next, 0, rdip, 0) == B_TRUE); + + (xlate - 1)->xlt_pgtable = next; + + rw_exit(&(pgtable->swpg_rwlock)); + } +} + +static void +PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr, + dev_info_t *rdip, immu_flags_t immu_flags) +{ + hw_pdte_t pte; + + pte = *hwp; + + if (PDTE_P(pte)) { + if (PDTE_PADDR(pte) != paddr) { + ddi_err(DER_MODE, rdip, "PTE paddr %lx != paddr %lx", + PDTE_PADDR(pte), paddr); + } + goto out; + } + + + /* Don't touch SW4. It is the present field */ + + /* clear TM field if not reserved */ + if (immu_regs_is_TM_reserved(immu) == B_FALSE) { + PDTE_CLEAR_TM(pte); + } + + /* Clear 3rd field for system software - not used */ + PDTE_CLEAR_SW3(pte); + + /* Set paddr */ + ASSERT(paddr % IMMU_PAGESIZE == 0); + PDTE_CLEAR_PADDR(pte); + PDTE_SET_PADDR(pte, paddr); + + /* clear SNP field if not reserved. */ + if (immu_regs_is_SNP_reserved(immu) == B_FALSE) { + PDTE_CLEAR_SNP(pte); + } + + /* Clear SW2 field available for software */ + PDTE_CLEAR_SW2(pte); + + /* SP is don't care for PTEs. Clear it for cleanliness */ + PDTE_CLEAR_SP(pte); + + /* Clear SW1 field available for software */ + PDTE_CLEAR_SW1(pte); + + /* + * Now that we are done writing the PTE + * set the "present" flag. Note this present + * flag is a bit in the PDE/PTE that the + * spec says is available for system software. + * This is an implementation detail of Solaris + * bare-metal Intel IOMMU. + * The present field in a PDE/PTE is not defined + * by the Vt-d spec + */ + + PDTE_SET_P(pte); + +out: + if (immu_flags & IMMU_FLAGS_READ) + PDTE_SET_READ(pte); + if (immu_flags & IMMU_FLAGS_WRITE) + PDTE_SET_WRITE(pte); + +#ifdef BUGGY_DRIVERS + PDTE_SET_READ(pte); + PDTE_SET_WRITE(pte); +#endif + + *hwp = pte; +} + +/*ARGSUSED*/ +static void +PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, + uint64_t *dvma_ptr, paddr_t *paddr_ptr, uint64_t *npages_ptr, + dev_info_t *rdip, immu_flags_t immu_flags) +{ + paddr_t paddr; + uint64_t npages; + uint64_t dvma; + pgtable_t *pgtable; + hw_pdte_t *hwp; + hw_pdte_t *shwp; + int idx; + + ASSERT(xlate->xlt_level == 1); + + pgtable = xlate->xlt_pgtable; + idx = xlate->xlt_idx; + + ASSERT(idx <= IMMU_PGTABLE_MAXIDX); + ASSERT(pgtable); + + dvma = *dvma_ptr; + paddr = *paddr_ptr; + npages = *npages_ptr; + + ASSERT(paddr || (immu_flags & IMMU_FLAGS_PAGE1)); + ASSERT(dvma || (immu_flags & IMMU_FLAGS_PAGE1)); + ASSERT(npages); + + /* + * since we are setting PTEs, lock the page table in + * write mode + */ + rw_enter(&(pgtable->swpg_rwlock), RW_WRITER); + + /* + * we are at the leaf pgtable - no further levels. + * The next_array field should be NULL. 
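+	 * Each call fills at most the remainder of this leaf table;
+	 * with the usual VT-d 9-bit stride that is up to 512 PTEs
+	 * (2MB of DVMA), after which dvma_map() loops around with a
+	 * fresh xlate_setup().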
+ */ + ASSERT(pgtable->swpg_next_array == NULL); + + shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx; + + hwp = shwp; + for (; npages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) { + + PTE_set_one(immu, hwp, paddr, rdip, immu_flags); + + ASSERT(PDTE_check(immu, *hwp, NULL, paddr, rdip, immu_flags) + == B_TRUE); + + paddr += IMMU_PAGESIZE; + dvma += IMMU_PAGESIZE; + npages--; + } + + /* flush writes to HW PTE table */ + immu_regs_cpu_flush(immu, (caddr_t)shwp, (hwp - shwp) * + sizeof (hw_pdte_t)); + + *dvma_ptr = dvma; + *paddr_ptr = paddr; + *npages_ptr = npages; + xlate->xlt_idx = idx; + + rw_exit(&(pgtable->swpg_rwlock)); +} + +/*ARGSUSED*/ +static void +PDE_set_one(immu_t *immu, hw_pdte_t *hwp, pgtable_t *next, + dev_info_t *rdip, immu_flags_t immu_flags) +{ + hw_pdte_t pde; + + pde = *hwp; + + /* if PDE is already set, make sure it is correct */ + if (PDTE_P(pde)) { + ASSERT(PDTE_PADDR(pde) == next->hwpg_paddr); + goto out; + } + + /* Dont touch SW4, it is the present bit */ + + /* don't touch TM field it is reserved for PDEs */ + + /* 3rd field available for system software is not used */ + PDTE_CLEAR_SW3(pde); + + /* Set next level pgtable-paddr for PDE */ + ASSERT(next->hwpg_paddr % IMMU_PAGESIZE == 0); + PDTE_CLEAR_PADDR(pde); + PDTE_SET_PADDR(pde, next->hwpg_paddr); + + /* don't touch SNP field it is reserved for PDEs */ + + /* Clear second field available for system software */ + PDTE_CLEAR_SW2(pde); + + /* No super pages for PDEs */ + PDTE_CLEAR_SP(pde); + + /* Clear SW1 for software */ + PDTE_CLEAR_SW1(pde); + + /* + * Now that we are done writing the PDE + * set the "present" flag. Note this present + * flag is a bit in the PDE/PTE that the + * spec says is available for system software. + * This is an implementation detail of Solaris + * base-metal Intel IOMMU. + * The present field in a PDE/PTE is not defined + * by the Vt-d spec + */ +out: + + if (immu_flags & IMMU_FLAGS_READ) + PDTE_SET_READ(pde); + if (immu_flags & IMMU_FLAGS_WRITE) + PDTE_SET_WRITE(pde); + +#ifdef BUGGY_DRIVERS + PDTE_SET_READ(pde); + PDTE_SET_WRITE(pde); +#endif + + PDTE_SET_P(pde); + + *hwp = pde; + + immu_regs_cpu_flush(immu, (caddr_t)hwp, sizeof (hw_pdte_t)); +} + +/* + * Used to set PDEs + */ +static void +PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels, + dev_info_t *rdip, immu_flags_t immu_flags) +{ + pgtable_t *pgtable; + pgtable_t *new; + pgtable_t *next; + hw_pdte_t *hwp; + int level; + uint_t idx; + + /* xlate should be at level 0 */ + ASSERT(xlate->xlt_level == 0); + ASSERT(xlate->xlt_idx == 0); + + /* start with highest level pgtable i.e. 
root */ + xlate += nlevels; + ASSERT(xlate->xlt_level == nlevels); + + new = NULL; + xlate->xlt_pgtable = domain->dom_pgtable_root; + for (level = nlevels; level > 1; level--, xlate--) { + + ASSERT(xlate->xlt_level == level); + + idx = xlate->xlt_idx; + pgtable = xlate->xlt_pgtable; + + ASSERT(pgtable); + ASSERT(idx <= IMMU_PGTABLE_MAXIDX); + + /* speculative alloc */ + if (new == NULL) { + new = pgtable_alloc(immu, domain, immu_flags); + if (new == NULL) { + ddi_err(DER_PANIC, rdip, "pgtable alloc err"); + } + + } + + /* Alway lock the pgtable in write mode */ + rw_enter(&(pgtable->swpg_rwlock), RW_WRITER); + + hwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx; + + ASSERT(pgtable->swpg_next_array); + + next = (pgtable->swpg_next_array)[idx]; + + /* + * check if leafier level already has a pgtable + * if yes, verify + */ + if (next == NULL) { + next = new; + new = NULL; + if (level == 2) { + /* leaf cannot have next_array */ + kmem_free(next->swpg_next_array, + IMMU_PAGESIZE); + next->swpg_next_array = NULL; + } + (pgtable->swpg_next_array)[idx] = next; + PDE_set_one(immu, hwp, next, rdip, immu_flags); + } else { + hw_pdte_t pde = *hwp; + + if (immu_flags & IMMU_FLAGS_READ) + PDTE_SET_READ(pde); + if (immu_flags & IMMU_FLAGS_WRITE) + PDTE_SET_WRITE(pde); + +#ifdef BUGGY_DRIVERS +/* If buggy driver we already set permission READ+WRITE so nothing to do */ +#endif + + *hwp = pde; + } + + ASSERT(PDTE_check(immu, *hwp, next, 0, rdip, immu_flags) + == B_TRUE); + + (xlate - 1)->xlt_pgtable = next; + + rw_exit(&(pgtable->swpg_rwlock)); + } + + if (new) { + pgtable_free(immu, new, domain); + } +} + +/* + * dvma_map() + * map a contiguous range of DVMA pages + * + * immu: IOMMU unit for which we are generating DVMA cookies + * domain: domain + * sdvma: Starting dvma + * spaddr: Starting paddr + * npages: Number of pages + * rdip: requesting device + * immu_flags: flags + */ +static void +dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma, uint64_t spaddr, + uint64_t npages, dev_info_t *rdip, immu_flags_t immu_flags) +{ + uint64_t dvma; + paddr_t paddr; + uint64_t n; + int nlevels = immu->immu_dvma_nlevels; + xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0}; + + ASSERT(nlevels <= IMMU_PGTABLE_MAX_LEVELS); + ASSERT(spaddr % IMMU_PAGESIZE == 0); + ASSERT(sdvma % IMMU_PAGESIZE == 0); + ASSERT(npages); + + n = npages; + dvma = sdvma; + paddr = spaddr; + + while (n > 0) { + xlate_setup(immu, dvma, xlate, nlevels, rdip); + + /* Lookup or allocate PGDIRs and PGTABLEs if necessary */ + PDE_set_all(immu, domain, xlate, nlevels, rdip, immu_flags); + + /* set all matching ptes that fit into this leaf pgtable */ + PTE_set_all(immu, domain, &xlate[1], &dvma, &paddr, &n, rdip, + immu_flags); + } +} + +/* + * dvma_unmap() + * unmap a range of DVMAs + * + * immu: IOMMU unit state + * domain: domain for requesting device + * ddip: domain-dip + * dvma: starting DVMA + * npages: Number of IMMU pages to be unmapped + * rdip: requesting device + */ +static void +dvma_unmap(immu_t *immu, domain_t *domain, uint64_t dvma, uint64_t snpages, + dev_info_t *rdip) +{ + int nlevels = immu->immu_dvma_nlevels; + xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0}; + uint64_t npages; + + ASSERT(nlevels <= IMMU_PGTABLE_MAX_LEVELS); + ASSERT(dvma != 0); + ASSERT(dvma % IMMU_PAGESIZE == 0); + ASSERT(snpages); + + for (npages = snpages; npages > 0; npages--) { + /* setup the xlate array */ + xlate_setup(immu, dvma, xlate, nlevels, rdip); + + /* just lookup existing pgtables. 
Should never fail */
+		PDE_lookup(immu, domain, xlate, nlevels, rdip);
+
+		/* XXX should be more efficient - batch clear */
+		PTE_clear_one(immu, domain, &xlate[1], dvma, rdip);
+
+		dvma += IMMU_PAGESIZE;
+	}
+}
+
+static uint64_t
+dvma_alloc(ddi_dma_impl_t *hp, domain_t *domain, uint_t npages)
+{
+	ddi_dma_attr_t *dma_attr;
+	uint64_t dvma;
+	size_t xsize, align, nocross;
+	uint64_t minaddr, maxaddr;
+
+	ASSERT(domain->dom_maptype != IMMU_MAPTYPE_UNITY);
+
+	/* shortcuts */
+	dma_attr = &(hp->dmai_attr);
+
+	/* parameters */
+	xsize = npages * IMMU_PAGESIZE;
+	align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
+	nocross = (size_t)(dma_attr->dma_attr_seg + 1);
+	minaddr = dma_attr->dma_attr_addr_lo;
+	maxaddr = dma_attr->dma_attr_addr_hi + 1;
+
+	/* handle the rollover cases */
+	if (maxaddr < dma_attr->dma_attr_addr_hi) {
+		maxaddr = dma_attr->dma_attr_addr_hi;
+	}
+
+	/*
+	 * allocate from vmem arena.
+	 */
+	dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
+	    xsize, align, 0, nocross, (void *)(uintptr_t)minaddr,
+	    (void *)(uintptr_t)maxaddr, VM_NOSLEEP);
+
+	ASSERT(dvma);
+	ASSERT(dvma >= minaddr);
+	ASSERT(dvma + xsize - 1 < maxaddr);
+
+	return (dvma);
+}
+
+static void
+dvma_free(domain_t *domain, uint64_t dvma, uint64_t npages)
+{
+	uint64_t size = npages * IMMU_PAGESIZE;
+
+	ASSERT(domain);
+	ASSERT(domain->dom_did > 0);
+	ASSERT(dvma);
+	ASSERT(npages);
+
+	if (domain->dom_maptype != IMMU_MAPTYPE_XLATE) {
+		ASSERT(domain->dom_maptype == IMMU_MAPTYPE_UNITY);
+		return;
+	}
+
+	vmem_free(domain->dom_dvma_arena, (void *)(uintptr_t)dvma, size);
+}
+/*ARGSUSED*/
+static void
+cookie_free(rootnex_dma_t *dma, immu_t *immu, domain_t *domain,
+    dev_info_t *ddip, dev_info_t *rdip)
+{
+	int i;
+	uint64_t dvma;
+	uint64_t npages;
+	dvcookie_t *dvcookies = dma->dp_dvcookies;
+	uint64_t dvmax = dma->dp_dvmax;
+
+	ASSERT(dma->dp_max_cookies);
+	ASSERT(dma->dp_max_dcookies);
+	ASSERT(dma->dp_dvmax < dma->dp_max_cookies);
+	ASSERT(dma->dp_dmax < dma->dp_max_dcookies);
+
+	for (i = 0; i <= dvmax; i++) {
+		dvma = dvcookies[i].dvck_dvma;
+		npages = dvcookies[i].dvck_npages;
+		dvma_unmap(immu, domain, dvma, npages, rdip);
+		dvma_free(domain, dvma, npages);
+	}
+
+	kmem_free(dma->dp_dvcookies, sizeof (dvcookie_t) *
+	    dma->dp_max_cookies);
+	dma->dp_dvcookies = NULL;
+	kmem_free(dma->dp_dcookies, sizeof (dcookie_t) *
+	    dma->dp_max_dcookies);
+	dma->dp_dcookies = NULL;
+	if (dma->dp_need_to_free_cookie == B_TRUE) {
+		kmem_free(dma->dp_cookies, sizeof (ddi_dma_cookie_t) *
+		    dma->dp_max_cookies);
+		dma->dp_cookies = NULL;
+		dma->dp_need_to_free_cookie = B_FALSE;
+	}
+
+	dma->dp_max_cookies = 0;
+	dma->dp_max_dcookies = 0;
+	dma->dp_cookie_size = 0;
+	dma->dp_dvmax = 0;
+	dma->dp_dmax = 0;
+}
+
+/*
+ * cookie_alloc()
+ */
+static int
+cookie_alloc(rootnex_dma_t *dma, struct ddi_dma_req *dmareq,
+    ddi_dma_attr_t *attr, uint_t prealloc)
+{
+	int kmflag;
+	rootnex_sglinfo_t *sinfo = &(dma->dp_sglinfo);
+	dvcookie_t *dvcookies = dma->dp_dvcookies;
+	dcookie_t *dcookies = dma->dp_dcookies;
+	ddi_dma_cookie_t *cookies = dma->dp_cookies;
+	uint64_t max_cookies;
+	uint64_t max_dcookies;
+	uint64_t cookie_size;
+
+	/* we need to allocate new arrays */
+	if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
+		kmflag = KM_SLEEP;
+	} else {
+		kmflag = KM_NOSLEEP;
+	}
+
+	/*
+	 * XXX make sure cookie size doesn't exceed
+	 * sinfo->si_max_cookie_size;
+	 */
+
+	/*
+	 * figure out the rough estimate of array size
+	 * At a minimum, each cookie must hold 1 page.
+ * At a maximum, it cannot exceed dma_attr_sgllen + */ + max_dcookies = dmareq->dmar_object.dmao_size + IMMU_PAGEOFFSET; + max_dcookies /= IMMU_PAGESIZE; + max_dcookies++; + max_cookies = MIN(max_dcookies, attr->dma_attr_sgllen); + + /* allocate the dvma cookie array */ + dvcookies = kmem_zalloc(sizeof (dvcookie_t) * max_cookies, kmflag); + if (dvcookies == NULL) { + return (DDI_FAILURE); + } + + /* allocate the "phys" cookie array */ + dcookies = kmem_zalloc(sizeof (dcookie_t) * max_dcookies, kmflag); + if (dcookies == NULL) { + kmem_free(dvcookies, sizeof (dvcookie_t) * max_cookies); + dvcookies = NULL; + return (DDI_FAILURE); + } + + /* allocate the "real" cookie array - the one given to users */ + cookie_size = sizeof (ddi_dma_cookie_t) * max_cookies; + if (max_cookies > prealloc) { + cookies = kmem_zalloc(cookie_size, kmflag); + if (cookies == NULL) { + kmem_free(dvcookies, sizeof (dvcookie_t) * + max_cookies); + kmem_free(dcookies, sizeof (dcookie_t) * + max_dcookies); + goto fail; + } + dma->dp_need_to_free_cookie = B_TRUE; + } else { + /* the preallocated buffer fits this size */ + cookies = (ddi_dma_cookie_t *)dma->dp_prealloc_buffer; + bzero(cookies, sizeof (ddi_dma_cookie_t) * max_cookies); + dma->dp_need_to_free_cookie = B_FALSE; + } + + dma->dp_dvcookies = dvcookies; + dma->dp_dcookies = dcookies; + dma->dp_cookies = cookies; + dma->dp_cookie_size = cookie_size; + dma->dp_max_cookies = max_cookies; + dma->dp_max_dcookies = max_dcookies; + dma->dp_dvmax = 0; + dma->dp_dmax = 0; + + sinfo->si_max_pages = dma->dp_max_cookies; + + return (DDI_SUCCESS); + +fail: + dma->dp_dvcookies = NULL; + dma->dp_dcookies = NULL; + dma->dp_cookies = NULL; + dma->dp_cookie_size = 0; + dma->dp_max_cookies = 0; + dma->dp_max_dcookies = 0; + dma->dp_dvmax = 0; + dma->dp_dmax = 0; + dma->dp_need_to_free_cookie = B_FALSE; + sinfo->si_max_pages = 0; + return (DDI_FAILURE); +} + +/*ARGSUSED*/ +static void +cookie_update(domain_t *domain, rootnex_dma_t *dma, paddr_t paddr, + int64_t psize, uint64_t maxseg) +{ + dvcookie_t *dvcookies = dma->dp_dvcookies; + dcookie_t *dcookies = dma->dp_dcookies; + ddi_dma_cookie_t *cookies = dma->dp_cookies; + uint64_t dvmax = dma->dp_dvmax; + uint64_t dmax = dma->dp_dmax; + + ASSERT(dvmax < dma->dp_max_cookies); + ASSERT(dmax < dma->dp_max_dcookies); + + paddr &= IMMU_PAGEMASK; + + ASSERT(paddr); + ASSERT(psize); + ASSERT(maxseg); + + /* + * check to see if this page would put us + * over the max cookie size + */ + if (cookies[dvmax].dmac_size + psize > maxseg) { + dvcookies[dvmax].dvck_eidx = dmax; + dvmax++; /* use the next dvcookie */ + dmax++; /* also mean we use the next dcookie */ + dvcookies[dvmax].dvck_sidx = dmax; + + ASSERT(dvmax < dma->dp_max_cookies); + ASSERT(dmax < dma->dp_max_dcookies); + } + + /* + * If the cookie is mapped or empty + */ + if (dvcookies[dvmax].dvck_dvma != 0 || + dvcookies[dvmax].dvck_npages == 0) { + /* if mapped, we need a new empty one */ + if (dvcookies[dvmax].dvck_dvma != 0) { + dvcookies[dvmax].dvck_eidx = dmax; + dvmax++; + dmax++; + dvcookies[dvmax].dvck_sidx = dma->dp_dmax; + ASSERT(dvmax < dma->dp_max_cookies); + ASSERT(dmax < dma->dp_max_dcookies); + } + + /* ok, we have an empty cookie */ + ASSERT(cookies[dvmax].dmac_size == 0); + ASSERT(dvcookies[dvmax].dvck_dvma == 0); + ASSERT(dvcookies[dvmax].dvck_npages + == 0); + ASSERT(dcookies[dmax].dck_paddr == 0); + ASSERT(dcookies[dmax].dck_npages == 0); + + dvcookies[dvmax].dvck_dvma = 0; + dvcookies[dvmax].dvck_npages = 1; + dcookies[dmax].dck_paddr = paddr; + 
dcookies[dmax].dck_npages = 1; + cookies[dvmax].dmac_size = psize; + } else { + /* Unmapped cookie but not empty. Add to it */ + cookies[dma->dp_dvmax].dmac_size += psize; + ASSERT(dvcookies[dma->dp_dvmax].dvck_dvma == 0); + dvcookies[dma->dp_dvmax].dvck_npages++; + ASSERT(dcookies[dmax].dck_paddr != 0); + ASSERT(dcookies[dmax].dck_npages != 0); + + /* Check if this paddr is contiguous */ + if (IMMU_CONTIG_PADDR(dcookies[dmax], paddr)) { + dcookies[dmax].dck_npages++; + } else { + /* No, we need a new dcookie */ + dmax++; + ASSERT(dcookies[dmax].dck_paddr == 0); + ASSERT(dcookies[dmax].dck_npages == 0); + dcookies[dmax].dck_paddr = paddr; + dcookies[dmax].dck_npages = 1; + } + } + + dma->dp_dvmax = dvmax; + dma->dp_dmax = dmax; +} + +static void +cookie_finalize(ddi_dma_impl_t *hp, immu_t *immu, domain_t *domain, + dev_info_t *rdip, immu_flags_t immu_flags) +{ + int i; + int j; + rootnex_dma_t *dma = (rootnex_dma_t *)hp->dmai_private; + dvcookie_t *dvcookies = dma->dp_dvcookies; + dcookie_t *dcookies = dma->dp_dcookies; + ddi_dma_cookie_t *cookies = dma->dp_cookies; + paddr_t paddr; + uint64_t npages; + uint64_t dvma; + + for (i = 0; i <= dma->dp_dvmax; i++) { + /* Finish up the last cookie */ + if (i == dma->dp_dvmax) { + dvcookies[i].dvck_eidx = dma->dp_dmax; + } + if ((dvma = dvcookies[i].dvck_dvma) != 0) { + cookies[i].dmac_laddress = dvma; + ASSERT(cookies[i].dmac_size != 0); + cookies[i].dmac_type = 0; + for (j = dvcookies[i].dvck_sidx; + j <= dvcookies[i].dvck_eidx; j++) { + ASSERT(dcookies[j].dck_paddr != 0); + ASSERT(dcookies[j].dck_npages != 0); + } + continue; + } + + dvma = dvma_alloc(hp, domain, dvcookies[i].dvck_npages); + + dvcookies[i].dvck_dvma = dvma; + + /* Set "real" cookies addr, cookie size already set */ + cookies[i].dmac_laddress = dvma; + ASSERT(cookies[i].dmac_size != 0); + cookies[i].dmac_type = 0; + + for (j = dvcookies[i].dvck_sidx; + j <= dvcookies[i].dvck_eidx; j++) { + + paddr = dcookies[j].dck_paddr; + npages = dcookies[j].dck_npages; + + ASSERT(paddr); + ASSERT(npages); + + dvma_map(immu, domain, dvma, paddr, npages, + rdip, immu_flags); + dvma += npages * IMMU_PAGESIZE; + } + } +} + +/* + * cookie_create() + */ +static int +cookie_create(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, + ddi_dma_attr_t *a, immu_t *immu, domain_t *domain, dev_info_t *rdip, + uint_t prealloc_count, immu_flags_t immu_flags) +{ + + ddi_dma_atyp_t buftype; + uint64_t offset; + page_t **pparray; + uint64_t paddr; + uint_t psize; + uint_t size; + uint64_t maxseg; + caddr_t vaddr; + uint_t pcnt; + page_t *page; + rootnex_sglinfo_t *sglinfo; + ddi_dma_obj_t *dmar_object; + rootnex_dma_t *dma; + + dma = (rootnex_dma_t *)hp->dmai_private; + sglinfo = &(dma->dp_sglinfo); + dmar_object = &(dmareq->dmar_object); + maxseg = sglinfo->si_max_cookie_size; + pparray = dmar_object->dmao_obj.virt_obj.v_priv; + vaddr = dmar_object->dmao_obj.virt_obj.v_addr; + buftype = dmar_object->dmao_type; + size = dmar_object->dmao_size; + + /* + * Allocate cookie, dvcookie and dcookie + */ + if (cookie_alloc(dma, dmareq, a, prealloc_count) != DDI_SUCCESS) { + return (DDI_FAILURE); + } + hp->dmai_cookie = dma->dp_cookies; + + pcnt = 0; + + /* retrieve paddr, psize, offset from dmareq */ + if (buftype == DMA_OTYP_PAGES) { + page = dmar_object->dmao_obj.pp_obj.pp_pp; + ASSERT(!PP_ISFREE(page) && PAGE_LOCKED(page)); + offset = dmar_object->dmao_obj.pp_obj.pp_offset & + MMU_PAGEOFFSET; + paddr = pfn_to_pa(page->p_pagenum) + offset; + psize = MIN((MMU_PAGESIZE - offset), size); + sglinfo->si_asp = NULL; + page = 
page->p_next; + } else { + ASSERT((buftype == DMA_OTYP_VADDR) || + (buftype == DMA_OTYP_BUFVADDR)); + sglinfo->si_asp = dmar_object->dmao_obj.virt_obj.v_as; + if (sglinfo->si_asp == NULL) { + sglinfo->si_asp = &kas; + } + offset = (uintptr_t)vaddr & MMU_PAGEOFFSET; + if (pparray != NULL) { + ASSERT(!PP_ISFREE(pparray[pcnt])); + paddr = pfn_to_pa(pparray[pcnt]->p_pagenum) + offset; + psize = MIN((MMU_PAGESIZE - offset), size); + pcnt++; + } else { + paddr = pfn_to_pa(hat_getpfnum(sglinfo->si_asp->a_hat, + vaddr)) + offset; + psize = MIN(size, (MMU_PAGESIZE - offset)); + vaddr += psize; + } + } + + /* save the iommu page offset */ + sglinfo->si_buf_offset = offset & IMMU_PAGEOFFSET; + + /* + * setup dvcookie and dcookie for [paddr, paddr+psize) + */ + cookie_update(domain, dma, paddr, psize, maxseg); + + size -= psize; + while (size > 0) { + /* get the size for this page (i.e. partial or full page) */ + psize = MIN(size, MMU_PAGESIZE); + if (buftype == DMA_OTYP_PAGES) { + /* get the paddr from the page_t */ + ASSERT(!PP_ISFREE(page) && PAGE_LOCKED(page)); + paddr = pfn_to_pa(page->p_pagenum); + page = page->p_next; + } else if (pparray != NULL) { + /* index into the array of page_t's to get the paddr */ + ASSERT(!PP_ISFREE(pparray[pcnt])); + paddr = pfn_to_pa(pparray[pcnt]->p_pagenum); + pcnt++; + } else { + /* call into the VM to get the paddr */ + paddr = pfn_to_pa(hat_getpfnum + (sglinfo->si_asp->a_hat, vaddr)); + vaddr += psize; + } + /* + * set dvcookie and dcookie for [paddr, paddr+psize) + */ + cookie_update(domain, dma, paddr, psize, maxseg); + size -= psize; + } + + cookie_finalize(hp, immu, domain, rdip, immu_flags); + + /* take into account the offset into the first page */ + dma->dp_cookies[0].dmac_laddress += sglinfo->si_buf_offset; + + /* save away how many cookies we have */ + sglinfo->si_sgl_size = dma->dp_dvmax + 1; + + return (DDI_SUCCESS); +} + +/* ############################# Functions exported ######################## */ + +/* + * setup the DVMA subsystem + * this code runs only for the first IOMMU unit + */ +void +immu_dvma_setup(list_t *listp) +{ + immu_t *immu; + uint_t kval; + size_t nchains; + + /* locks */ + mutex_init(&immu_domain_lock, NULL, MUTEX_DEFAULT, NULL); + + /* Create lists */ + list_create(&immu_unity_domain_list, sizeof (domain_t), + offsetof(domain_t, dom_maptype_node)); + list_create(&immu_xlate_domain_list, sizeof (domain_t), + offsetof(domain_t, dom_maptype_node)); + + /* Setup BDF domain hash */ + nchains = 0xff; + kval = mod_hash_iddata_gen(nchains); + + bdf_domain_hash = mod_hash_create_extended("BDF-DOMAIN_HASH", + nchains, mod_hash_null_keydtor, mod_hash_null_valdtor, + mod_hash_byid, (void *)(uintptr_t)kval, mod_hash_idkey_cmp, + KM_NOSLEEP); + ASSERT(bdf_domain_hash); + + immu = list_head(listp); + for (; immu; immu = list_next(listp, immu)) { + create_unity_domain(immu); + did_init(immu); + context_init(immu); + immu->immu_dvma_setup = B_TRUE; + } +} + +/* + * Start up one DVMA unit + */ +void +immu_dvma_startup(immu_t *immu) +{ + ASSERT(immu); + ASSERT(immu->immu_dvma_running == B_FALSE); + + if (immu_gfxdvma_enable == B_FALSE && + immu->immu_dvma_gfx_only == B_TRUE) { + return; + } + + /* + * DVMA will start once IOMMU is "running" + */ + ASSERT(immu->immu_dvma_running == B_FALSE); + immu->immu_dvma_running = B_TRUE; +} + +/* + * immu_dvma_physmem_update() + * called when the installed memory on a + * system increases, to expand domain DVMA + * for domains with UNITY mapping + */ +void +immu_dvma_physmem_update(uint64_t addr, uint64_t 
size) +{ + uint64_t start; + uint64_t npages; + domain_t *domain; + + /* + * Just walk the system-wide list of domains with + * UNITY mapping. Both the list of *all* domains + * and *UNITY* domains is protected by the same + * single lock + */ + mutex_enter(&immu_domain_lock); + domain = list_head(&immu_unity_domain_list); + for (; domain; domain = list_next(&immu_unity_domain_list, domain)) { + + /* There is no vmem_arena for unity domains. Just map it */ + ddi_err(DER_LOG, NULL, "IMMU: unity-domain: Adding map " + "[0x%" PRIx64 " - 0x%" PRIx64 "]", addr, addr + size); + + start = IMMU_ROUNDOWN(addr); + npages = (IMMU_ROUNDUP(size) / IMMU_PAGESIZE) + 1; + + dvma_map(domain->dom_immu, domain, start, start, + npages, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); + + } + mutex_exit(&immu_domain_lock); +} + +int +immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng, + uint_t prealloc_count, dev_info_t *rdip, immu_flags_t immu_flags) +{ + ddi_dma_attr_t *attr; + dev_info_t *ddip; + domain_t *domain; + immu_t *immu; + int r = DDI_FAILURE; + + ASSERT(immu_enable == B_TRUE); + ASSERT(immu_running == B_TRUE || !(immu_flags & IMMU_FLAGS_DMAHDL)); + ASSERT(hp || !(immu_flags & IMMU_FLAGS_DMAHDL)); + + /* + * Intel IOMMU will only be turned on if MMU + * page size is a multiple of IOMMU page size + */ + + /*LINTED*/ + ASSERT(MMU_PAGESIZE % IMMU_PAGESIZE == 0); + + /* Can only do DVMA if dip is attached */ + if (rdip == NULL) { + ddi_err(DER_PANIC, rdip, "DVMA map: No device specified"); + /*NOTREACHED*/ + } + + immu_flags |= dma_to_immu_flags(dmareq); + + + /* + * Setup DVMA domain for the device. This does + * work only the first time we do DVMA for a + * device. + */ + ddip = NULL; + domain = device_domain(rdip, &ddip, immu_flags); + if (domain == NULL) { + ASSERT(ddip == NULL); + ddi_err(DER_MODE, rdip, "Intel IOMMU setup failed for device"); + return (DDI_DMA_NORESOURCES); + } + + /* + * If a domain is found, we must also have a domain dip + * which is the topmost ancestor dip of rdip that shares + * the same domain with rdip. + */ + if (domain->dom_did == 0 || ddip == NULL) { + ddi_err(DER_MODE, rdip, "domain did 0(%d) or ddip NULL(%p)", + domain->dom_did, ddip); + return (DDI_DMA_NORESOURCES); + } + + immu = domain->dom_immu; + ASSERT(immu); + if (domain->dom_did == IMMU_UNITY_DID) { + ASSERT(domain == immu->immu_unity_domain); + + /* mapping already done. 
Let rootnex create cookies */ + r = DDI_DMA_USE_PHYSICAL; + } else if (immu_flags & IMMU_FLAGS_DMAHDL) { + + /* if we have a DMA handle, the IOMMUs must be running */ + ASSERT(immu->immu_regs_running == B_TRUE); + ASSERT(immu->immu_dvma_running == B_TRUE); + + attr = &hp->dmai_attr; + if (attr == NULL) { + ddi_err(DER_PANIC, rdip, + "DMA handle (%p): NULL attr", hp); + /*NOTREACHED*/ + } + if (cookie_create(hp, dmareq, attr, immu, domain, rdip, + prealloc_count, immu_flags) != DDI_SUCCESS) { + ddi_err(DER_MODE, rdip, "dvcookie_alloc: failed"); + return (DDI_DMA_NORESOURCES); + } + + /* flush write buffer */ + immu_regs_wbf_flush(immu); + r = DDI_DMA_MAPPED; + } else if (immu_flags & IMMU_FLAGS_MEMRNG) { + dvma_map(immu, domain, mrng->mrng_start, mrng->mrng_start, + mrng->mrng_npages, rdip, immu_flags); + r = DDI_DMA_MAPPED; + } else { + ddi_err(DER_PANIC, rdip, "invalid flags for immu_dvma_map()"); + /*NOTREACHED*/ + } + + /* + * Update the root and context entries + */ + if (immu_context_update(immu, domain, ddip, rdip, immu_flags) + != DDI_SUCCESS) { + ddi_err(DER_MODE, rdip, "DVMA map: context update failed"); + return (DDI_DMA_NORESOURCES); + } + + /* flush caches */ + rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER); + immu_regs_context_flush(immu, 0, 0, domain->dom_did, CONTEXT_DSI); + rw_exit(&(immu->immu_ctx_rwlock)); + immu_regs_iotlb_flush(immu, domain->dom_did, 0, 0, TLB_IVA_WHOLE, + IOTLB_DSI); + immu_regs_wbf_flush(immu); + + return (r); +} + +int +immu_dvma_unmap(ddi_dma_impl_t *hp, dev_info_t *rdip) +{ + ddi_dma_attr_t *attr; + rootnex_dma_t *dma; + domain_t *domain; + immu_t *immu; + dev_info_t *ddip; + immu_flags_t immu_flags; + + ASSERT(immu_enable == B_TRUE); + ASSERT(immu_running == B_TRUE); + ASSERT(hp); + + /* + * Intel IOMMU will only be turned on if IOMMU + * page size is same as MMU page size + */ + /*LINTED*/ + ASSERT(MMU_PAGESIZE == IMMU_PAGESIZE); + + /* rdip need not be attached */ + if (rdip == NULL) { + ddi_err(DER_PANIC, rdip, "DVMA unmap: No device specified"); + return (DDI_DMA_NORESOURCES); + } + + /* + * Get the device domain, this should always + * succeed since there had to be a domain to + * setup DVMA. + */ + dma = (rootnex_dma_t *)hp->dmai_private; + attr = &hp->dmai_attr; + if (attr == NULL) { + ddi_err(DER_PANIC, rdip, "DMA handle (%p) has NULL attr", hp); + /*NOTREACHED*/ + } + immu_flags = dma->dp_sleep_flags; + + ddip = NULL; + domain = device_domain(rdip, &ddip, immu_flags); + if (domain == NULL || domain->dom_did == 0 || ddip == NULL) { + ddi_err(DER_MODE, rdip, "Attempt to unmap DVMA for " + "a device without domain or with an uninitialized " + "domain"); + return (DDI_DMA_NORESOURCES); + } + + /* + * immu must be set in the domain. + */ + immu = domain->dom_immu; + ASSERT(immu); + if (domain->dom_did == IMMU_UNITY_DID) { + ASSERT(domain == immu->immu_unity_domain); + /* + * domain is unity, nothing to do here, let the rootnex + * code free the cookies. 
+ */ + return (DDI_DMA_USE_PHYSICAL); + } + + dma = hp->dmai_private; + if (dma == NULL) { + ddi_err(DER_PANIC, rdip, "DVMA unmap: DMA handle (%p) has " + "no private dma structure", hp); + /*NOTREACHED*/ + } + + /* free all cookies */ + cookie_free(dma, immu, domain, ddip, rdip); + + /* flush caches */ + rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER); + immu_regs_context_flush(immu, 0, 0, domain->dom_did, CONTEXT_DSI); + rw_exit(&(immu->immu_ctx_rwlock)); + immu_regs_iotlb_flush(immu, domain->dom_did, 0, 0, TLB_IVA_WHOLE, + IOTLB_DSI); + immu_regs_wbf_flush(immu); + + return (DDI_SUCCESS); +} + +immu_devi_t * +immu_devi_get(dev_info_t *rdip) +{ + immu_devi_t *immu_devi; + + mutex_enter(&DEVI(rdip)->devi_lock); + immu_devi = DEVI(rdip)->devi_iommu; + mutex_exit(&DEVI(rdip)->devi_lock); + + return (immu_devi); +}
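Aside: the constraint arithmetic in dvma_alloc() above can be checked in isolation. The following is a minimal userland sketch, not the kernel code: the attribute values are made up, and vmem_xalloc() itself is left as a printf since it is a kernel-only interface. It shows how a driver's DMA attributes become allocation constraints: dma_attr_seg is a mask, so adding one yields the power-of-two nocross boundary; alignment is clamped to at least one IOMMU page; and maxaddr is exclusive, clamped back down if dma_attr_addr_hi + 1 would wrap.

#include <stdio.h>
#include <stdint.h>

#define IMMU_PAGESIZE	4096ULL

/* hypothetical stand-in for the ddi_dma_attr_t fields used above */
typedef struct {
	uint64_t dma_attr_addr_lo;
	uint64_t dma_attr_addr_hi;
	uint64_t dma_attr_seg;
	uint64_t dma_attr_align;
} dma_attr_sketch_t;

int
main(void)
{
	dma_attr_sketch_t attr = {
		.dma_attr_addr_lo = 0x1000,
		.dma_attr_addr_hi = 0xffffffffULL,	/* 32-bit device */
		.dma_attr_seg = 0xffffffULL,	/* may not cross 16MB */
		.dma_attr_align = 8192,
	};
	uint64_t npages = 16;

	/* same derivation as dvma_alloc() */
	uint64_t xsize = npages * IMMU_PAGESIZE;
	uint64_t align = attr.dma_attr_align > IMMU_PAGESIZE ?
	    attr.dma_attr_align : IMMU_PAGESIZE;
	uint64_t nocross = attr.dma_attr_seg + 1;
	uint64_t minaddr = attr.dma_attr_addr_lo;
	uint64_t maxaddr = attr.dma_attr_addr_hi + 1;

	/* handle the rollover case, as the driver does */
	if (maxaddr < attr.dma_attr_addr_hi)
		maxaddr = attr.dma_attr_addr_hi;

	printf("vmem_xalloc(arena, 0x%llx, 0x%llx, 0, 0x%llx, "
	    "0x%llx, 0x%llx, VM_NOSLEEP)\n",
	    (unsigned long long)xsize, (unsigned long long)align,
	    (unsigned long long)nocross, (unsigned long long)minaddr,
	    (unsigned long long)maxaddr);
	return (0);
}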
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/i86pc/io/immu_intrmap.c Sat Jan 30 18:23:16 2010 -0800 @@ -0,0 +1,1000 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Portions Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2009, Intel Corporation. + * All rights reserved. + */ + + +#include <sys/apic.h> +#include <vm/hat_i86.h> +#include <sys/sysmacros.h> +#include <sys/smp_impldefs.h> +#include <sys/immu.h> + + +typedef struct intrmap_private { + immu_t *ir_immu; + uint16_t ir_idx; + uint32_t ir_sid_svt_sq; +} intrmap_private_t; + +#define INTRMAP_PRIVATE(airq) ((intrmap_private_t *)airq->airq_intrmap_private) +#define AIRQ_PRIVATE(airq) (airq->airq_intrmap_private) + +/* interrupt remapping table entry */ +typedef struct intrmap_rte { + uint64_t lo; + uint64_t hi; +} intrmap_rte_t; + +#define IRTE_HIGH(sid_svt_sq) (sid_svt_sq) +#define IRTE_LOW(dst, vector, dlm, tm, rh, dm, fpd, p) \ + (((uint64_t)(dst) << 32) | \ + ((uint64_t)(vector) << 16) | \ + ((uint64_t)(dlm) << 5) | \ + ((uint64_t)(tm) << 4) | \ + ((uint64_t)(rh) << 3) | \ + ((uint64_t)(dm) << 2) | \ + ((uint64_t)(fpd) << 1) | \ + (p)) + +typedef enum { + SVT_NO_VERIFY = 0, /* no verification */ + SVT_ALL_VERIFY, /* using sid and sq to verify */ + SVT_BUS_VERIFY, /* verify #startbus and #endbus */ + SVT_RSVD +} intrmap_svt_t; + +typedef enum { + SQ_VERIFY_ALL = 0, /* verify all 16 bits */ + SQ_VERIFY_IGR_1, /* ignore bit 3 */ + SQ_VERIFY_IGR_2, /* ignore bit 2-3 */ + SQ_VERIFY_IGR_3 /* ignore bit 1-3 */ +} intrmap_sq_t; + +/* + * S field of the Interrupt Remapping Table Address Register + * the size of the interrupt remapping table is 1 << (immu_intrmap_irta_s + 1) + */ +static uint_t intrmap_irta_s = INTRMAP_MAX_IRTA_SIZE; + +/* + * If true, arrange to suppress broadcast EOI by setting edge-triggered mode + * even for level-triggered interrupts in the interrupt-remapping engine. + * If false, broadcast EOI can still be suppressed if the CPU supports the + * APIC_SVR_SUPPRESS_BROADCAST_EOI bit. In both cases, the IOAPIC is still + * programmed with the correct trigger mode, and pcplusmp must send an EOI + * to the IOAPIC by writing to the IOAPIC's EOI register to make up for the + * missing broadcast EOI. 
+ */ +static int intrmap_suppress_brdcst_eoi = 0; + +/* + * whether verify the source id of interrupt request + */ +static int intrmap_enable_sid_verify = 0; + +/* fault types for DVMA remapping */ +static char *immu_dvma_faults[] = { + "Reserved", + "The present field in root-entry is Clear", + "The present field in context-entry is Clear", + "Hardware detected invalid programming of a context-entry", + "The DMA request attempted to access an address beyond max support", + "The Write field in a page-table entry is Clear when DMA write", + "The Read field in a page-table entry is Clear when DMA read", + "Access the next level page table resulted in error", + "Access the root-entry table resulted in error", + "Access the context-entry table resulted in error", + "Reserved field not initialized to zero in a present root-entry", + "Reserved field not initialized to zero in a present context-entry", + "Reserved field not initialized to zero in a present page-table entry", + "DMA blocked due to the Translation Type field in context-entry", + "Incorrect fault event reason number", +}; +#define DVMA_MAX_FAULTS (sizeof (immu_dvma_faults)/(sizeof (char *))) - 1 + +/* fault types for interrupt remapping */ +static char *immu_intrmap_faults[] = { + "reserved field set in IRTE", + "interrupt_index exceed the intr-remap table size", + "present field in IRTE is clear", + "hardware access intr-remap table address resulted in error", + "reserved field set in IRTE, include various conditional", + "hardware blocked an interrupt request in Compatibility format", + "remappable interrupt request blocked due to verification failure" +}; +#define INTRMAP_MAX_FAULTS \ + (sizeof (immu_intrmap_faults) / (sizeof (char *))) - 1 + +/* Function prototypes */ +static int immu_intrmap_init(int apic_mode); +static void immu_intrmap_switchon(int suppress_brdcst_eoi); +static void immu_intrmap_alloc(apic_irq_t *irq_ptr); +static void immu_intrmap_map(apic_irq_t *irq_ptr, void *intrmap_data); +static void immu_intrmap_free(apic_irq_t *irq_ptr); +static void immu_intrmap_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt); +static void immu_intrmap_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs); + +static struct apic_intrmap_ops intrmap_ops = { + immu_intrmap_init, + immu_intrmap_switchon, + immu_intrmap_alloc, + immu_intrmap_map, + immu_intrmap_free, + immu_intrmap_rdt, + immu_intrmap_msi, +}; + +/* apic mode, APIC/X2APIC */ +static int intrmap_apic_mode = LOCAL_APIC; + + +/* + * helper functions + */ +static uint_t +bitset_find_free(bitset_t *b, uint_t post) +{ + uint_t i; + uint_t cap = bitset_capacity(b); + + if (post == cap) + post = 0; + + ASSERT(post < cap); + + for (i = post; i < cap; i++) { + if (!bitset_in_set(b, i)) + return (i); + } + + for (i = 0; i < post; i++) { + if (!bitset_in_set(b, i)) + return (i); + } + + return (INTRMAP_IDX_FULL); /* no free index */ +} + +/* + * helper function to find 'count' contigous free + * interrupt remapping table entries + */ +static uint_t +bitset_find_multi_free(bitset_t *b, uint_t post, uint_t count) +{ + uint_t i, j; + uint_t cap = bitset_capacity(b); + + if (post == INTRMAP_IDX_FULL) { + return (INTRMAP_IDX_FULL); + } + + if (count > cap) + return (INTRMAP_IDX_FULL); + + ASSERT(post < cap); + + for (i = post; (i + count) <= cap; i++) { + for (j = 0; j < count; j++) { + if (bitset_in_set(b, (i + j))) { + i = i + j; + break; + } + if (j == count - 1) + return (i); + } + } + + for (i = 0; (i < post) && ((i + count) <= cap); i++) { + for (j = 0; j < count; j++) { + if (bitset_in_set(b, 
(i + j))) { + i = i + j; + break; + } + if (j == count - 1) + return (i); + } + } + + return (INTRMAP_IDX_FULL); /* no free index */ +} + +/* alloc one interrupt remapping table entry */ +static int +alloc_tbl_entry(intrmap_t *intrmap) +{ + uint32_t idx; + + for (;;) { + mutex_enter(&intrmap->intrmap_lock); + idx = intrmap->intrmap_free; + if (idx != INTRMAP_IDX_FULL) { + bitset_add(&intrmap->intrmap_map, idx); + intrmap->intrmap_free = + bitset_find_free(&intrmap->intrmap_map, idx + 1); + mutex_exit(&intrmap->intrmap_lock); + break; + } + + /* no free intr entry, use compatibility-format intr */ + mutex_exit(&intrmap->intrmap_lock); + + if (intrmap_apic_mode != LOCAL_X2APIC) { + break; + } + + /* + * x2apic mode does not allow compatibility-format + * interrupts; wait for an entry to free up + */ + delay(IMMU_ALLOC_RESOURCE_DELAY); + } + + return (idx); +} + +/* alloc 'cnt' contiguous interrupt remapping table entries */ +static int +alloc_tbl_multi_entries(intrmap_t *intrmap, uint_t cnt) +{ + uint_t idx, pos, i; + + for (; ; ) { + mutex_enter(&intrmap->intrmap_lock); + pos = intrmap->intrmap_free; + idx = bitset_find_multi_free(&intrmap->intrmap_map, pos, cnt); + + if (idx != INTRMAP_IDX_FULL) { + if (idx <= pos && pos < (idx + cnt)) { + intrmap->intrmap_free = bitset_find_free( + &intrmap->intrmap_map, idx + cnt); + } + for (i = 0; i < cnt; i++) { + bitset_add(&intrmap->intrmap_map, idx + i); + } + mutex_exit(&intrmap->intrmap_lock); + break; + } + + mutex_exit(&intrmap->intrmap_lock); + + if (intrmap_apic_mode != LOCAL_X2APIC) { + break; + } + + /* x2apic mode does not allow compatibility-format intr */ + delay(IMMU_ALLOC_RESOURCE_DELAY); + } + + return (idx); +} + +/* init interrupt remapping table */ +static int +init_unit(immu_t *immu) +{ + intrmap_t *intrmap; + size_t size; + + ddi_dma_attr_t intrmap_dma_attr = { + DMA_ATTR_V0, + 0U, + 0xffffffffU, + 0xffffffffU, + MMU_PAGESIZE, /* page aligned */ + 0x1, + 0x1, + 0xffffffffU, + 0xffffffffU, + 1, + 4, + 0 + }; + + ddi_device_acc_attr_t intrmap_acc_attr = { + DDI_DEVICE_ATTR_V0, + DDI_NEVERSWAP_ACC, + DDI_STRICTORDER_ACC + }; + + if (intrmap_apic_mode == LOCAL_X2APIC) { + if (!IMMU_ECAP_GET_EIM(immu->immu_regs_excap)) { + return (DDI_FAILURE); + } + } + + if (intrmap_irta_s > INTRMAP_MAX_IRTA_SIZE) { + intrmap_irta_s = INTRMAP_MAX_IRTA_SIZE; + } + + intrmap = kmem_zalloc(sizeof (intrmap_t), KM_SLEEP); + + if (ddi_dma_alloc_handle(immu->immu_dip, + &intrmap_dma_attr, + DDI_DMA_SLEEP, + NULL, + &(intrmap->intrmap_dma_hdl)) != DDI_SUCCESS) { + kmem_free(intrmap, sizeof (intrmap_t)); + return (DDI_FAILURE); + } + + intrmap->intrmap_size = 1 << (intrmap_irta_s + 1); + size = intrmap->intrmap_size * INTRMAP_RTE_SIZE; + if (ddi_dma_mem_alloc(intrmap->intrmap_dma_hdl, + size, + &intrmap_acc_attr, + DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED, + DDI_DMA_SLEEP, + NULL, + &(intrmap->intrmap_vaddr), + &size, + &(intrmap->intrmap_acc_hdl)) != DDI_SUCCESS) { + ddi_dma_free_handle(&(intrmap->intrmap_dma_hdl)); + kmem_free(intrmap, sizeof (intrmap_t)); + return (DDI_FAILURE); + } + + ASSERT(!((uintptr_t)intrmap->intrmap_vaddr & MMU_PAGEOFFSET)); + bzero(intrmap->intrmap_vaddr, size); + intrmap->intrmap_paddr = pfn_to_pa( + hat_getpfnum(kas.a_hat, intrmap->intrmap_vaddr)); + + mutex_init(&(intrmap->intrmap_lock), NULL, MUTEX_DRIVER, NULL); + bitset_init(&intrmap->intrmap_map); + bitset_resize(&intrmap->intrmap_map, intrmap->intrmap_size); + intrmap->intrmap_free = 0; + + immu->immu_intrmap = intrmap; + + return (DDI_SUCCESS); +} + +static void +get_immu(apic_irq_t *irq_ptr) +{ + immu_t *immu = NULL; + + 
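+ /* + * The owning unit depends on the interrupt source: a fixed I/O APIC + * interrupt is resolved through the DMAR I/O APIC scope + * (immu_dmar_ioapic_immu), while an MSI/MSI-X interrupt is resolved + * from the requesting device's devinfo node (immu_dmar_get_immu). + * The private state keeps the unit only if remapping is running. + */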
ASSERT(INTRMAP_PRIVATE(irq_ptr)->ir_immu == NULL); + + if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) { + immu = immu_dmar_ioapic_immu(irq_ptr->airq_ioapicindex); + } else { + if (irq_ptr->airq_dip != NULL) { + immu = immu_dmar_get_immu(irq_ptr->airq_dip); + } + } + + if (immu && (immu->immu_intrmap_running == B_TRUE)) { + INTRMAP_PRIVATE(irq_ptr)->ir_immu = immu; + } +} + +static int +get_top_pcibridge(dev_info_t *dip, void *arg) +{ + dev_info_t **topdipp = arg; + immu_devi_t *immu_devi; + + mutex_enter(&(DEVI(dip)->devi_lock)); + immu_devi = DEVI(dip)->devi_iommu; + mutex_exit(&(DEVI(dip)->devi_lock)); + + if (immu_devi == NULL || immu_devi->imd_pcib_type == IMMU_PCIB_BAD || + immu_devi->imd_pcib_type == IMMU_PCIB_ENDPOINT) { + return (DDI_WALK_CONTINUE); + } + + *topdipp = dip; + + return (DDI_WALK_CONTINUE); +} + +static dev_info_t * +intrmap_top_pcibridge(dev_info_t *rdip) +{ + dev_info_t *top_pcibridge = NULL; + + if (immu_walk_ancestor(rdip, NULL, get_top_pcibridge, + &top_pcibridge, NULL, 0) != DDI_SUCCESS) { + return (NULL); + } + + return (top_pcibridge); +} + +/* function to get interrupt request source id */ +static void +get_sid(apic_irq_t *irq_ptr) +{ + dev_info_t *dip, *pdip; + immu_devi_t *immu_devi; + uint16_t sid; + uchar_t svt, sq; + + if (!intrmap_enable_sid_verify) { + return; + } + + if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) { + /* for interrupt through I/O APIC */ + sid = immu_dmar_ioapic_sid(irq_ptr->airq_ioapicindex); + svt = SVT_ALL_VERIFY; + sq = SQ_VERIFY_ALL; + } else { + /* MSI/MSI-X interrupt */ + dip = irq_ptr->airq_dip; + ASSERT(dip); + pdip = intrmap_top_pcibridge(dip); + ASSERT(pdip); + immu_devi = DEVI(pdip)->devi_iommu; + ASSERT(immu_devi); + if (immu_devi->imd_pcib_type == IMMU_PCIB_PCIE_PCI) { + /* device behind pcie to pci bridge */ + sid = (immu_devi->imd_bus << 8) | immu_devi->imd_sec; + svt = SVT_BUS_VERIFY; + sq = SQ_VERIFY_ALL; + } else { + /* pcie device or device behind pci to pci bridge */ + sid = (immu_devi->imd_bus << 8) | + immu_devi->imd_devfunc; + svt = SVT_ALL_VERIFY; + sq = SQ_VERIFY_ALL; + } + } + + INTRMAP_PRIVATE(irq_ptr)->ir_sid_svt_sq = + sid | (svt << 18) | (sq << 16); +} + +static void +intrmap_enable(immu_t *immu) +{ + intrmap_t *intrmap; + uint64_t irta_reg; + + intrmap = immu->immu_intrmap; + + irta_reg = intrmap->intrmap_paddr | intrmap_irta_s; + if (intrmap_apic_mode == LOCAL_X2APIC) { + irta_reg |= (0x1 << 11); + } + + immu_regs_intrmap_enable(immu, irta_reg); +} + +/* ####################################################################### */ + +/* + * immu_intr_handler() + * the fault event handler for a single immu unit + */ +int +immu_intr_handler(immu_t *immu) +{ + uint32_t status; + int index, fault_reg_offset; + int max_fault_index; + boolean_t found_fault; + dev_info_t *idip; + + mutex_enter(&(immu->immu_intr_lock)); + mutex_enter(&(immu->immu_regs_lock)); + + /* read the fault status */ + status = immu_regs_get32(immu, IMMU_REG_FAULT_STS); + + idip = immu->immu_dip; + ASSERT(idip); + + /* check if we have a pending fault for this immu unit */ + if ((status & IMMU_FAULT_STS_PPF) == 0) { + mutex_exit(&(immu->immu_regs_lock)); + mutex_exit(&(immu->immu_intr_lock)); + return (DDI_INTR_UNCLAIMED); + } + + /* + * handle all primary pending faults + */ + index = IMMU_FAULT_GET_INDEX(status); + max_fault_index = IMMU_CAP_GET_NFR(immu->immu_regs_cap) - 1; + fault_reg_offset = IMMU_CAP_GET_FRO(immu->immu_regs_cap); + + found_fault = B_FALSE; + _NOTE(CONSTCOND) + while (1) { + uint64_t val; 
+ uint8_t fault_reason; + uint8_t fault_type; + uint16_t sid; + uint64_t pg_addr; + uint64_t idx; + + /* read the higher 64bits */ + val = immu_regs_get64(immu, fault_reg_offset + index * 16 + 8); + + /* check if this fault register has pending fault */ + if (!IMMU_FRR_GET_F(val)) { + break; + } + + found_fault = B_TRUE; + + /* get the fault reason, fault type and sid */ + fault_reason = IMMU_FRR_GET_FR(val); + fault_type = IMMU_FRR_GET_FT(val); + sid = IMMU_FRR_GET_SID(val); + + /* read the first 64bits */ + val = immu_regs_get64(immu, fault_reg_offset + index * 16); + pg_addr = val & IMMU_PAGEMASK; + idx = val >> 48; + + /* clear the fault */ + immu_regs_put32(immu, fault_reg_offset + index * 16 + 12, + (((uint32_t)1) << 31)); + + /* report the fault info */ + if (fault_reason < 0x20) { + /* immu-remapping fault */ + ddi_err(DER_WARN, idip, + "generated a fault event when translating DMA %s\n" + "\t on address 0x%" PRIx64 " for PCI(%d, %d, %d), " + "the reason is:\n\t %s", + fault_type ? "read" : "write", pg_addr, + (sid >> 8) & 0xff, (sid >> 3) & 0x1f, sid & 0x7, + immu_dvma_faults[MIN(fault_reason, + DVMA_MAX_FAULTS)]); + } else if (fault_reason < 0x27) { + /* intr-remapping fault */ + ddi_err(DER_WARN, idip, + "generated a fault event when translating " + "interrupt request\n" + "\t on index 0x%" PRIx64 " for PCI(%d, %d, %d), " + "the reason is:\n\t %s", + idx, + (sid >> 8) & 0xff, (sid >> 3) & 0x1f, sid & 0x7, + immu_intrmap_faults[MIN((fault_reason - 0x20), + INTRMAP_MAX_FAULTS)]); + } else { + ddi_err(DER_WARN, idip, "Unknown fault reason: 0x%x", + fault_reason); + } + + index++; + if (index > max_fault_index) + index = 0; + } + + /* Clear the fault */ + if (!found_fault) { + ddi_err(DER_MODE, idip, + "Fault register set but no fault present"); + } + immu_regs_put32(immu, IMMU_REG_FAULT_STS, 1); + mutex_exit(&(immu->immu_regs_lock)); + mutex_exit(&(immu->immu_intr_lock)); + return (DDI_INTR_CLAIMED); +} +/* ######################################################################### */ + +/* + * Interrupt remap entry points + */ + +/* initialize interrupt remapping */ +static int +immu_intrmap_init(int apic_mode) +{ + immu_t *immu; + int error = DDI_FAILURE; + + if (immu_intrmap_enable == B_FALSE) { + return (DDI_SUCCESS); + } + + intrmap_apic_mode = apic_mode; + + immu = list_head(&immu_list); + for (; immu; immu = list_next(&immu_list, immu)) { + if ((immu->immu_intrmap_running == B_TRUE) && + IMMU_ECAP_GET_IR(immu->immu_regs_excap)) { + if (init_unit(immu) == DDI_SUCCESS) { + error = DDI_SUCCESS; + } + } + } + + /* + * if all IOMMU units disable intr remapping, + * return FAILURE + */ + return (error); +} + + + +/* enable interrupt remapping */ +static void +immu_intrmap_switchon(int suppress_brdcst_eoi) +{ + immu_t *immu; + + + intrmap_suppress_brdcst_eoi = suppress_brdcst_eoi; + + immu = list_head(&immu_list); + for (; immu; immu = list_next(&immu_list, immu)) { + if (immu->immu_intrmap_setup == B_TRUE) { + intrmap_enable(immu); + } + } +} + +/* alloc remapping entry for the interrupt */ +static void +immu_intrmap_alloc(apic_irq_t *irq_ptr) +{ + immu_t *immu; + intrmap_t *intrmap; + uint32_t idx, cnt, i; + uint_t vector, irqno; + uint32_t sid_svt_sq; + + if (AIRQ_PRIVATE(irq_ptr) == INTRMAP_DISABLE || + AIRQ_PRIVATE(irq_ptr) != NULL) { + return; + } + + AIRQ_PRIVATE(irq_ptr) = + kmem_zalloc(sizeof (intrmap_private_t), KM_SLEEP); + + get_immu(irq_ptr); + + immu = INTRMAP_PRIVATE(irq_ptr)->ir_immu; + if (immu == NULL) { + goto intrmap_disable; + } + + intrmap = immu->immu_intrmap; 
+ + if (irq_ptr->airq_mps_intr_index == MSI_INDEX) { + cnt = irq_ptr->airq_intin_no; + } else { + cnt = 1; + } + + if (cnt == 1) { + idx = alloc_tbl_entry(intrmap); + } else { + idx = alloc_tbl_multi_entries(intrmap, cnt); + } + + if (idx == INTRMAP_IDX_FULL) { + goto intrmap_disable; + } + + INTRMAP_PRIVATE(irq_ptr)->ir_idx = idx; + + get_sid(irq_ptr); + + if (cnt == 1) { + if (IMMU_CAP_GET_CM(immu->immu_regs_cap)) { + immu_qinv_intr_one_cache(immu, idx); + } else { + immu_regs_wbf_flush(immu); + } + return; + } + + sid_svt_sq = INTRMAP_PRIVATE(irq_ptr)->ir_sid_svt_sq; + + vector = irq_ptr->airq_vector; + + for (i = 1; i < cnt; i++) { + irqno = apic_vector_to_irq[vector + i]; + irq_ptr = apic_irq_table[irqno]; + + ASSERT(irq_ptr); + + AIRQ_PRIVATE(irq_ptr) = + kmem_zalloc(sizeof (intrmap_private_t), KM_SLEEP); + + INTRMAP_PRIVATE(irq_ptr)->ir_immu = immu; + INTRMAP_PRIVATE(irq_ptr)->ir_sid_svt_sq = sid_svt_sq; + INTRMAP_PRIVATE(irq_ptr)->ir_idx = idx + i; + } + + if (IMMU_CAP_GET_CM(immu->immu_regs_cap)) { + immu_qinv_intr_caches(immu, idx, cnt); + } else { + immu_regs_wbf_flush(immu); + } + + return; + +intrmap_disable: + kmem_free(AIRQ_PRIVATE(irq_ptr), sizeof (intrmap_private_t)); + AIRQ_PRIVATE(irq_ptr) = INTRMAP_DISABLE; +} + + +/* remapping the interrupt */ +static void +immu_intrmap_map(apic_irq_t *irq_ptr, void *intrmap_data) +{ + immu_t *immu; + intrmap_t *intrmap; + ioapic_rdt_t *irdt = (ioapic_rdt_t *)intrmap_data; + msi_regs_t *mregs = (msi_regs_t *)intrmap_data; + intrmap_rte_t irte; + uint_t idx, i, cnt; + uint32_t dst, sid_svt_sq; + uchar_t vector, dlm, tm, rh, dm; + + if (AIRQ_PRIVATE(irq_ptr) == INTRMAP_DISABLE) { + return; + } + + if (irq_ptr->airq_mps_intr_index == MSI_INDEX) { + cnt = irq_ptr->airq_intin_no; + } else { + cnt = 1; + } + + idx = INTRMAP_PRIVATE(irq_ptr)->ir_idx; + immu = INTRMAP_PRIVATE(irq_ptr)->ir_immu; + intrmap = immu->immu_intrmap; + sid_svt_sq = INTRMAP_PRIVATE(irq_ptr)->ir_sid_svt_sq; + vector = irq_ptr->airq_vector; + + if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) { + dm = RDT_DM(irdt->ir_lo); + rh = 0; + tm = RDT_TM(irdt->ir_lo); + dlm = RDT_DLM(irdt->ir_lo); + dst = irdt->ir_hi; + + /* + * Mark the IRTE's TM as Edge to suppress broadcast EOI. 
+ */ + if (intrmap_suppress_brdcst_eoi) { + tm = TRIGGER_MODE_EDGE; + } + } else { + dm = MSI_ADDR_DM_PHYSICAL; + rh = MSI_ADDR_RH_FIXED; + tm = TRIGGER_MODE_EDGE; + dlm = 0; + dst = mregs->mr_addr; + } + + if (intrmap_apic_mode == LOCAL_APIC) + dst = (dst & 0xFF) << 8; + + if (cnt == 1) { + irte.lo = IRTE_LOW(dst, vector, dlm, tm, rh, dm, 0, 1); + irte.hi = IRTE_HIGH(sid_svt_sq); + + /* set interrupt remapping table entry */ + bcopy(&irte, intrmap->intrmap_vaddr + + idx * INTRMAP_RTE_SIZE, + INTRMAP_RTE_SIZE); + + immu_qinv_intr_one_cache(immu, idx); + + } else { + vector = irq_ptr->airq_vector; + for (i = 0; i < cnt; i++) { + irte.lo = IRTE_LOW(dst, vector, dlm, tm, rh, dm, 0, 1); + irte.hi = IRTE_HIGH(sid_svt_sq); + + /* set interrupt remapping table entry */ + bcopy(&irte, intrmap->intrmap_vaddr + + idx * INTRMAP_RTE_SIZE, + INTRMAP_RTE_SIZE); + vector++; + idx++; + } + + immu_qinv_intr_caches(immu, idx, cnt); + } +} + +/* free the remapping entry */ +static void +immu_intrmap_free(apic_irq_t *irq_ptr) +{ + immu_t *immu; + intrmap_t *intrmap; + uint32_t idx; + + if (AIRQ_PRIVATE(irq_ptr) == INTRMAP_DISABLE) { + AIRQ_PRIVATE(irq_ptr) = NULL; + return; + } + + immu = INTRMAP_PRIVATE(irq_ptr)->ir_immu; + intrmap = immu->immu_intrmap; + idx = INTRMAP_PRIVATE(irq_ptr)->ir_idx; + + bzero(intrmap->intrmap_vaddr + idx * INTRMAP_RTE_SIZE, + INTRMAP_RTE_SIZE); + + immu_qinv_intr_one_cache(immu, idx); + + mutex_enter(&intrmap->intrmap_lock); + bitset_del(&intrmap->intrmap_map, idx); + if (intrmap->intrmap_free == INTRMAP_IDX_FULL) { + intrmap->intrmap_free = idx; + } + mutex_exit(&intrmap->intrmap_lock); + + kmem_free(AIRQ_PRIVATE(irq_ptr), sizeof (intrmap_private_t)); + AIRQ_PRIVATE(irq_ptr) = NULL; +} + +/* record the ioapic rdt entry */ +static void +immu_intrmap_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt) +{ + uint32_t rdt_entry, tm, pol, idx, vector; + + rdt_entry = irdt->ir_lo; + + if (INTRMAP_PRIVATE(irq_ptr) != NULL) { + idx = INTRMAP_PRIVATE(irq_ptr)->ir_idx; + tm = RDT_TM(rdt_entry); + pol = RDT_POL(rdt_entry); + vector = irq_ptr->airq_vector; + irdt->ir_lo = (tm << INTRMAP_IOAPIC_TM_SHIFT) | + (pol << INTRMAP_IOAPIC_POL_SHIFT) | + ((idx >> 15) << INTRMAP_IOAPIC_IDX15_SHIFT) | + vector; + irdt->ir_hi = (idx << INTRMAP_IOAPIC_IDX_SHIFT) | + (1 << INTRMAP_IOAPIC_FORMAT_SHIFT); + } else { + irdt->ir_hi <<= APIC_ID_BIT_OFFSET; + } +} + +/* record the msi interrupt structure */ +/*ARGSUSED*/ +static void +immu_intrmap_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs) +{ + uint_t idx; + + if (INTRMAP_PRIVATE(irq_ptr) != NULL) { + idx = INTRMAP_PRIVATE(irq_ptr)->ir_idx; + + mregs->mr_data = 0; + mregs->mr_addr = MSI_ADDR_HDR | + ((idx & 0x7fff) << INTRMAP_MSI_IDX_SHIFT) | + (1 << INTRMAP_MSI_FORMAT_SHIFT) | + (1 << INTRMAP_MSI_SHV_SHIFT) | + ((idx >> 15) << INTRMAP_MSI_IDX15_SHIFT); + } else { + mregs->mr_addr = MSI_ADDR_HDR | + (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) | + (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) | + (mregs->mr_addr << MSI_ADDR_DEST_SHIFT); + mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) | + mregs->mr_data; + } +} + +/* ######################################################################### */ +/* + * Functions exported by immu_intr.c + */ +void +immu_intrmap_setup(list_t *listp) +{ + immu_t *immu; + + /* + * Check if ACPI DMAR tables say that + * interrupt remapping is supported + */ + if (immu_dmar_intrmap_supported() == B_FALSE) { + return; + } + + /* + * Check if interrupt remapping is disabled. 
+ */ + if (immu_intrmap_enable == B_FALSE) { + return; + } + + psm_vt_ops = &intrmap_ops; + + immu = list_head(listp); + for (; immu; immu = list_next(listp, immu)) { + mutex_init(&(immu->immu_intrmap_lock), NULL, + MUTEX_DEFAULT, NULL); + mutex_enter(&(immu->immu_intrmap_lock)); + immu->immu_intrmap_setup = B_TRUE; + mutex_exit(&(immu->immu_intrmap_lock)); + } +} + +void +immu_intrmap_startup(immu_t *immu) +{ + /* mark the unit as running if it was set up */ + mutex_enter(&(immu->immu_intrmap_lock)); + if (immu->immu_intrmap_setup == B_TRUE) { + immu->immu_intrmap_running = B_TRUE; + } + mutex_exit(&(immu->immu_intrmap_lock)); +} + +/* + * Register an Intel IOMMU unit's (i.e. DMAR unit's) + * interrupt handler + */ +void +immu_intr_register(immu_t *immu) +{ + int irq, vect; + char intr_handler_name[IMMU_MAXNAMELEN]; + uint32_t msi_data; + uint32_t uaddr; + uint32_t msi_addr; + + msi_addr = (MSI_ADDR_HDR | + ((apic_cpus[0].aci_local_id & 0xFF) << MSI_ADDR_DEST_SHIFT) | + (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) | + (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT)); + + if (intrmap_apic_mode == LOCAL_X2APIC) { + uaddr = (apic_cpus[0].aci_local_id & 0xFFFFFF00); + } else { + uaddr = 0; + } + + /* Don't need to hold immu_intr_lock since we are in boot */ + irq = psm_get_ipivect(IMMU_INTR_IPL, -1); + vect = apic_irq_table[irq]->airq_vector; + msi_data = ((MSI_DATA_DELIVERY_FIXED << + MSI_DATA_DELIVERY_SHIFT) | vect); + + (void) snprintf(intr_handler_name, sizeof (intr_handler_name), + "%s-intr-handler", immu->immu_name); + + (void) add_avintr((void *)NULL, IMMU_INTR_IPL, + (avfunc)(immu_intr_handler), intr_handler_name, irq, + (caddr_t)immu, NULL, NULL, NULL); + + immu_regs_intr_enable(immu, msi_addr, msi_data, uaddr); + + (void) immu_intr_handler(immu); +}
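Aside: the IRTE_LOW/IRTE_HIGH macros near the top of this file fully determine how a remapping table entry is packed, so the layout can be shown with a small host-side program. This is a minimal sketch with made-up field values; only the two macros are taken from the file above, and the (dst & 0xFF) << 8 adjustment mirrors the LOCAL_APIC case in immu_intrmap_map().

#include <stdio.h>
#include <stdint.h>

/* copied from the immu_intrmap.c diff above */
#define IRTE_HIGH(sid_svt_sq)	(sid_svt_sq)
#define IRTE_LOW(dst, vector, dlm, tm, rh, dm, fpd, p) \
	(((uint64_t)(dst) << 32) | \
	((uint64_t)(vector) << 16) | \
	((uint64_t)(dlm) << 5) | \
	((uint64_t)(tm) << 4) | \
	((uint64_t)(rh) << 3) | \
	((uint64_t)(dm) << 2) | \
	((uint64_t)(fpd) << 1) | \
	(p))

int
main(void)
{
	/* hypothetical values: xAPIC destination id 1, vector 0xd0, */
	/* fixed delivery, edge trigger, present, no sid verification */
	uint32_t dst = (1 & 0xFF) << 8;
	uint64_t lo = IRTE_LOW(dst, 0xd0, 0, 0, 0, 0, 0, 1);
	uint64_t hi = IRTE_HIGH(0);

	printf("irte.lo = 0x%016llx\n", (unsigned long long)lo);
	printf("irte.hi = 0x%016llx\n", (unsigned long long)hi);
	return (0);
}

Running this prints irte.lo = 0x0000010000d00001 and irte.hi = 0x0000000000000000: destination in bits 32-63, vector in bits 16-23, present bit at bit 0.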
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/i86pc/io/immu_qinv.c Sat Jan 30 18:23:16 2010 -0800 @@ -0,0 +1,918 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Portions Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2009, Intel Corporation. + * All rights reserved. + */ + +#include <sys/ddi.h> +#include <sys/archsystm.h> +#include <vm/hat_i86.h> +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <sys/immu.h> + +/* invalidation queue table entry size */ +#define QINV_ENTRY_SIZE 0x10 + +/* max value of Queue Size field of Invalidation Queue Address Register */ +#define QINV_MAX_QUEUE_SIZE 0x7 + +/* status data size of invalidation wait descriptor */ +#define QINV_SYNC_DATA_SIZE 0x4 + +/* status data value of invalidation wait descriptor */ +#define QINV_SYNC_DATA_FENCE 1 +#define QINV_SYNC_DATA_UNFENCE 2 + +/* invalidation queue head and tail */ +#define QINV_IQA_HEAD(QH) BITX((QH), 18, 4) +#define QINV_IQA_TAIL_SHIFT 4 + +/* invalidation queue entry structure */ +typedef struct qinv_inv_dsc { + uint64_t lo; + uint64_t hi; +} qinv_dsc_t; + +/* + * struct iotlb_cache_node + * the pending data for iotlb flush + */ +typedef struct iotlb_pend_node { + dvcookie_t *icn_dvcookies; /* ptr to dvma cookie array */ + uint_t icn_count; /* valid cookie count */ + uint_t icn_array_size; /* array size */ + list_node_t node; +} qinv_iotlb_pend_node_t; + +/* + * struct iotlb_cache_head + * the pending head for the iotlb flush + */ +typedef struct iotlb_pend_head { + /* the pending node cache list */ + kmutex_t ich_mem_lock; + list_t ich_mem_list; +} qinv_iotlb_pend_head_t; + +/* + * qinv_iotlb_t + * pending data for qiueued invalidation iotlb flush + */ +typedef struct qinv_iotlb { + dvcookie_t *qinv_iotlb_dvcookies; + uint_t qinv_iotlb_count; + uint_t qinv_iotlb_size; + list_node_t qinv_iotlb_node; +} qinv_iotlb_t; + +/* physical contigous pages for invalidation queue */ +typedef struct qinv_mem { + kmutex_t qinv_mem_lock; + ddi_dma_handle_t qinv_mem_dma_hdl; + ddi_acc_handle_t qinv_mem_acc_hdl; + caddr_t qinv_mem_vaddr; + paddr_t qinv_mem_paddr; + uint_t qinv_mem_size; + uint16_t qinv_mem_head; + uint16_t qinv_mem_tail; +} qinv_mem_t; + + +/* + * invalidation queue state + * This structure describes the state information of the + * invalidation queue table and related status memeory for + * invalidation wait descriptor + * + * qinv_table - invalidation queue table + * qinv_sync - sync status memory for invalidation wait descriptor + * qinv_iotlb_pend_node - pending iotlb node + */ +typedef struct qinv { + qinv_mem_t qinv_table; + qinv_mem_t qinv_sync; + qinv_iotlb_pend_head_t qinv_pend_head; + 
qinv_iotlb_pend_node_t **qinv_iotlb_pend_node; +} qinv_t; + + +/* helper macro for making queue invalidation descriptor */ +#define INV_DSC_TYPE(dsc) ((dsc)->lo & 0xF) +#define CC_INV_DSC_HIGH (0) +#define CC_INV_DSC_LOW(fm, sid, did, g) (((uint64_t)(fm) << 48) | \ + ((uint64_t)(sid) << 32) | \ + ((uint64_t)(did) << 16) | \ + ((uint64_t)(g) << 4) | \ + 1) + +#define IOTLB_INV_DSC_HIGH(addr, ih, am) (((uint64_t)(addr)) | \ + ((uint64_t)(ih) << 6) | \ + ((uint64_t)(am))) + +#define IOTLB_INV_DSC_LOW(did, dr, dw, g) (((uint64_t)(did) << 16) | \ + ((uint64_t)(dr) << 7) | \ + ((uint64_t)(dw) << 6) | \ + ((uint64_t)(g) << 4) | \ + 2) + +#define DEV_IOTLB_INV_DSC_HIGH(addr, s) (((uint64_t)(addr)) | (s)) + +#define DEV_IOTLB_INV_DSC_LOW(sid, max_invs_pd) ( \ + ((uint64_t)(sid) << 32) | \ + ((uint64_t)(max_invs_pd) << 16) | \ + 3) + +#define IEC_INV_DSC_HIGH (0) +#define IEC_INV_DSC_LOW(idx, im, g) (((uint64_t)(idx) << 32) | \ + ((uint64_t)(im) << 27) | \ + ((uint64_t)(g) << 4) | \ + 4) + +#define INV_WAIT_DSC_HIGH(saddr) ((uint64_t)(saddr)) + +#define INV_WAIT_DSC_LOW(sdata, fn, sw, iflag) (((uint64_t)(sdata) << 32) | \ + ((uint64_t)(fn) << 6) | \ + ((uint64_t)(sw) << 5) | \ + ((uint64_t)(iflag) << 4) | \ + 5) + +/* + * QS field of Invalidation Queue Address Register + * the size of invalidation queue is 1 << (qinv_iqa_qs + 8) + */ +static uint_t qinv_iqa_qs = 6; + +/* + * the invalidate desctiptor type of queued invalidation interface + */ +static char *qinv_dsc_type[] = { + "Reserved", + "Context Cache Invalidate Descriptor", + "IOTLB Invalidate Descriptor", + "Device-IOTLB Invalidate Descriptor", + "Interrupt Entry Cache Invalidate Descriptor", + "Invalidation Wait Descriptor", + "Incorrect queue invalidation type" +}; + +#define QINV_MAX_DSC_TYPE (sizeof (qinv_dsc_type) / sizeof (char *)) + +/* + * the queued invalidation interface functions + */ +static void qinv_submit_inv_dsc(immu_t *immu, qinv_dsc_t *dsc); +static void qinv_context_common(immu_t *immu, uint8_t function_mask, + uint16_t source_id, uint_t domain_id, ctt_inv_g_t type); +static void qinv_iotlb_common(immu_t *immu, uint_t domain_id, + uint64_t addr, uint_t am, uint_t hint, tlb_inv_g_t type); +static void qinv_iec_common(immu_t *immu, uint_t iidx, + uint_t im, uint_t g); +static uint_t qinv_alloc_sync_mem_entry(immu_t *immu); +static void qinv_wait_async_unfence(immu_t *immu, + qinv_iotlb_pend_node_t *node); +static void qinv_wait_sync(immu_t *immu); +static int qinv_wait_async_finish(immu_t *immu, int *count); +/*LINTED*/ +static void qinv_wait_async_fence(immu_t *immu); +/*LINTED*/ +static void qinv_dev_iotlb_common(immu_t *immu, uint16_t sid, + uint64_t addr, uint_t size, uint_t max_invs_pd); + + +/* submit invalidation request descriptor to invalidation queue */ +static void +qinv_submit_inv_dsc(immu_t *immu, qinv_dsc_t *dsc) +{ + qinv_t *qinv; + qinv_mem_t *qinv_table; + uint_t tail; + + qinv = (qinv_t *)immu->immu_qinv; + qinv_table = &(qinv->qinv_table); + + mutex_enter(&qinv_table->qinv_mem_lock); + tail = qinv_table->qinv_mem_tail; + qinv_table->qinv_mem_tail++; + + if (qinv_table->qinv_mem_tail == qinv_table->qinv_mem_size) + qinv_table->qinv_mem_tail = 0; + + while (qinv_table->qinv_mem_head == qinv_table->qinv_mem_tail) { + /* + * inv queue table exhausted, wait hardware to fetch + * next descriptor + */ + qinv_table->qinv_mem_head = QINV_IQA_HEAD( + immu_regs_get64(immu, IMMU_REG_INVAL_QH)); + } + + bcopy(dsc, qinv_table->qinv_mem_vaddr + tail * QINV_ENTRY_SIZE, + QINV_ENTRY_SIZE); + + immu_regs_put64(immu, 
IMMU_REG_INVAL_QT, + qinv_table->qinv_mem_tail << QINV_IQA_TAIL_SHIFT); + + mutex_exit(&qinv_table->qinv_mem_lock); +} + +/* queued invalidation interface -- invalidate context cache */ +static void +qinv_context_common(immu_t *immu, uint8_t function_mask, + uint16_t source_id, uint_t domain_id, ctt_inv_g_t type) +{ + qinv_dsc_t dsc; + + dsc.lo = CC_INV_DSC_LOW(function_mask, source_id, domain_id, type); + dsc.hi = CC_INV_DSC_HIGH; + + qinv_submit_inv_dsc(immu, &dsc); +} + +/* queued invalidation interface -- invalidate iotlb */ +static void +qinv_iotlb_common(immu_t *immu, uint_t domain_id, + uint64_t addr, uint_t am, uint_t hint, tlb_inv_g_t type) +{ + qinv_dsc_t dsc; + uint8_t dr = 0; + uint8_t dw = 0; + + if (IMMU_CAP_GET_DRD(immu->immu_regs_cap)) + dr = 1; + if (IMMU_CAP_GET_DWD(immu->immu_regs_cap)) + dw = 1; + + switch (type) { + case TLB_INV_G_PAGE: + if (!IMMU_CAP_GET_PSI(immu->immu_regs_cap) || + am > IMMU_CAP_GET_MAMV(immu->immu_regs_cap) || + addr & IMMU_PAGEOFFSET) { + type = TLB_INV_G_DOMAIN; + goto qinv_ignore_psi; + } + dsc.lo = IOTLB_INV_DSC_LOW(domain_id, dr, dw, type); + dsc.hi = IOTLB_INV_DSC_HIGH(addr, hint, am); + break; + + qinv_ignore_psi: + case TLB_INV_G_DOMAIN: + dsc.lo = IOTLB_INV_DSC_LOW(domain_id, dr, dw, type); + dsc.hi = 0; + break; + + case TLB_INV_G_GLOBAL: + dsc.lo = IOTLB_INV_DSC_LOW(0, dr, dw, type); + dsc.hi = 0; + break; + default: + ddi_err(DER_WARN, NULL, "incorrect iotlb flush type"); + return; + } + + qinv_submit_inv_dsc(immu, &dsc); +} + +/* queued invalidation interface -- invalidate dev_iotlb */ +static void +qinv_dev_iotlb_common(immu_t *immu, uint16_t sid, + uint64_t addr, uint_t size, uint_t max_invs_pd) +{ + qinv_dsc_t dsc; + + dsc.lo = DEV_IOTLB_INV_DSC_LOW(sid, max_invs_pd); + dsc.hi = DEV_IOTLB_INV_DSC_HIGH(addr, size); + + qinv_submit_inv_dsc(immu, &dsc); +} + +/* queued invalidation interface -- invalidate interrupt entry cache */ +static void +qinv_iec_common(immu_t *immu, uint_t iidx, uint_t im, uint_t g) +{ + qinv_dsc_t dsc; + + dsc.lo = IEC_INV_DSC_LOW(iidx, im, g); + dsc.hi = IEC_INV_DSC_HIGH; + + qinv_submit_inv_dsc(immu, &dsc); +} + +/* + * alloc free entry from sync status table + */ +static uint_t +qinv_alloc_sync_mem_entry(immu_t *immu) +{ + qinv_mem_t *sync_mem; + uint_t tail; + qinv_t *qinv; + + qinv = (qinv_t *)immu->immu_qinv; + sync_mem = &qinv->qinv_sync; + +sync_mem_exhausted: + mutex_enter(&sync_mem->qinv_mem_lock); + tail = sync_mem->qinv_mem_tail; + sync_mem->qinv_mem_tail++; + if (sync_mem->qinv_mem_tail == sync_mem->qinv_mem_size) + sync_mem->qinv_mem_tail = 0; + + if (sync_mem->qinv_mem_head == sync_mem->qinv_mem_tail) { + /* should never happen */ + ddi_err(DER_WARN, NULL, "sync mem exhausted"); + sync_mem->qinv_mem_tail = tail; + mutex_exit(&sync_mem->qinv_mem_lock); + delay(IMMU_ALLOC_RESOURCE_DELAY); + goto sync_mem_exhausted; + } + mutex_exit(&sync_mem->qinv_mem_lock); + + return (tail); +} + +/* + * queued invalidation interface -- invalidation wait descriptor + * fence flag not set, need status data to indicate the invalidation + * wait descriptor completion + */ +static void +qinv_wait_async_unfence(immu_t *immu, qinv_iotlb_pend_node_t *node) +{ + qinv_dsc_t dsc; + qinv_mem_t *sync_mem; + uint64_t saddr; + uint_t tail; + qinv_t *qinv; + + qinv = (qinv_t *)immu->immu_qinv; + sync_mem = &qinv->qinv_sync; + tail = qinv_alloc_sync_mem_entry(immu); + + /* plant an iotlb pending node */ + qinv->qinv_iotlb_pend_node[tail] = node; + + saddr = sync_mem->qinv_mem_paddr + tail * QINV_SYNC_DATA_SIZE; + + /* + * sdata 
= QINV_SYNC_DATA_UNFENCE, fence = 0, sw = 1, if = 0 + * indicate the invalidation wait descriptor completion by + * performing a coherent DWORD write to the status address, + * not by generating an invalidation completion event + */ + dsc.lo = INV_WAIT_DSC_LOW(QINV_SYNC_DATA_UNFENCE, 0, 1, 0); + dsc.hi = INV_WAIT_DSC_HIGH(saddr); + + qinv_submit_inv_dsc(immu, &dsc); +} + +/* + * queued invalidation interface -- invalidation wait descriptor + * fence flag set, indicate descriptors following the invalidation + * wait descriptor must be processed by hardware only after the + * invalidation wait descriptor completes. + */ +static void +qinv_wait_async_fence(immu_t *immu) +{ + qinv_dsc_t dsc; + + /* sw = 0, fence = 1, iflag = 0 */ + dsc.lo = INV_WAIT_DSC_LOW(0, 1, 0, 0); + dsc.hi = 0; + qinv_submit_inv_dsc(immu, &dsc); +} + +/* + * queued invalidation interface -- invalidation wait descriptor + * wait until the invalidation request finished + */ +static void +qinv_wait_sync(immu_t *immu) +{ + qinv_dsc_t dsc; + qinv_mem_t *sync_mem; + uint64_t saddr; + uint_t tail; + qinv_t *qinv; + volatile uint32_t *status; + + qinv = (qinv_t *)immu->immu_qinv; + sync_mem = &qinv->qinv_sync; + tail = qinv_alloc_sync_mem_entry(immu); + saddr = sync_mem->qinv_mem_paddr + tail * QINV_SYNC_DATA_SIZE; + status = (uint32_t *)(sync_mem->qinv_mem_vaddr + tail * + QINV_SYNC_DATA_SIZE); + + /* + * sdata = QINV_SYNC_DATA_FENCE, fence = 1, sw = 1, if = 0 + * indicate the invalidation wait descriptor completion by + * performing a coherent DWORD write to the status address, + * not by generating an invalidation completion event + */ + dsc.lo = INV_WAIT_DSC_LOW(QINV_SYNC_DATA_FENCE, 1, 1, 0); + dsc.hi = INV_WAIT_DSC_HIGH(saddr); + + qinv_submit_inv_dsc(immu, &dsc); + + while ((*status) != QINV_SYNC_DATA_FENCE) + iommu_cpu_nop(); + *status = QINV_SYNC_DATA_UNFENCE; +} + +/* get already completed invalidation wait requests */ +static int +qinv_wait_async_finish(immu_t *immu, int *cnt) +{ + qinv_mem_t *sync_mem; + int index; + qinv_t *qinv; + volatile uint32_t *value; + + ASSERT((*cnt) == 0); + + qinv = (qinv_t *)immu->immu_qinv; + sync_mem = &qinv->qinv_sync; + + mutex_enter(&sync_mem->qinv_mem_lock); + index = sync_mem->qinv_mem_head; + value = (uint32_t *)(sync_mem->qinv_mem_vaddr + index + * QINV_SYNC_DATA_SIZE); + while (*value == QINV_SYNC_DATA_UNFENCE) { + *value = 0; + (*cnt)++; + sync_mem->qinv_mem_head++; + if (sync_mem->qinv_mem_head == sync_mem->qinv_mem_size) { + sync_mem->qinv_mem_head = 0; + value = (uint32_t *)(sync_mem->qinv_mem_vaddr); + } else + value = (uint32_t *)((char *)value + + QINV_SYNC_DATA_SIZE); + } + + mutex_exit(&sync_mem->qinv_mem_lock); + if ((*cnt) > 0) + return (index); + else + return (-1); +} + +/* + * call ddi_dma_mem_alloc to allocate physical contigous + * pages for invalidation queue table + */ +static int +qinv_setup(immu_t *immu) +{ + qinv_t *qinv; + size_t size; + + ddi_dma_attr_t qinv_dma_attr = { + DMA_ATTR_V0, + 0U, + 0xffffffffU, + 0xffffffffU, + MMU_PAGESIZE, /* page aligned */ + 0x1, + 0x1, + 0xffffffffU, + 0xffffffffU, + 1, + 4, + 0 + }; + + ddi_device_acc_attr_t qinv_acc_attr = { + DDI_DEVICE_ATTR_V0, + DDI_NEVERSWAP_ACC, + DDI_STRICTORDER_ACC + }; + + mutex_init(&(immu->immu_qinv_lock), NULL, MUTEX_DRIVER, NULL); + + + mutex_enter(&(immu->immu_qinv_lock)); + + immu->immu_qinv = NULL; + if (!IMMU_ECAP_GET_QI(immu->immu_regs_excap) || + immu_qinv_enable == B_FALSE) { + mutex_exit(&(immu->immu_qinv_lock)); + return (DDI_SUCCESS); + } + + if (qinv_iqa_qs > QINV_MAX_QUEUE_SIZE) + 
qinv_iqa_qs = QINV_MAX_QUEUE_SIZE; + + qinv = kmem_zalloc(sizeof (qinv_t), KM_SLEEP); + + if (ddi_dma_alloc_handle(root_devinfo, + &qinv_dma_attr, DDI_DMA_SLEEP, NULL, + &(qinv->qinv_table.qinv_mem_dma_hdl)) != DDI_SUCCESS) { + ddi_err(DER_WARN, root_devinfo, + "alloc invalidation queue table handler failed"); + goto queue_table_handle_failed; + } + + if (ddi_dma_alloc_handle(root_devinfo, + &qinv_dma_attr, DDI_DMA_SLEEP, NULL, + &(qinv->qinv_sync.qinv_mem_dma_hdl)) != DDI_SUCCESS) { + ddi_err(DER_WARN, root_devinfo, + "alloc invalidation queue sync mem handler failed"); + goto sync_table_handle_failed; + } + + qinv->qinv_table.qinv_mem_size = (1 << (qinv_iqa_qs + 8)); + size = qinv->qinv_table.qinv_mem_size * QINV_ENTRY_SIZE; + + /* alloc physical contiguous pages for invalidation queue */ + if (ddi_dma_mem_alloc(qinv->qinv_table.qinv_mem_dma_hdl, + size, + &qinv_acc_attr, + DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED, + DDI_DMA_SLEEP, + NULL, + &(qinv->qinv_table.qinv_mem_vaddr), + &size, + &(qinv->qinv_table.qinv_mem_acc_hdl)) != DDI_SUCCESS) { + ddi_err(DER_WARN, root_devinfo, + "alloc invalidation queue table failed"); + goto queue_table_mem_failed; + } + + ASSERT(!((uintptr_t)qinv->qinv_table.qinv_mem_vaddr & MMU_PAGEOFFSET)); + bzero(qinv->qinv_table.qinv_mem_vaddr, size); + + /* get the base physical address of invalidation request queue */ + qinv->qinv_table.qinv_mem_paddr = pfn_to_pa( + hat_getpfnum(kas.a_hat, qinv->qinv_table.qinv_mem_vaddr)); + + qinv->qinv_table.qinv_mem_head = qinv->qinv_table.qinv_mem_tail = 0; + + qinv->qinv_sync.qinv_mem_size = qinv->qinv_table.qinv_mem_size; + size = qinv->qinv_sync.qinv_mem_size * QINV_SYNC_DATA_SIZE; + + /* alloc status memory for invalidation wait descriptor */ + if (ddi_dma_mem_alloc(qinv->qinv_sync.qinv_mem_dma_hdl, + size, + &qinv_acc_attr, + DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED, + DDI_DMA_SLEEP, + NULL, + &(qinv->qinv_sync.qinv_mem_vaddr), + &size, + &(qinv->qinv_sync.qinv_mem_acc_hdl)) != DDI_SUCCESS) { + ddi_err(DER_WARN, root_devinfo, + "alloc invalidation queue sync mem failed"); + goto sync_table_mem_failed; + } + + ASSERT(!((uintptr_t)qinv->qinv_sync.qinv_mem_vaddr & MMU_PAGEOFFSET)); + bzero(qinv->qinv_sync.qinv_mem_vaddr, size); + qinv->qinv_sync.qinv_mem_paddr = pfn_to_pa( + hat_getpfnum(kas.a_hat, qinv->qinv_sync.qinv_mem_vaddr)); + + qinv->qinv_sync.qinv_mem_head = qinv->qinv_sync.qinv_mem_tail = 0; + + mutex_init(&(qinv->qinv_table.qinv_mem_lock), NULL, MUTEX_DRIVER, NULL); + mutex_init(&(qinv->qinv_sync.qinv_mem_lock), NULL, MUTEX_DRIVER, NULL); + + /* + * init iotlb pend node for submitting invalidation iotlb + * queue request + */ + qinv->qinv_iotlb_pend_node = (qinv_iotlb_pend_node_t **) + kmem_zalloc(qinv->qinv_sync.qinv_mem_size + * sizeof (qinv_iotlb_pend_node_t *), KM_SLEEP); + + /* set invalidation queue structure */ + immu->immu_qinv = qinv; + + mutex_exit(&(immu->immu_qinv_lock)); + + return (DDI_SUCCESS); + +sync_table_mem_failed: + ddi_dma_mem_free(&(qinv->qinv_table.qinv_mem_acc_hdl)); + +queue_table_mem_failed: + ddi_dma_free_handle(&(qinv->qinv_sync.qinv_mem_dma_hdl)); + +sync_table_handle_failed: + ddi_dma_free_handle(&(qinv->qinv_table.qinv_mem_dma_hdl)); + +queue_table_handle_failed: + kmem_free(qinv, sizeof (qinv_t)); + + mutex_exit(&(immu->immu_qinv_lock)); + + return (DDI_FAILURE); +} + +/* + * ########################################################################### + * + * Functions exported by immu_qinv.c + * + * ########################################################################### + */ 
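Aside: qinv_submit_inv_dsc() above is a single-producer ring whose consumer is the hardware itself: the driver owns the tail, the head is read back from the IMMU_REG_INVAL_QH register, and the queue is full when the advanced tail catches up with the head. Below is a minimal host-side sketch of the same arithmetic, with the two hardware registers stubbed out as plain fields; it is hypothetical, and in the driver the full-queue spin re-reads the head register, which the hardware advances as it fetches descriptors.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define QINV_ENTRY_SIZE	0x10

typedef struct { uint64_t lo, hi; } dsc_t;

typedef struct {
	char *mem;	/* queue memory: size * QINV_ENTRY_SIZE bytes */
	uint16_t head;	/* stand-in for the head register (hardware-owned) */
	uint16_t tail;	/* stand-in for the tail register (driver-owned) */
	uint16_t size;	/* number of entries */
} ring_t;

static void
ring_submit(ring_t *r, dsc_t *dsc)
{
	uint16_t slot = r->tail;

	/* advance the tail with wrap-around, as the driver does */
	r->tail = (uint16_t)((r->tail + 1) % r->size);

	/* queue full: wait for the consumer to drain an entry */
	while (r->head == r->tail)
		;	/* the driver re-reads IMMU_REG_INVAL_QH here */

	/* copy the descriptor into the slot it was assigned */
	memcpy(r->mem + slot * QINV_ENTRY_SIZE, dsc, QINV_ENTRY_SIZE);
	/* the driver now publishes the new tail by writing */
	/* (tail << QINV_IQA_TAIL_SHIFT) to IMMU_REG_INVAL_QT */
}

int
main(void)
{
	static char qmem[8 * QINV_ENTRY_SIZE];
	ring_t r = { qmem, 0, 0, 8 };
	dsc_t wait_dsc = { 0x5, 0x0 };	/* type 5: invalidation wait */

	ring_submit(&r, &wait_dsc);
	printf("descriptor queued at slot 0, tail now %u\n", r.tail);
	return (0);
}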
+ +/* + * initialize invalidation request queue structure. + */ +void +immu_qinv_setup(list_t *listp) +{ + immu_t *immu; + + if (immu_qinv_enable == B_FALSE) { + return; + } + + immu = list_head(listp); + for (; immu; immu = list_next(listp, immu)) { + if (qinv_setup(immu) == DDI_SUCCESS) { + immu->immu_qinv_setup = B_TRUE; + } + } +} + +void +immu_qinv_startup(immu_t *immu) +{ + qinv_t *qinv; + uint64_t qinv_reg_value; + + if (immu->immu_qinv_setup == B_FALSE) { + return; + } + + qinv = (qinv_t *)immu->immu_qinv; + qinv_reg_value = qinv->qinv_table.qinv_mem_paddr | qinv_iqa_qs; + immu_regs_qinv_enable(immu, qinv_reg_value); + immu->immu_qinv_running = B_TRUE; +} + +/* + * queued invalidation interface + * function based context cache invalidation + */ +void +immu_qinv_context_fsi(immu_t *immu, uint8_t function_mask, + uint16_t source_id, uint_t domain_id) +{ + qinv_context_common(immu, function_mask, source_id, + domain_id, CTT_INV_G_DEVICE); + qinv_wait_sync(immu); +} + +/* + * queued invalidation interface + * domain based context cache invalidation + */ +void +immu_qinv_context_dsi(immu_t *immu, uint_t domain_id) +{ + qinv_context_common(immu, 0, 0, domain_id, CTT_INV_G_DOMAIN); + qinv_wait_sync(immu); +} + +/* + * queued invalidation interface + * invalidation global context cache + */ +void +immu_qinv_context_gbl(immu_t *immu) +{ + qinv_context_common(immu, 0, 0, 0, CTT_INV_G_GLOBAL); + qinv_wait_sync(immu); +} + +/* + * queued invalidation interface + * paged based iotlb invalidation + */ +void +immu_inv_iotlb_psi(immu_t *immu, uint_t domain_id, + uint64_t dvma, uint_t count, uint_t hint) +{ + uint_t am = 0; + uint_t max_am; + + max_am = IMMU_CAP_GET_MAMV(immu->immu_regs_cap); + + /* choose page specified invalidation */ + if (IMMU_CAP_GET_PSI(immu->immu_regs_cap)) { + while (am <= max_am) { + if ((ADDR_AM_OFFSET(IMMU_BTOP(dvma), am) + count) + <= ADDR_AM_MAX(am)) { + qinv_iotlb_common(immu, domain_id, + dvma, am, hint, TLB_INV_G_PAGE); + break; + } + am++; + } + if (am > max_am) { + qinv_iotlb_common(immu, domain_id, + dvma, 0, hint, TLB_INV_G_DOMAIN); + } + + /* choose domain invalidation */ + } else { + qinv_iotlb_common(immu, domain_id, dvma, + 0, hint, TLB_INV_G_DOMAIN); + } +} + +/* + * queued invalidation interface + * domain based iotlb invalidation + */ +void +immu_qinv_iotlb_dsi(immu_t *immu, uint_t domain_id) +{ + qinv_iotlb_common(immu, domain_id, 0, 0, 0, TLB_INV_G_DOMAIN); + qinv_wait_sync(immu); +} + +/* + * queued invalidation interface + * global iotlb invalidation + */ +void +immu_qinv_iotlb_gbl(immu_t *immu) +{ + qinv_iotlb_common(immu, 0, 0, 0, 0, TLB_INV_G_GLOBAL); + qinv_wait_sync(immu); +} + + + +/* + * the plant wait operation for queued invalidation interface + */ +void +immu_qinv_plant(immu_t *immu, dvcookie_t *dvcookies, + uint_t count, uint_t array_size) +{ + qinv_t *qinv; + qinv_iotlb_pend_node_t *node = NULL; + qinv_iotlb_pend_head_t *head; + + qinv = (qinv_t *)immu->immu_qinv; + + head = &(qinv->qinv_pend_head); + mutex_enter(&(head->ich_mem_lock)); + node = list_head(&(head->ich_mem_list)); + if (node) { + list_remove(&(head->ich_mem_list), node); + } + mutex_exit(&(head->ich_mem_lock)); + + /* no cache, alloc one */ + if (node == NULL) { + node = kmem_zalloc(sizeof (qinv_iotlb_pend_node_t), KM_SLEEP); + } + node->icn_dvcookies = dvcookies; + node->icn_count = count; + node->icn_array_size = array_size; + + /* plant an invalidation wait descriptor, not wait its completion */ + qinv_wait_async_unfence(immu, node); +} + +/* + * the reap wait operation 
for the queued invalidation interface + */ +void +immu_qinv_reap(immu_t *immu) +{ + int index, cnt = 0; + qinv_iotlb_pend_node_t *node; + qinv_iotlb_pend_head_t *head; + qinv_t *qinv; + + qinv = (qinv_t *)immu->immu_qinv; + head = &(qinv->qinv_pend_head); + + index = qinv_wait_async_finish(immu, &cnt); + + while (cnt--) { + node = qinv->qinv_iotlb_pend_node[index]; + if (node == NULL) + continue; + mutex_enter(&(head->ich_mem_lock)); + list_insert_head(&(head->ich_mem_list), node); + mutex_exit(&(head->ich_mem_lock)); + qinv->qinv_iotlb_pend_node[index] = NULL; + index++; + if (index == qinv->qinv_sync.qinv_mem_size) + index = 0; + } +} + + +/* queued invalidation interface -- global invalidate interrupt entry cache */ +void +immu_qinv_intr_global(immu_t *immu) +{ + qinv_iec_common(immu, 0, 0, IEC_INV_GLOBAL); + qinv_wait_sync(immu); +} + +/* queued invalidation interface -- invalidate single interrupt entry cache */ +void +immu_qinv_intr_one_cache(immu_t *immu, uint_t iidx) +{ + qinv_iec_common(immu, iidx, 0, IEC_INV_INDEX); + qinv_wait_sync(immu); +} + +/* queued invalidation interface -- invalidate interrupt entry caches */ +void +immu_qinv_intr_caches(immu_t *immu, uint_t iidx, uint_t cnt) +{ + uint_t i, mask = 0; + + ASSERT(cnt != 0); + + /* requested interrupt count is not a power of 2 */ + if (!ISP2(cnt)) { + for (i = 0; i < cnt; i++) { + qinv_iec_common(immu, iidx + i, 0, IEC_INV_INDEX); + } + qinv_wait_sync(immu); + return; + } + + while ((2 << mask) < cnt) { + mask++; + } + + if (mask > IMMU_ECAP_GET_MHMV(immu->immu_regs_excap)) { + for (i = 0; i < cnt; i++) { + qinv_iec_common(immu, iidx + i, 0, IEC_INV_INDEX); + } + qinv_wait_sync(immu); + return; + } + + qinv_iec_common(immu, iidx, mask, IEC_INV_INDEX); + + qinv_wait_sync(immu); +} + +void +immu_qinv_report_fault(immu_t *immu) +{ + uint16_t head; + qinv_dsc_t *dsc; + qinv_t *qinv; + + /* access qinv data */ + mutex_enter(&(immu->immu_qinv_lock)); + + qinv = (qinv_t *)(immu->immu_qinv); + + head = QINV_IQA_HEAD( + immu_regs_get64(immu, IMMU_REG_INVAL_QH)); + + dsc = (qinv_dsc_t *)(qinv->qinv_table.qinv_mem_vaddr + + (head * QINV_ENTRY_SIZE)); + + /* report the error */ + ddi_err(DER_WARN, immu->immu_dip, + "generated a fault when fetching a descriptor from the" + "\tinvalidation queue, or detected that the fetched" + "\tdescriptor is invalid. The head register is " + "0x%x," + "\tthe type is %s", + head, + qinv_dsc_type[MIN(INV_DSC_TYPE(dsc), QINV_MAX_DSC_TYPE)]); + + mutex_exit(&(immu->immu_qinv_lock)); +}
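A note on the address-mask search in immu_inv_iotlb_psi() above: it looks for the smallest mask am such that the page range [dvma, dvma + count) fits in a single naturally aligned block of 2^am pages, falling back to a domain-selective flush when no mask up to the hardware's MAMV works. A stand-alone sketch of that search, assuming ADDR_AM_MAX(am) expands to (1 << am) and ADDR_AM_OFFSET(pfn, am) to (pfn & ((1 << am) - 1)) -- those expansions are assumptions here, not copied from immu.h:

/*
 * Sketch of the PSI address-mask search; the macro expansions are
 * assumed, not taken from the driver headers.
 */
#include <stdint.h>

static int
psi_addr_mask(uint64_t pfn, unsigned int count, unsigned int max_am)
{
	unsigned int am;

	for (am = 0; am <= max_am; am++) {
		/* does [pfn, pfn + count) fit one aligned 2^am page block? */
		if ((pfn & ((1ULL << am) - 1)) + count <= (1ULL << am))
			return ((int)am);
	}
	return (-1);	/* no usable mask: caller falls back to DSI */
}

For example, pfn 0x104 with count 4 yields am = 2 (a 4-page block naturally aligned at 0x104), while pfn 0x103 with count 4 and MAMV = 2 finds no usable mask and falls back to a domain flush.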
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/i86pc/io/immu_regs.c Sat Jan 30 18:23:16 2010 -0800 @@ -0,0 +1,851 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Portions Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * immu_regs.c - File that operates on an IMMU unit's registers + */ +#include <sys/dditypes.h> +#include <sys/ddi.h> +#include <sys/archsystm.h> +#include <sys/x86_archext.h> +#include <sys/spl.h> +#include <sys/immu.h> + +#define get_reg32(immu, offset) ddi_get32((immu)->immu_regs_handle, \ + (uint32_t *)(immu->immu_regs_addr + (offset))) +#define get_reg64(immu, offset) ddi_get64((immu)->immu_regs_handle, \ + (uint64_t *)(immu->immu_regs_addr + (offset))) +#define put_reg32(immu, offset, val) ddi_put32\ + ((immu)->immu_regs_handle, \ + (uint32_t *)(immu->immu_regs_addr + (offset)), val) +#define put_reg64(immu, offset, val) ddi_put64\ + ((immu)->immu_regs_handle, \ + (uint64_t *)(immu->immu_regs_addr + (offset)), val) + +/* + * wait max 60s for the hardware completion + */ +#define IMMU_MAX_WAIT_TIME 60000000 +#define wait_completion(immu, offset, getf, completion, status) \ +{ \ + clock_t stick = ddi_get_lbolt(); \ + clock_t ntick; \ + _NOTE(CONSTCOND) \ + while (1) { \ + status = getf(immu, offset); \ + ntick = ddi_get_lbolt(); \ + if (completion) { \ + break; \ + } \ + if (ntick - stick >= drv_usectohz(IMMU_MAX_WAIT_TIME)) { \ + ddi_err(DER_PANIC, NULL, \ + "immu wait completion time out"); \ + /*NOTREACHED*/ \ + } else { \ + iommu_cpu_nop();\ + }\ + }\ +} + +static ddi_device_acc_attr_t immu_regs_attr = { + DDI_DEVICE_ATTR_V0, + DDI_NEVERSWAP_ACC, + DDI_STRICTORDER_ACC, +}; + +/* + * iotlb_flush() + * flush the iotlb cache + */ +static void +iotlb_flush(immu_t *immu, uint_t domain_id, + uint64_t addr, uint_t am, uint_t hint, immu_iotlb_inv_t type) +{ + uint64_t command = 0, iva = 0; + uint_t iva_offset, iotlb_offset; + uint64_t status = 0; + + ASSERT(MUTEX_HELD(&(immu->immu_regs_lock))); + + /* no lock needed since cap and excap fields are RDONLY */ + iva_offset = IMMU_ECAP_GET_IRO(immu->immu_regs_excap); + iotlb_offset = iva_offset + 8; + + /* + * prepare drain read/write command + */ + if (IMMU_CAP_GET_DWD(immu->immu_regs_cap)) { + command |= TLB_INV_DRAIN_WRITE; + } + + if (IMMU_CAP_GET_DRD(immu->immu_regs_cap)) { + command |= TLB_INV_DRAIN_READ; + } + + /* + * if the hardware doesn't support page selective invalidation, a + * PSI request falls back to domain type. 
Domain and global requests + * are used as is. + */ + switch (type) { + case IOTLB_PSI: + if (!IMMU_CAP_GET_PSI(immu->immu_regs_cap) || + (am > IMMU_CAP_GET_MAMV(immu->immu_regs_cap)) || + (addr & IMMU_PAGEOFFSET)) { + goto ignore_psi; + } + command |= TLB_INV_PAGE | TLB_INV_IVT | + TLB_INV_DID(domain_id); + iva = addr | am | TLB_IVA_HINT(hint); + break; +ignore_psi: + case IOTLB_DSI: + command |= TLB_INV_DOMAIN | TLB_INV_IVT | + TLB_INV_DID(domain_id); + break; + case IOTLB_GLOBAL: + command |= TLB_INV_GLOBAL | TLB_INV_IVT; + break; + default: + ddi_err(DER_MODE, NULL, "%s: incorrect iotlb flush type", + immu->immu_name); + return; + } + + /* verify there is no pending command */ + wait_completion(immu, iotlb_offset, get_reg64, + (!(status & TLB_INV_IVT)), status); + if (iva) + put_reg64(immu, iva_offset, iva); + put_reg64(immu, iotlb_offset, command); + wait_completion(immu, iotlb_offset, get_reg64, + (!(status & TLB_INV_IVT)), status); +} + +/* + * iotlb_psi() + * iotlb page specific invalidation + */ +static void +iotlb_psi(immu_t *immu, uint_t domain_id, + uint64_t dvma, uint_t count, uint_t hint) +{ + uint_t am = 0; + uint_t max_am = 0; + uint64_t align = 0; + uint64_t dvma_pg = 0; + uint_t used_count = 0; + + mutex_enter(&(immu->immu_regs_lock)); + + /* choose page specified invalidation */ + if (IMMU_CAP_GET_PSI(immu->immu_regs_cap)) { + /* MAMV is valid only if PSI is set */ + max_am = IMMU_CAP_GET_MAMV(immu->immu_regs_cap); + while (count != 0) { + /* First calculate alignment of DVMA */ + dvma_pg = IMMU_BTOP(dvma); + ASSERT(dvma_pg != NULL); + ASSERT(count >= 1); + for (align = 1; (dvma_pg & align) == 0; align <<= 1) + ; + /* truncate count to the nearest power of 2 */ + for (used_count = 1, am = 0; count >> used_count != 0; + used_count <<= 1, am++) + ; + if (am > max_am) { + am = max_am; + used_count = 1 << am; + } + if (align >= used_count) { + iotlb_flush(immu, domain_id, + dvma, am, hint, IOTLB_PSI); + } else { + /* align < used_count */ + used_count = align; + for (am = 0; (1 << am) != used_count; am++) + ; + iotlb_flush(immu, domain_id, + dvma, am, hint, IOTLB_PSI); + } + count -= used_count; + dvma = (dvma_pg + used_count) << IMMU_PAGESHIFT; + } + } else { + /* choose domain invalidation */ + iotlb_flush(immu, domain_id, dvma, 0, 0, IOTLB_DSI); + } + + mutex_exit(&(immu->immu_regs_lock)); +} + +/* + * iotlb_dsi() + * domain specific invalidation + */ +static void +iotlb_dsi(immu_t *immu, uint_t domain_id) +{ + mutex_enter(&(immu->immu_regs_lock)); + iotlb_flush(immu, domain_id, 0, 0, 0, IOTLB_DSI); + mutex_exit(&(immu->immu_regs_lock)); +} + +/* + * iotlb_global() + * global iotlb invalidation + */ +static void +iotlb_global(immu_t *immu) +{ + mutex_enter(&(immu->immu_regs_lock)); + iotlb_flush(immu, 0, 0, 0, 0, IOTLB_GLOBAL); + mutex_exit(&(immu->immu_regs_lock)); +} + + +static int +gaw2agaw(int gaw) +{ + int r, agaw; + + r = (gaw - 12) % 9; + + if (r == 0) + agaw = gaw; + else + agaw = gaw + 9 - r; + + if (agaw > 64) + agaw = 64; + + return (agaw); +} + +/* + * set_agaw() + * calculate agaw for an IOMMU unit + */ +static int +set_agaw(immu_t *immu) +{ + int mgaw, magaw, agaw; + uint_t bitpos; + int max_sagaw_mask, sagaw_mask, mask; + int nlevels; + + /* + * mgaw is the maximum guest address width. + * Addresses above this value will be + * blocked by the IOMMU unit. + * sagaw is a bitmask that lists all the + * AGAWs supported by this IOMMU unit. 
+ */ + mgaw = IMMU_CAP_MGAW(immu->immu_regs_cap); + sagaw_mask = IMMU_CAP_SAGAW(immu->immu_regs_cap); + + magaw = gaw2agaw(mgaw); + + /* + * The maximum SAGAW bitmask is + * specified by the VT-d spec. + */ + max_sagaw_mask = ((1 << 5) - 1); + + if (sagaw_mask > max_sagaw_mask) { + ddi_err(DER_WARN, NULL, "%s: SAGAW bitmask (%x) " + "is larger than the maximum SAGAW bitmask " + "(%x) specified by the Intel VT-d spec", + immu->immu_name, sagaw_mask, max_sagaw_mask); + return (DDI_FAILURE); + } + + /* + * Find a supported AGAW <= magaw + * + * sagaw_mask bitpos AGAW (bits) nlevels + * ============================================== + * 0 0 0 0 1 0 30 2 + * 0 0 0 1 0 1 39 3 + * 0 0 1 0 0 2 48 4 + * 0 1 0 0 0 3 57 5 + * 1 0 0 0 0 4 64(66) 6 + */ + nlevels = 0; + agaw = 0; + for (mask = 1, bitpos = 0; bitpos < 5; + bitpos++, mask <<= 1) { + if (mask & sagaw_mask) { + nlevels = bitpos + 2; + agaw = 30 + (bitpos * 9); + } + } + + /* calculated agaw can be > 64 */ + agaw = (agaw > 64) ? 64 : agaw; + + if (agaw < 30 || agaw > magaw) { + ddi_err(DER_WARN, NULL, "%s: Calculated AGAW (%d) " + "is outside the valid limits [30,%d] specified by the " + "VT-d spec and magaw", immu->immu_name, agaw, magaw); + return (DDI_FAILURE); + } + + if (nlevels < 2 || nlevels > 6) { + ddi_err(DER_WARN, NULL, "%s: Calculated pagetable " + "level (%d) is outside valid limits [2,6]", + immu->immu_name, nlevels); + return (DDI_FAILURE); + } + + ddi_err(DER_LOG, NULL, "Calculated pagetable " + "level (%d), agaw = %d", nlevels, agaw); + + immu->immu_dvma_nlevels = nlevels; + immu->immu_dvma_agaw = agaw; + + return (DDI_SUCCESS); +} + +static int +setup_regs(immu_t *immu) +{ + int error; + + ASSERT(immu); + ASSERT(immu->immu_name); + + /* + * This lock may be acquired by the IOMMU interrupt handler + */ + mutex_init(&(immu->immu_regs_lock), NULL, MUTEX_DRIVER, + (void *)ipltospl(IMMU_INTR_IPL)); + + /* + * map the register address space + */ + error = ddi_regs_map_setup(immu->immu_dip, 0, + (caddr_t *)&(immu->immu_regs_addr), (offset_t)0, + (offset_t)IMMU_REGSZ, &immu_regs_attr, + &(immu->immu_regs_handle)); + + if (error == DDI_FAILURE) { + ddi_err(DER_WARN, NULL, "%s: Intel IOMMU register map failed", + immu->immu_name); + mutex_destroy(&(immu->immu_regs_lock)); + return (DDI_FAILURE); + } + + /* + * read the capability and extended capability registers + */ + immu->immu_regs_cap = get_reg64(immu, IMMU_REG_CAP); + immu->immu_regs_excap = get_reg64(immu, IMMU_REG_EXCAP); + + /* + * if the hardware access is non-coherent, we need clflush + */ + if (IMMU_ECAP_GET_C(immu->immu_regs_excap)) { + immu->immu_dvma_coherent = B_TRUE; + } else { + immu->immu_dvma_coherent = B_FALSE; + if (!(x86_feature & X86_CLFSH)) { + ddi_err(DER_WARN, NULL, + "immu unit %s can't be enabled due to " + "missing clflush functionality", immu->immu_name); + ddi_regs_map_free(&(immu->immu_regs_handle)); + mutex_destroy(&(immu->immu_regs_lock)); + return (DDI_FAILURE); + } + } + + /* + * Check for Mobile 4 series chipset + */ + if (immu_quirk_mobile4 == B_TRUE && + !IMMU_CAP_GET_RWBF(immu->immu_regs_cap)) { + ddi_err(DER_LOG, NULL, + "IMMU: Mobile 4 chipset quirk detected. 
" + "Force-setting RWBF"); + IMMU_CAP_SET_RWBF(immu->immu_regs_cap); + ASSERT(IMMU_CAP_GET_RWBF(immu->immu_regs_cap)); + } + + /* + * retrieve the maximum number of domains + */ + immu->immu_max_domains = IMMU_CAP_ND(immu->immu_regs_cap); + + /* + * calculate the agaw + */ + if (set_agaw(immu) != DDI_SUCCESS) { + ddi_regs_map_free(&(immu->immu_regs_handle)); + mutex_destroy(&(immu->immu_regs_lock)); + return (DDI_FAILURE); + } + immu->immu_regs_cmdval = 0; + + return (DDI_SUCCESS); +} + +/* ############### Functions exported ################## */ + +/* + * immu_regs_setup() + * Setup mappings to a IMMU unit's registers + * so that they can be read/written + */ +void +immu_regs_setup(list_t *listp) +{ + int i; + immu_t *immu; + + for (i = 0; i < IMMU_MAXSEG; i++) { + immu = list_head(listp); + for (; immu; immu = list_next(listp, immu)) { + /* do your best, continue on error */ + if (setup_regs(immu) != DDI_SUCCESS) { + immu->immu_regs_setup = B_FALSE; + } else { + immu->immu_regs_setup = B_TRUE; + } + } + } +} + +/* + * immu_regs_map() + */ +int +immu_regs_resume(immu_t *immu) +{ + int error; + + /* + * remap the register address space + */ + error = ddi_regs_map_setup(immu->immu_dip, 0, + (caddr_t *)&(immu->immu_regs_addr), (offset_t)0, + (offset_t)IMMU_REGSZ, &immu_regs_attr, + &(immu->immu_regs_handle)); + if (error != DDI_SUCCESS) { + return (DDI_FAILURE); + } + + immu_regs_set_root_table(immu); + + immu_regs_intr_enable(immu, immu->immu_regs_intr_msi_addr, + immu->immu_regs_intr_msi_data, immu->immu_regs_intr_uaddr); + + (void) immu_intr_handler(immu); + + immu_regs_intrmap_enable(immu, immu->immu_intrmap_irta_reg); + + immu_regs_qinv_enable(immu, immu->immu_qinv_reg_value); + + + return (error); +} + +/* + * immu_regs_suspend() + */ +void +immu_regs_suspend(immu_t *immu) +{ + + immu->immu_intrmap_running = B_FALSE; + + /* Finally, unmap the regs */ + ddi_regs_map_free(&(immu->immu_regs_handle)); +} + +/* + * immu_regs_startup() + * set a IMMU unit's registers to startup the unit + */ +void +immu_regs_startup(immu_t *immu) +{ + uint32_t status; + + if (immu->immu_regs_setup == B_FALSE) { + return; + } + + ASSERT(immu->immu_regs_running == B_FALSE); + + ASSERT(MUTEX_HELD(&(immu->immu_lock))); + + mutex_enter(&(immu->immu_regs_lock)); + put_reg32(immu, IMMU_REG_GLOBAL_CMD, + immu->immu_regs_cmdval | IMMU_GCMD_TE); + wait_completion(immu, IMMU_REG_GLOBAL_STS, + get_reg32, (status & IMMU_GSTS_TES), status); + immu->immu_regs_cmdval |= IMMU_GCMD_TE; + immu->immu_regs_running = B_TRUE; + mutex_exit(&(immu->immu_regs_lock)); + + ddi_err(DER_NOTE, NULL, "IMMU %s running", immu->immu_name); +} + +/* + * immu_regs_shutdown() + * shutdown a unit + */ +void +immu_regs_shutdown(immu_t *immu) +{ + uint32_t status; + + if (immu->immu_regs_running == B_FALSE) { + return; + } + + ASSERT(immu->immu_regs_setup == B_TRUE); + + ASSERT(MUTEX_HELD(&(immu->immu_lock))); + + mutex_enter(&(immu->immu_regs_lock)); + immu->immu_regs_cmdval &= ~IMMU_GCMD_TE; + put_reg32(immu, IMMU_REG_GLOBAL_CMD, + immu->immu_regs_cmdval); + wait_completion(immu, IMMU_REG_GLOBAL_STS, + get_reg32, !(status & IMMU_GSTS_TES), status); + immu->immu_regs_running = B_FALSE; + mutex_exit(&(immu->immu_regs_lock)); + + ddi_err(DER_NOTE, NULL, "IOMMU %s stopped", immu->immu_name); +} + +/* + * immu_regs_intr() + * Set a IMMU unit regs to setup a IMMU unit's + * interrupt handler + */ +void +immu_regs_intr_enable(immu_t *immu, uint32_t msi_addr, uint32_t msi_data, + uint32_t uaddr) +{ + mutex_enter(&(immu->immu_regs_lock)); + 
immu->immu_regs_intr_msi_addr = msi_addr; + immu->immu_regs_intr_uaddr = uaddr; + immu->immu_regs_intr_msi_data = msi_data; + put_reg32(immu, IMMU_REG_FEVNT_ADDR, msi_addr); + put_reg32(immu, IMMU_REG_FEVNT_UADDR, uaddr); + put_reg32(immu, IMMU_REG_FEVNT_DATA, msi_data); + put_reg32(immu, IMMU_REG_FEVNT_CON, 0); + mutex_exit(&(immu->immu_regs_lock)); +} + +/* + * immu_regs_passthru_supported() + * Returns B_TRUE if passthru is supported + */ +boolean_t +immu_regs_passthru_supported(immu_t *immu) +{ + if (IMMU_ECAP_GET_PT(immu->immu_regs_excap)) { + return (B_TRUE); + } + + ddi_err(DER_WARN, NULL, "Passthru not supported"); + return (B_FALSE); +} + +/* + * immu_regs_is_TM_reserved() + * Returns B_TRUE if TM field is reserved + */ +boolean_t +immu_regs_is_TM_reserved(immu_t *immu) +{ + if (IMMU_ECAP_GET_DI(immu->immu_regs_excap) || + IMMU_ECAP_GET_CH(immu->immu_regs_excap)) { + return (B_FALSE); + } + return (B_TRUE); +} + +/* + * immu_regs_is_SNP_reserved() + * Returns B_TRUE if SNP field is reserved + */ +boolean_t +immu_regs_is_SNP_reserved(immu_t *immu) +{ + + return (IMMU_ECAP_GET_SC(immu->immu_regs_excap) ? B_FALSE : B_TRUE); +} + +/* + * immu_regs_wbf_flush() + * If required and supported, write to IMMU + * unit's regs to flush DMA write buffer(s) + */ +void +immu_regs_wbf_flush(immu_t *immu) +{ + uint32_t status; + + if (!IMMU_CAP_GET_RWBF(immu->immu_regs_cap)) { + return; + } + + mutex_enter(&(immu->immu_regs_lock)); + put_reg32(immu, IMMU_REG_GLOBAL_CMD, + immu->immu_regs_cmdval | IMMU_GCMD_WBF); + wait_completion(immu, IMMU_REG_GLOBAL_STS, + get_reg32, (!(status & IMMU_GSTS_WBFS)), status); + mutex_exit(&(immu->immu_regs_lock)); +} + +/* + * immu_regs_cpu_flush() + * flush the cpu cache line after CPU memory writes, so + * the IOMMU can see the writes + */ +void +immu_regs_cpu_flush(immu_t *immu, caddr_t addr, uint_t size) +{ + uint_t i; + + ASSERT(immu); + + if (immu->immu_dvma_coherent == B_TRUE) + return; + + for (i = 0; i < size; i += x86_clflush_size) { + clflush_insn(addr+i); + } + + mfence_insn(); +} + +void +immu_regs_iotlb_flush(immu_t *immu, uint_t domainid, uint64_t dvma, + uint64_t count, uint_t hint, immu_iotlb_inv_t type) +{ + ASSERT(immu); + + switch (type) { + case IOTLB_PSI: + ASSERT(domainid > 0); + ASSERT(dvma > 0); + ASSERT(count > 0); + iotlb_psi(immu, domainid, dvma, count, hint); + break; + case IOTLB_DSI: + ASSERT(domainid > 0); + ASSERT(dvma == 0); + ASSERT(count == 0); + ASSERT(hint == 0); + iotlb_dsi(immu, domainid); + break; + case IOTLB_GLOBAL: + ASSERT(domainid == 0); + ASSERT(dvma == 0); + ASSERT(count == 0); + ASSERT(hint == 0); + iotlb_global(immu); + break; + default: + ddi_err(DER_PANIC, NULL, "invalid IOTLB invalidation type: %d", + type); + /*NOTREACHED*/ + } +} + +/* + * immu_regs_context_flush() + * flush the context cache + */ +void +immu_regs_context_flush(immu_t *immu, uint8_t function_mask, + uint16_t sid, uint_t did, immu_context_inv_t type) +{ + uint64_t command = 0; + uint64_t status; + + ASSERT(immu); + ASSERT(rw_write_held(&(immu->immu_ctx_rwlock))); + + /* + * define the command + */ + switch (type) { + case CONTEXT_FSI: + command |= CCMD_INV_ICC | CCMD_INV_DEVICE + | CCMD_INV_DID(did) + | CCMD_INV_SID(sid) | CCMD_INV_FM(function_mask); + break; + case CONTEXT_DSI: + ASSERT(function_mask == 0); + ASSERT(sid == 0); + command |= CCMD_INV_ICC | CCMD_INV_DOMAIN + | CCMD_INV_DID(did); + break; + case CONTEXT_GLOBAL: + ASSERT(function_mask == 0); + ASSERT(sid == 0); + ASSERT(did == 0); + command |= CCMD_INV_ICC | CCMD_INV_GLOBAL; + break; + 
default: + ddi_err(DER_PANIC, NULL, + "%s: incorrect context cache flush type", + immu->immu_name); + /*NOTREACHED*/ + } + + mutex_enter(&(immu->immu_regs_lock)); + /* verify there is no pending command */ + wait_completion(immu, IMMU_REG_CONTEXT_CMD, get_reg64, + (!(status & CCMD_INV_ICC)), status); + put_reg64(immu, IMMU_REG_CONTEXT_CMD, command); + wait_completion(immu, IMMU_REG_CONTEXT_CMD, get_reg64, + (!(status & CCMD_INV_ICC)), status); + mutex_exit(&(immu->immu_regs_lock)); +} + +void +immu_regs_set_root_table(immu_t *immu) +{ + uint32_t status; + + mutex_enter(&(immu->immu_regs_lock)); + put_reg64(immu, IMMU_REG_ROOTENTRY, + immu->immu_ctx_root->hwpg_paddr); + put_reg32(immu, IMMU_REG_GLOBAL_CMD, + immu->immu_regs_cmdval | IMMU_GCMD_SRTP); + wait_completion(immu, IMMU_REG_GLOBAL_STS, + get_reg32, (status & IMMU_GSTS_RTPS), status); + mutex_exit(&(immu->immu_regs_lock)); +} + + +/* enable queued invalidation interface */ +void +immu_regs_qinv_enable(immu_t *immu, uint64_t qinv_reg_value) +{ + uint32_t status; + + if (immu_qinv_enable == B_FALSE) + return; + + mutex_enter(&immu->immu_regs_lock); + immu->immu_qinv_reg_value = qinv_reg_value; + /* Initialize the Invalidation Queue Tail register to zero */ + put_reg64(immu, IMMU_REG_INVAL_QT, 0); + + /* set invalidation queue base address register */ + put_reg64(immu, IMMU_REG_INVAL_QAR, qinv_reg_value); + + /* enable queued invalidation interface */ + put_reg32(immu, IMMU_REG_GLOBAL_CMD, + immu->immu_regs_cmdval | IMMU_GCMD_QIE); + wait_completion(immu, IMMU_REG_GLOBAL_STS, + get_reg32, (status & IMMU_GSTS_QIES), status); + mutex_exit(&immu->immu_regs_lock); + + immu->immu_regs_cmdval |= IMMU_GCMD_QIE; + immu->immu_qinv_running = B_TRUE; +} + +/* enable interrupt remapping hardware unit */ +void +immu_regs_intrmap_enable(immu_t *immu, uint64_t irta_reg) +{ + uint32_t status; + + if (immu_intrmap_enable == B_FALSE) + return; + + /* set interrupt remap table pointer */ + mutex_enter(&(immu->immu_regs_lock)); + immu->immu_intrmap_irta_reg = irta_reg; + put_reg64(immu, IMMU_REG_IRTAR, irta_reg); + put_reg32(immu, IMMU_REG_GLOBAL_CMD, + immu->immu_regs_cmdval | IMMU_GCMD_SIRTP); + wait_completion(immu, IMMU_REG_GLOBAL_STS, + get_reg32, (status & IMMU_GSTS_IRTPS), status); + mutex_exit(&(immu->immu_regs_lock)); + + /* globally flush the interrupt entry cache */ + if (immu_qinv_enable == B_TRUE) + immu_qinv_intr_global(immu); + + /* enable interrupt remapping */ + mutex_enter(&(immu->immu_regs_lock)); + put_reg32(immu, IMMU_REG_GLOBAL_CMD, + immu->immu_regs_cmdval | IMMU_GCMD_IRE); + wait_completion(immu, IMMU_REG_GLOBAL_STS, + get_reg32, (status & IMMU_GSTS_IRES), + status); + immu->immu_regs_cmdval |= IMMU_GCMD_IRE; + + /* set compatible mode */ + put_reg32(immu, IMMU_REG_GLOBAL_CMD, + immu->immu_regs_cmdval | IMMU_GCMD_CFI); + wait_completion(immu, IMMU_REG_GLOBAL_STS, + get_reg32, (status & IMMU_GSTS_CFIS), + status); + immu->immu_regs_cmdval |= IMMU_GCMD_CFI; + mutex_exit(&(immu->immu_regs_lock)); + + immu->immu_intrmap_running = B_TRUE; +} + +uint64_t +immu_regs_get64(immu_t *immu, uint_t reg) +{ + return (get_reg64(immu, reg)); +} + +uint32_t +immu_regs_get32(immu_t *immu, uint_t reg) +{ + return (get_reg32(immu, reg)); +} + +void +immu_regs_put64(immu_t *immu, uint_t reg, uint64_t val) +{ + put_reg64(immu, reg, val); +} + +void +immu_regs_put32(immu_t *immu, uint_t reg, uint32_t val) +{ + put_reg32(immu, reg, val); +}
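Every register path in immu_regs.c follows the same handshake: write a command bit to the global command (or per-cache command) register, then spin on the corresponding status register until the matching status bit flips, panicking only after the 60-second IMMU_MAX_WAIT_TIME expires. A self-contained sketch of that pattern, where the register pointer, status bit, and spin-count timeout are illustrative stand-ins for the driver's ddi_get32()/ddi_get64() accessors and lbolt-based timing:

/*
 * Hedged sketch of the wait_completion() handshake; names and the
 * spin-count timeout are stand-ins, not the driver's API.
 */
#include <stdint.h>

static int
wait_for_status_bit(volatile uint32_t *sts_reg, uint32_t bit,
    uint64_t max_spins)
{
	uint64_t n;

	for (n = 0; n < max_spins; n++) {
		if (*sts_reg & bit)	/* hardware acknowledged the command */
			return (0);
		/* iommu_cpu_nop() would be called between polls */
	}
	return (-1);	/* wait_completion() panics rather than returning */
}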
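The set_agaw() arithmetic earlier in this file is also easy to check by hand: gaw2agaw() rounds the guest address width up to the next value of the form 12 + 9k, and each set SAGAW bit at position b corresponds to agaw = 30 + 9b with b + 2 pagetable levels. A small sketch reproducing that math under those assumptions, with a hypothetical unit that supports only 4-level tables:

/*
 * Worked example of the AGAW math (illustrative; mirrors gaw2agaw()
 * and the sagaw_mask loop in set_agaw() above).
 */
#include <stdio.h>

static int
gaw_to_agaw(int gaw)
{
	int r = (gaw - 12) % 9;
	int agaw = (r == 0) ? gaw : gaw + 9 - r;

	return (agaw > 64 ? 64 : agaw);
}

int
main(void)
{
	int sagaw_mask = 0x4;	/* hypothetical unit: only bit 2 set */
	int bitpos, agaw = 0, nlevels = 0;

	for (bitpos = 0; bitpos < 5; bitpos++) {
		if (sagaw_mask & (1 << bitpos)) {
			nlevels = bitpos + 2;	/* 4 levels */
			agaw = 30 + (bitpos * 9);	/* 48 bits */
		}
	}
	/* an mgaw of 47 rounds up to magaw = 48, so agaw = 48 passes */
	(void) printf("magaw=%d agaw=%d nlevels=%d\n",
	    gaw_to_agaw(47), agaw, nlevels);
	return (0);
}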
--- a/usr/src/uts/i86pc/io/intel_iommu.c Sat Jan 30 15:04:39 2010 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4939 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Portions Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -/* - * Copyright (c) 2009, Intel Corporation. - * All rights reserved. - */ - -/* - * Intel IOMMU implementation - */ -#include <sys/conf.h> -#include <sys/modctl.h> -#include <sys/pci.h> -#include <sys/pci_impl.h> -#include <sys/sysmacros.h> -#include <sys/ddi.h> -#include <sys/ddidmareq.h> -#include <sys/ddi_impldefs.h> -#include <sys/ddifm.h> -#include <sys/sunndi.h> -#include <sys/debug.h> -#include <sys/fm/protocol.h> -#include <sys/note.h> -#include <sys/apic.h> -#include <vm/hat_i86.h> -#include <sys/smp_impldefs.h> -#include <sys/spl.h> -#include <sys/archsystm.h> -#include <sys/x86_archext.h> -#include <sys/rootnex.h> -#include <sys/avl.h> -#include <sys/bootconf.h> -#include <sys/bootinfo.h> -#include <sys/intel_iommu.h> -#include <sys/atomic.h> -#include <sys/iommulib.h> -#include <sys/memlist.h> -#include <sys/pcie.h> -#include <sys/pci_cfgspace.h> - -/* - * Macros based on PCI spec - */ -#define GET_DEV(devfn) (devfn >> 3) /* get device from devicefunc */ -#define GET_FUNC(devfn) (devfn & 7) /* get func from devicefunc */ -#define GET_DEVFUNC(d, f) (((d) << 3) | (f)) /* create devicefunc */ -#define REV2CLASS(r) ((r) >> 8) /* Get classcode from revid */ -#define CLASS2BASE(c) ((c) >> 16) /* baseclass from classcode */ -#define CLASS2SUB(c) (((c) >> 8) & 0xff); /* subclass from classcode */ - -static boolean_t drhd_only_for_gfx(intel_iommu_state_t *iommu); -static void iommu_bringup_unit(intel_iommu_state_t *iommu); - -/* - * Are we on a Mobile 4 Series Chipset - */ -static int mobile4_cs = 0; - -/* - * Activate usb workaround for some Mobile 4 Series Chipset based platforms - * On Toshiba laptops, its observed that usb devices appear to - * read physical page 0. If we enable RW access via iommu, system doesnt - * hang, otherwise the system hangs when the last include-all engine is - * enabled for translation. - * This happens only when enabling legacy emulation mode. 
- */ -static int usb_page0_quirk = 1; -static int usb_fullpa_quirk = 0; -static int usb_rmrr_quirk = 1; - -/* - * internal variables - * iommu_states - the list of iommu - * domain_states - the list of domain - * rmrr_states - the list of rmrr - * page_num - the count of pages for iommu page tables - */ -static list_t iommu_states; -static list_t domain_states; -static list_t rmrr_states; -static uint_t page_num; - -/* - * record some frequently used dips - */ -static dev_info_t *root_devinfo = NULL; -static dev_info_t *lpc_devinfo = NULL; - -/* - * A single element in the BDF based cache of private structs - */ -typedef struct bdf_private_entry { - int bpe_seg; - int bpe_bus; - int bpe_devfcn; - iommu_private_t *bpe_private; - struct bdf_private_entry *bpe_next; -} bdf_private_entry_t; - -/* - * Head of the BDF based cache of private structs - */ -typedef struct bdf_private_cache { - kmutex_t bpc_lock; - bdf_private_entry_t *bpc_cache; -} bdf_private_cache_t; - -static bdf_private_cache_t bdf_private_cache; - -/* - * dvma cache related variables - */ -static uint_t dvma_cache_high = 64; -static dvma_cookie_head_t cookie_cache[MAX_COOKIE_CACHE_SIZE]; - -/* ioapic info for interrupt remapping */ -static ioapic_iommu_info_t *ioapic_iommu_infos[MAX_IO_APIC]; - -/* - * switch to turn on/off the gfx dma remapping unit, - * this is used when there is a dedicated drhd for the - * gfx - */ -int gfx_drhd_disable = 0; -static dev_info_t *gfx_devinfo = NULL; - -/* - * switch to disable dmar remapping unit, even the initiation work has - * been finished - */ -int dmar_drhd_disable = 0; - -/* - * switch to disable queued invalidation interface/interrupt remapping - */ -int qinv_disable = 0; -int intrr_disable = 0; - -static char *dmar_fault_reason[] = { - "Reserved", - "The present field in root-entry is Clear", - "The present field in context-entry is Clear", - "Hardware detected invalid programming of a context-entry", - "The DMA request attempted to access an address beyond max support", - "The Write field in a page-table entry is Clear when DMA write", - "The Read field in a page-table entry is Clear when DMA read", - "Access the next level page table resulted in error", - "Access the root-entry table resulted in error", - "Access the context-entry table resulted in error", - "Reserved field not initialized to zero in a present root-entry", - "Reserved field not initialized to zero in a present context-entry", - "Reserved field not initialized to zero in a present page-table entry", - "DMA blocked due to the Translation Type field in context-entry", - "Incorrect fault event reason number" -}; - -#define DMAR_MAX_REASON_NUMBER (14) - -#define IOMMU_IOVPTE_TABLE_SIZE (IOMMU_LEVEL_SIZE * sizeof (struct iovpte)) - -/* - * Check if the device has mobile 4 chipset quirk - */ -static int -check_hwquirk_walk(dev_info_t *dip, void *arg) -{ - _NOTE(ARGUNUSED(arg)) - int vendor_id, device_id; - - vendor_id = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, - "vendor-id", -1); - device_id = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, - "device-id", -1); - - if (vendor_id == 0x8086 && device_id == 0x2a40) { - mobile4_cs = 1; - return (DDI_WALK_TERMINATE); - } else { - return (DDI_WALK_CONTINUE); - } -} - -static void -check_hwquirk(void) -{ - int count; - - /* - * walk through the entire device tree - */ - ndi_devi_enter(root_devinfo, &count); - ddi_walk_devs(ddi_get_child(root_devinfo), check_hwquirk_walk, NULL); - ndi_devi_exit(root_devinfo, count); -} - -#define IOMMU_ALLOC_RESOURCE_DELAY 
drv_usectohz(5000) - -/* - * QS field of Invalidation Queue Address Register - * the size of invalidation queue is 1 << (qinv_iqa_qs + 8) - */ -static uint_t qinv_iqa_qs = 6; - -/* - * the invalidate desctiptor type of queued invalidation interface - */ -static char *qinv_dsc_type[] = { - "Reserved", - "Context Cache Invalidate Descriptor", - "IOTLB Invalidate Descriptor", - "Device-IOTLB Invalidate Descriptor", - "Interrupt Entry Cache Invalidate Descriptor", - "Invalidation Wait Descriptor", - "Incorrect queue invalidation type" -}; - -#define QINV_MAX_DSC_TYPE (6) - -/* - * S field of the Interrupt Remapping Table Address Register - * the size of the interrupt remapping table is 1 << (intrr_irta_s + 1) - */ -static uint_t intrr_irta_s = INTRR_MAX_IRTA_SIZE; - -/* - * If true, arrange to suppress broadcast EOI by setting edge-triggered mode - * even for level-triggered interrupts in the interrupt-remapping engine. - * If false, broadcast EOI can still be suppressed if the CPU supports the - * APIC_SVR_SUPPRESS_BROADCAST_EOI bit. In both cases, the IOAPIC is still - * programmed with the correct trigger mode, and pcplusmp must send an EOI - * to the IOAPIC by writing to the IOAPIC's EOI register to make up for the - * missing broadcast EOI. - */ -static int intrr_suppress_brdcst_eoi = 0; - -/* - * whether verify the source id of interrupt request - */ -static int intrr_enable_sid_verify = 0; - -/* the fault reason for interrupt remapping */ -static char *intrr_fault_reason[] = { - "reserved field set in IRTE", - "interrupt_index exceed the intr-remap table size", - "present field in IRTE is clear", - "hardware access intr-remap table address resulted in error", - "reserved field set in IRTE, inlcude various conditional", - "hardware blocked an interrupt request in Compatibility format", - "remappable interrupt request blocked due to verification failure" -}; - -#define INTRR_MAX_REASON_NUMBER (6) - -/* - * the queued invalidation interface functions - */ -static int iommu_qinv_init(intel_iommu_state_t *iommu); -static void iommu_qinv_fini(intel_iommu_state_t *iommu); -static void iommu_qinv_enable(intel_iommu_state_t *iommu); -static void qinv_submit_inv_dsc(intel_iommu_state_t *iommu, inv_dsc_t *dsc); -static void qinv_cc_common(intel_iommu_state_t *iommu, uint8_t function_mask, - uint16_t source_id, uint_t domain_id, ctt_inv_g_t type); -static void qinv_iotlb_common(intel_iommu_state_t *iommu, uint_t domain_id, - uint64_t addr, uint_t am, uint_t hint, tlb_inv_g_t type); -static void qinv_iec_common(intel_iommu_state_t *iommu, uint_t iidx, - uint_t im, uint_t g); -static void qinv_iec_global(intel_iommu_state_t *iommu); -static void qinv_iec_single(intel_iommu_state_t *iommu, uint_t iidx); -static void qinv_iec(intel_iommu_state_t *iommu, uint_t iidx, uint_t cnt); -static uint_t qinv_alloc_sync_mem_entry(intel_iommu_state_t *iommu); -static void qinv_wait_async_unfence(intel_iommu_state_t *iommu, - iotlb_pend_node_t *node); -static void qinv_wait_sync(intel_iommu_state_t *iommu); -static int qinv_wait_async_finish(intel_iommu_state_t *iommu, int *count); -static void qinv_cc_fsi(intel_iommu_state_t *iommu, uint8_t function_mask, - uint16_t source_id, uint_t domain_id); -static void qinv_cc_dsi(intel_iommu_state_t *iommu, uint_t domain_id); -static void qinv_cc_gbl(intel_iommu_state_t *iommu); -static void qinv_iotlb_psi(intel_iommu_state_t *iommu, uint_t domain_id, - uint64_t dvma, uint_t count, uint_t hint); -static void qinv_iotlb_dsi(intel_iommu_state_t *iommu, uint_t domain_id); 
-static void qinv_iotlb_gbl(intel_iommu_state_t *iommu); -static void qinv_plant_wait(intel_iommu_state_t *iommu, - iommu_dvma_cookie_t *dcookies, uint_t count, uint_t array_size); -static void qinv_reap_wait(intel_iommu_state_t *iommu); - -/*LINTED*/ -static void qinv_wait_async_fence(intel_iommu_state_t *iommu); -/*LINTED*/ -static void qinv_dev_iotlb_common(intel_iommu_state_t *iommu, uint16_t sid, - uint64_t addr, uint_t size, uint_t max_invs_pd); - -/* interrupt remapping related functions */ -static int intr_remap_init_unit(intel_iommu_state_t *iommu); -static void intr_remap_fini_unit(intel_iommu_state_t *iommu); -static void intr_remap_enable_unit(intel_iommu_state_t *iommu); -static uint_t bitset_find_free(bitset_t *, uint_t); -static uint_t bitset_find_multi_free(bitset_t *, uint_t, uint_t); -static int intrr_tbl_alloc_entry(intr_remap_tbl_state_t *); -static int intrr_tbl_alloc_multi_entries(intr_remap_tbl_state_t *, uint_t); -static void get_ioapic_iommu_info(void); -static void intr_remap_get_iommu(apic_irq_t *); -static void intr_remap_get_sid(apic_irq_t *); - -static int intr_remap_init(int); -static void intr_remap_enable(int); -static void intr_remap_alloc_entry(apic_irq_t *); -static void intr_remap_map_entry(apic_irq_t *, void *); -static void intr_remap_free_entry(apic_irq_t *); -static void intr_remap_record_rdt(apic_irq_t *, ioapic_rdt_t *); -static void intr_remap_record_msi(apic_irq_t *, msi_regs_t *); - -static struct apic_intrr_ops intr_remap_ops = { - intr_remap_init, - intr_remap_enable, - intr_remap_alloc_entry, - intr_remap_map_entry, - intr_remap_free_entry, - intr_remap_record_rdt, - intr_remap_record_msi, -}; - -/* apic mode, APIC/X2APIC */ -static int intrr_apic_mode = LOCAL_APIC; - -/* - * cpu_clflush() - * flush the cpu cache line - */ -static void -cpu_clflush(caddr_t addr, uint_t size) -{ - uint_t i; - - for (i = 0; i < size; i += x86_clflush_size) { - clflush_insn(addr+i); - } - - mfence_insn(); -} - -/* - * iommu_page_init() - * do some init work for the iommu page allocator - */ -static void -iommu_page_init(void) -{ - page_num = 0; -} - -/* - * iommu_get_page() - * get a 4k iommu page, and zero out it - */ -static paddr_t -iommu_get_page(intel_iommu_state_t *iommu, int kmflag) -{ - iommu_pghdl_t *pghdl; - caddr_t vaddr; - - pghdl = iommu_page_alloc(iommu, kmflag); - vaddr = pghdl->vaddr; - bzero(vaddr, IOMMU_PAGE_SIZE); - iommu->iu_dmar_ops->do_clflush(vaddr, IOMMU_PAGE_SIZE); - - page_num++; - - return (pghdl->paddr); -} - -/* - * iommu_free_page() - * free the iommu page allocated with iommu_get_page - */ -static void -iommu_free_page(intel_iommu_state_t *iommu, paddr_t paddr) -{ - iommu_page_free(iommu, paddr); - page_num--; -} - -#define iommu_get_reg32(iommu, offset) ddi_get32((iommu)->iu_reg_handle, \ - (uint32_t *)(iommu->iu_reg_address + (offset))) -#define iommu_get_reg64(iommu, offset) ddi_get64((iommu)->iu_reg_handle, \ - (uint64_t *)(iommu->iu_reg_address + (offset))) -#define iommu_put_reg32(iommu, offset, val) ddi_put32\ - ((iommu)->iu_reg_handle, \ - (uint32_t *)(iommu->iu_reg_address + (offset)), val) -#define iommu_put_reg64(iommu, offset, val) ddi_put64\ - ((iommu)->iu_reg_handle, \ - (uint64_t *)(iommu->iu_reg_address + (offset)), val) - -/* - * calculate_agaw() - * calculate agaw from gaw - */ -static int -calculate_agaw(int gaw) -{ - int r, agaw; - - r = (gaw - 12) % 9; - - if (r == 0) - agaw = gaw; - else - agaw = gaw + 9 - r; - - if (agaw > 64) - agaw = 64; - - return (agaw); -} - -/* - * destroy_iommu_state() - * destory an 
iommu state - */ -static void -destroy_iommu_state(intel_iommu_state_t *iommu) -{ - iommu_free_page(iommu, iommu->iu_root_entry_paddr); - iommu_rscs_fini(&(iommu->iu_domain_id_hdl)); - mutex_destroy(&(iommu->iu_reg_lock)); - mutex_destroy(&(iommu->iu_root_context_lock)); - ddi_regs_map_free(&(iommu->iu_reg_handle)); - kmem_free(iommu->iu_dmar_ops, sizeof (struct dmar_ops)); - - if (iommu->iu_inv_queue) { - iommu_qinv_fini(iommu); - } - - if (iommu->iu_intr_remap_tbl) { - intr_remap_fini_unit(iommu); - } - - kmem_free(iommu, sizeof (intel_iommu_state_t)); -} - -/* - * iommu_update_stats - update iommu private kstat counters - * - * This routine will dump and reset the iommu's internal - * statistics counters. The current stats dump values will - * be sent to the kernel status area. - */ -static int -iommu_update_stats(kstat_t *ksp, int rw) -{ - intel_iommu_state_t *iommu; - iommu_kstat_t *iommu_ksp; - const char *state; - - if (rw == KSTAT_WRITE) - return (EACCES); - - iommu = (intel_iommu_state_t *)ksp->ks_private; - ASSERT(iommu != NULL); - iommu_ksp = (iommu_kstat_t *)ksp->ks_data; - ASSERT(iommu_ksp != NULL); - - state = (iommu->iu_enabled & DMAR_ENABLE) ? "enabled" : "disabled"; - (void) strcpy(iommu_ksp->is_dmar_enabled.value.c, state); - state = (iommu->iu_enabled & QINV_ENABLE) ? "enabled" : "disabled"; - (void) strcpy(iommu_ksp->is_qinv_enabled.value.c, state); - state = (iommu->iu_enabled & INTRR_ENABLE) ? - "enabled" : "disabled"; - (void) strcpy(iommu_ksp->is_intrr_enabled.value.c, state); - iommu_ksp->is_iotlb_psi.value.ui64 = - iommu->iu_statistics.st_iotlb_psi; - iommu_ksp->is_iotlb_domain.value.ui64 = - iommu->iu_statistics.st_iotlb_domain; - iommu_ksp->is_iotlb_global.value.ui64 = - iommu->iu_statistics.st_iotlb_global; - iommu_ksp->is_write_buffer.value.ui64 = - iommu->iu_statistics.st_write_buffer; - iommu_ksp->is_context_cache.value.ui64 = - iommu->iu_statistics.st_context_cache; - iommu_ksp->is_wait_complete_us.value.ui64 = - drv_hztousec(iommu->iu_statistics.st_wait_complete_us); - iommu_ksp->is_domain_alloc.value.ui64 = - iommu->iu_statistics.st_domain_alloc; - iommu_ksp->is_page_used.value.ui64 = page_num; - - return (0); -} - -/* - * iommu_init_stats - initialize kstat data structures - * - * This routine will create and initialize the iommu private - * statistics counters. 
- */ -int -iommu_init_stats(intel_iommu_state_t *iommu) -{ - kstat_t *ksp; - iommu_kstat_t *iommu_ksp; - - /* - * Create and init kstat - */ - ksp = kstat_create("rootnex", 0, - ddi_node_name(iommu->iu_drhd->di_dip), - "misc", KSTAT_TYPE_NAMED, - sizeof (iommu_kstat_t) / sizeof (kstat_named_t), 0); - - if (ksp == NULL) { - cmn_err(CE_WARN, - "Could not create kernel statistics for %s", - ddi_node_name(iommu->iu_drhd->di_dip)); - return (DDI_FAILURE); - } - - iommu->iu_kstat = ksp; - iommu_ksp = (iommu_kstat_t *)ksp->ks_data; - - /* - * Initialize all the statistics - */ - kstat_named_init(&(iommu_ksp->is_dmar_enabled), "dmar_enable", - KSTAT_DATA_CHAR); - kstat_named_init(&(iommu_ksp->is_qinv_enabled), "qinv_enable", - KSTAT_DATA_CHAR); - kstat_named_init(&(iommu_ksp->is_intrr_enabled), "intrr_enable", - KSTAT_DATA_CHAR); - kstat_named_init(&(iommu_ksp->is_iotlb_psi), "iotlb_psi", - KSTAT_DATA_UINT64); - kstat_named_init(&(iommu_ksp->is_iotlb_domain), "iotlb_domain", - KSTAT_DATA_UINT64); - kstat_named_init(&(iommu_ksp->is_iotlb_global), "iotlb_global", - KSTAT_DATA_UINT64); - kstat_named_init(&(iommu_ksp->is_write_buffer), "write_buffer", - KSTAT_DATA_UINT64); - kstat_named_init(&(iommu_ksp->is_context_cache), "context_cache", - KSTAT_DATA_UINT64); - kstat_named_init(&(iommu_ksp->is_wait_complete_us), "wait_complete_us", - KSTAT_DATA_UINT64); - kstat_named_init(&(iommu_ksp->is_page_used), "physical_page_used", - KSTAT_DATA_UINT64); - kstat_named_init(&(iommu_ksp->is_domain_alloc), "domain_allocated", - KSTAT_DATA_UINT64); - - /* - * Function to provide kernel stat update on demand - */ - ksp->ks_update = iommu_update_stats; - - /* - * Pointer into provider's raw statistics - */ - ksp->ks_private = (void *)iommu; - - /* - * Add kstat to systems kstat chain - */ - kstat_install(ksp); - - return (DDI_SUCCESS); -} - -/* - * iommu_intr_handler() - * the fault event handler for a single drhd - */ -static int -iommu_intr_handler(intel_iommu_state_t *iommu) -{ - uint32_t status; - int index, fault_reg_offset; - int max_fault_index; - int any_fault = 0; - - mutex_enter(&(iommu->iu_reg_lock)); - - /* read the fault status */ - status = iommu_get_reg32(iommu, IOMMU_REG_FAULT_STS); - - /* check if we have a pending fault for this IOMMU */ - if (!(status & IOMMU_FAULT_STS_PPF)) { - goto no_primary_faults; - } - - /* - * handle all primary pending faults - */ - any_fault = 1; - index = IOMMU_FAULT_GET_INDEX(status); - max_fault_index = IOMMU_CAP_GET_NFR(iommu->iu_capability) - 1; - fault_reg_offset = IOMMU_CAP_GET_FRO(iommu->iu_capability); - - _NOTE(CONSTCOND) - while (1) { - uint64_t val; - uint8_t fault_reason; - uint8_t fault_type; - uint16_t sid; - uint64_t pg_addr; - uint64_t iidx; - - /* read the higher 64bits */ - val = iommu_get_reg64(iommu, - fault_reg_offset + index * 16 + 8); - - /* check if pending fault */ - if (!IOMMU_FRR_GET_F(val)) - break; - - /* get the fault reason, fault type and sid */ - fault_reason = IOMMU_FRR_GET_FR(val); - fault_type = IOMMU_FRR_GET_FT(val); - sid = IOMMU_FRR_GET_SID(val); - - /* read the first 64bits */ - val = iommu_get_reg64(iommu, - fault_reg_offset + index * 16); - pg_addr = val & IOMMU_PAGE_MASK; - iidx = val >> 48; - - /* clear the fault */ - iommu_put_reg32(iommu, fault_reg_offset + index * 16 + 12, - (((uint32_t)1) << 31)); - - /* report the fault info */ - if (fault_reason < 0x20) { - /* dmar-remapping fault */ - cmn_err(CE_WARN, - "%s generated a fault event when translating " - "DMA %s\n" - "\t on address 0x%" PRIx64 " for PCI(%d, %d, %d), " - "the 
reason is:\n\t %s", - ddi_node_name(iommu->iu_drhd->di_dip), - fault_type ? "read" : "write", pg_addr, - (sid >> 8) & 0xff, (sid >> 3) & 0x1f, sid & 0x7, - dmar_fault_reason[MIN(fault_reason, - DMAR_MAX_REASON_NUMBER)]); - } else if (fault_reason < 0x27) { - /* intr-remapping fault */ - cmn_err(CE_WARN, - "%s generated a fault event when translating " - "interrupt request\n" - "\t on index 0x%" PRIx64 " for PCI(%d, %d, %d), " - "the reason is:\n\t %s", - ddi_node_name(iommu->iu_drhd->di_dip), - iidx, - (sid >> 8) & 0xff, (sid >> 3) & 0x1f, sid & 0x7, - intrr_fault_reason[MIN((fault_reason - 0x20), - INTRR_MAX_REASON_NUMBER)]); - } - - index++; - if (index > max_fault_index) - index = 0; - } - -no_primary_faults: - - /* - * handle queued invalidation interface errors - */ - if (status & IOMMU_FAULT_STS_IQE) { - uint64_t head; - inv_dsc_t *dsc; - - head = QINV_IQA_HEAD( - iommu_get_reg64(iommu, IOMMU_REG_INVAL_QH)); - dsc = (inv_dsc_t *)(iommu->iu_inv_queue->iq_table.vaddr - + (head * QINV_ENTRY_SIZE)); - - /* report the error */ - cmn_err(CE_WARN, - "%s generated a fault when fetching a descriptor from the\n" - "\tinvalidation queue, or detects that the fetched\n" - "\tdescriptor is invalid. The head register is " - "0x%" PRIx64 ",\n" - "\tthe type is %s\n", - ddi_node_name(iommu->iu_drhd->di_dip), head, - qinv_dsc_type[MIN(INV_DSC_TYPE(dsc), - QINV_MAX_DSC_TYPE)]); - } - - /* - * Hardware received an unexpected or invalid Device-IOTLB - * invalidation completion - */ - if (status & IOMMU_FAULT_STS_ICE) { - cmn_err(CE_WARN, - "Hardware received an unexpected or invalid " - "Device-IOTLB invalidation completion.\n"); - } - - /* - * Hardware detected a Device-IOTLB invalidation - * completion time-out - */ - if (status & IOMMU_FAULT_STS_ITE) { - cmn_err(CE_WARN, - "Hardware detected a Device-IOTLB invalidation " - "completion time-out.\n"); - } - - /* clear the fault */ - iommu_put_reg32(iommu, IOMMU_REG_FAULT_STS, 1); - - mutex_exit(&(iommu->iu_reg_lock)); - - return (any_fault ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED); -} - -/* - * Function to identify a display device from the PCI class code - */ -static int -device_is_display(uint_t classcode) -{ - static uint_t disp_classes[] = { - 0x000100, - 0x030000, - 0x030001 - }; - int i, nclasses = sizeof (disp_classes) / sizeof (uint_t); - - for (i = 0; i < nclasses; i++) { - if (classcode == disp_classes[i]) - return (1); - } - return (0); -} - -/* - * Function that determines if device is PCIEX and/or PCIEX bridge - */ -static int -device_is_pciex(uchar_t bus, uchar_t dev, uchar_t func, int *is_pci_bridge) -{ - ushort_t cap; - ushort_t capsp; - ushort_t cap_count = PCI_CAP_MAX_PTR; - ushort_t status; - int is_pciex = 0; - - *is_pci_bridge = 0; - - status = pci_getw_func(bus, dev, func, PCI_CONF_STAT); - if (!(status & PCI_STAT_CAP)) - return (0); - - capsp = pci_getb_func(bus, dev, func, PCI_CONF_CAP_PTR); - while (cap_count-- && capsp >= PCI_CAP_PTR_OFF) { - capsp &= PCI_CAP_PTR_MASK; - cap = pci_getb_func(bus, dev, func, capsp); - - if (cap == PCI_CAP_ID_PCI_E) { - status = pci_getw_func(bus, dev, func, capsp + 2); - /* - * See section 7.8.2 of PCI-Express Base Spec v1.0a - * for Device/Port Type. - * PCIE_PCIECAP_DEV_TYPE_PCIE2PCI implies that the - * device is a PCIe2PCI bridge - */ - *is_pci_bridge = - ((status & PCIE_PCIECAP_DEV_TYPE_MASK) == - PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) ? 
1 : 0; - - is_pciex = 1; - } - - capsp = (*pci_getb_func)(bus, dev, func, - capsp + PCI_CAP_NEXT_PTR); - } - - return (is_pciex); -} - -/* - * Allocate a private structure and initialize it - */ -static iommu_private_t * -iommu_create_private(int bus, int dev, int func) -{ - uchar_t basecl, subcl; - uint_t classcode, revclass; - iommu_private_t *private; - int pciex = 0; - int is_pci_bridge = 0; - - /* No cached private struct. Create one */ - private = kmem_zalloc(sizeof (iommu_private_t), KM_SLEEP); - private->idp_seg = 0; /* Currently seg can only be 0 */ - private->idp_bus = bus; - private->idp_devfn = GET_DEVFUNC(dev, func); - private->idp_sec = 0; - private->idp_sub = 0; - private->idp_bbp_type = IOMMU_PPB_NONE; - - /* record the bridge */ - revclass = pci_getl_func(bus, dev, func, PCI_CONF_REVID); - - classcode = REV2CLASS(revclass); - basecl = CLASS2BASE(classcode); - subcl = CLASS2SUB(classcode); - - private->idp_is_bridge = ((basecl == PCI_CLASS_BRIDGE) && - (subcl == PCI_BRIDGE_PCI)); - - if (private->idp_is_bridge) { - private->idp_sec = pci_getb_func(bus, dev, func, - PCI_BCNF_SECBUS); - private->idp_sub = pci_getb_func(bus, dev, func, - PCI_BCNF_SUBBUS); - - pciex = device_is_pciex(bus, dev, func, &is_pci_bridge); - if (pciex && is_pci_bridge) - private->idp_bbp_type = IOMMU_PPB_PCIE_PCI; - else if (pciex) - private->idp_bbp_type = IOMMU_PPB_PCIE_PCIE; - else - private->idp_bbp_type = IOMMU_PPB_PCI_PCI; - } - - /* record the special devices */ - private->idp_is_display = - (device_is_display(classcode) ? B_TRUE : B_FALSE); - - private->idp_is_lpc = ((basecl == PCI_CLASS_BRIDGE) && - (subcl == PCI_BRIDGE_ISA)); - private->idp_intel_domain = NULL; - - return (private); -} - -/* - * Set the private struct in the private field of a devinfo node - */ -static int -iommu_set_private(dev_info_t *dip) -{ - bdf_private_entry_t *bpe, *new; - int bus, device, func, seg; - iommu_private_t *pvt; - dmar_domain_state_t *domain; - - seg = 0; /* NOTE: Currently seg always = 0 */ - bus = device = func = -1; - - if (acpica_get_bdf(dip, &bus, &device, &func) != DDI_SUCCESS) { - /* probably not PCI device */ - return (DDI_FAILURE); - } - - /* - * We always need a private structure, whether it was cached - * or not previously, since a hotplug may change the type of - * device - for example we may have had a bridge here before, - * and now we could have a leaf device - */ - pvt = iommu_create_private(bus, device, func); - ASSERT(pvt); - - /* assume new cache entry needed */ - new = kmem_zalloc(sizeof (*new), KM_SLEEP); - - mutex_enter(&bdf_private_cache.bpc_lock); - - for (bpe = bdf_private_cache.bpc_cache; bpe; bpe = bpe->bpe_next) { - if (bpe->bpe_seg == seg && - bpe->bpe_bus == bus && - bpe->bpe_devfcn == GET_DEVFUNC(device, func)) { - break; - } - } - - if (bpe) { - /* extry exists, new not needed */ - kmem_free(new, sizeof (*new)); - ASSERT(bpe->bpe_private); - domain = bpe->bpe_private->idp_intel_domain; - /* domain may be NULL */ - kmem_free(bpe->bpe_private, sizeof (iommu_private_t)); - bpe->bpe_private = pvt; - pvt->idp_intel_domain = domain; - } else { - new->bpe_seg = pvt->idp_seg; - new->bpe_bus = pvt->idp_bus; - new->bpe_devfcn = pvt->idp_devfn; - new->bpe_private = pvt; - new->bpe_next = bdf_private_cache.bpc_cache; - bdf_private_cache.bpc_cache = new; - } - DEVI(dip)->devi_iommu_private = pvt; - - mutex_exit(&bdf_private_cache.bpc_lock); - return (DDI_SUCCESS); -} - - -/* - * intel_iommu_init() - * the interface to setup interrupt handlers and init the DMAR units - */ -static void 
-intel_iommu_init(void) -{ - int ipl, irq, vect; - intel_iommu_state_t *iommu; - char intr_name[64]; - uint32_t msi_addr, msi_data; - uint32_t iommu_instance = 0; - ipl = IOMMU_INTR_IPL; - - msi_addr = (MSI_ADDR_HDR | - ((apic_cpus[0].aci_local_id & 0xFF) << MSI_ADDR_DEST_SHIFT) | - (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) | - (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT)); - - for_each_in_list(&iommu_states, iommu) { - irq = psm_get_ipivect(ipl, -1); - vect = apic_irq_table[irq]->airq_vector; - msi_data = - ((MSI_DATA_DELIVERY_FIXED << MSI_DATA_DELIVERY_SHIFT) | - vect); - (void) snprintf(intr_name, sizeof (intr_name), - "iommu intr%d", iommu_instance++); - (void) add_avintr((void *)NULL, ipl, - (avfunc)(iommu_intr_handler), - intr_name, irq, (caddr_t)iommu, - NULL, NULL, NULL); - (void) iommu_intr_handler(iommu); - mutex_enter(&(iommu->iu_reg_lock)); - iommu_put_reg32(iommu, IOMMU_REG_FEVNT_ADDR, msi_addr); - if (intrr_apic_mode == LOCAL_X2APIC) { - iommu_put_reg32(iommu, IOMMU_REG_FEVNT_UADDR, - apic_cpus[0].aci_local_id & 0xFFFFFF00); - } else { - iommu_put_reg32(iommu, IOMMU_REG_FEVNT_UADDR, 0); - } - iommu_put_reg32(iommu, IOMMU_REG_FEVNT_DATA, msi_data); - iommu_put_reg32(iommu, IOMMU_REG_FEVNT_CON, 0); - mutex_exit(&(iommu->iu_reg_lock)); - } - - /* - * enable dma remapping - */ - cmn_err(CE_CONT, "?Start to enable the dmar units\n"); - if (!dmar_drhd_disable) { - for_each_in_list(&iommu_states, iommu) { - if (gfx_drhd_disable && - drhd_only_for_gfx(iommu)) - continue; - iommu_bringup_unit(iommu); - } - } -} - -/* - * wait max 60s for the hardware completion - */ -#define IOMMU_WAIT_TIME 60000000 -#define iommu_wait_completion(iommu, offset, getf, completion, status) \ -{ \ - clock_t stick = ddi_get_lbolt(); \ - clock_t ntick; \ - _NOTE(CONSTCOND) \ - while (1) { \ - status = getf(iommu, offset); \ - ntick = ddi_get_lbolt(); \ - if (completion) {\ - atomic_add_64\ - (&(iommu->iu_statistics.st_wait_complete_us),\ - ntick - stick);\ - break; \ - } \ - if (ntick - stick >= drv_usectohz(IOMMU_WAIT_TIME)) { \ - cmn_err(CE_PANIC, \ - "iommu wait completion time out\n"); \ - } else { \ - iommu_cpu_nop();\ - }\ - }\ -} - -/* - * dmar_flush_write_buffer() - * flush the write buffer - */ -static void -dmar_flush_write_buffer(intel_iommu_state_t *iommu) -{ - uint32_t status; - - mutex_enter(&(iommu->iu_reg_lock)); - iommu_put_reg32(iommu, IOMMU_REG_GLOBAL_CMD, - iommu->iu_global_cmd_reg | IOMMU_GCMD_WBF); - iommu_wait_completion(iommu, IOMMU_REG_GLOBAL_STS, - iommu_get_reg32, !(status & IOMMU_GSTS_WBFS), status); - mutex_exit(&(iommu->iu_reg_lock)); - - /* record the statistics */ - atomic_inc_64(&(iommu->iu_statistics.st_write_buffer)); -} - -/* - * dmar_flush_iotlb_common() - * flush the iotlb cache - */ -static void -dmar_flush_iotlb_common(intel_iommu_state_t *iommu, uint_t domain_id, - uint64_t addr, uint_t am, uint_t hint, tlb_inv_g_t type) -{ - uint64_t command = 0, iva = 0, status; - uint_t iva_offset, iotlb_offset; - - iva_offset = IOMMU_ECAP_GET_IRO(iommu->iu_excapability); - iotlb_offset = iva_offset + 8; - - /* - * prepare drain read/write command - */ - if (IOMMU_CAP_GET_DWD(iommu->iu_capability)) { - command |= TLB_INV_DRAIN_WRITE; - } - - if (IOMMU_CAP_GET_DRD(iommu->iu_capability)) { - command |= TLB_INV_DRAIN_READ; - } - - /* - * if the hardward doesn't support page selective invalidation, we - * will use domain type. 
Otherwise, use global type - */ - switch (type) { - case TLB_INV_G_PAGE: - if (!IOMMU_CAP_GET_PSI(iommu->iu_capability) || - am > IOMMU_CAP_GET_MAMV(iommu->iu_capability) || - addr & IOMMU_PAGE_OFFSET) { - goto ignore_psi; - } - command |= TLB_INV_PAGE | TLB_INV_IVT | - TLB_INV_DID(domain_id); - iva = addr | am | TLB_IVA_HINT(hint); - break; -ignore_psi: - case TLB_INV_G_DOMAIN: - command |= TLB_INV_DOMAIN | TLB_INV_IVT | - TLB_INV_DID(domain_id); - break; - case TLB_INV_G_GLOBAL: - command |= TLB_INV_GLOBAL | TLB_INV_IVT; - break; - default: - cmn_err(CE_WARN, "incorrect iotlb flush type"); - return; - } - - /* - * do the actual flush - */ - mutex_enter(&(iommu->iu_reg_lock)); - /* verify there is no pending command */ - iommu_wait_completion(iommu, iotlb_offset, iommu_get_reg64, - !(status & TLB_INV_IVT), status); - if (iva) - iommu_put_reg64(iommu, iva_offset, iva); - iommu_put_reg64(iommu, iotlb_offset, command); - iommu_wait_completion(iommu, iotlb_offset, iommu_get_reg64, - !(status & TLB_INV_IVT), status); - mutex_exit(&(iommu->iu_reg_lock)); - - /* - * check the result and record the statistics - */ - switch (TLB_INV_GET_IAIG(status)) { - /* global */ - case 1: - atomic_inc_64(&(iommu->iu_statistics.st_iotlb_global)); - break; - /* domain */ - case 2: - atomic_inc_64(&(iommu->iu_statistics.st_iotlb_domain)); - break; - /* psi */ - case 3: - atomic_inc_64(&(iommu->iu_statistics.st_iotlb_psi)); - break; - default: - break; - } -} - -/* - * dmar_flush_iotlb_psi() - * register based iotlb psi invalidation - */ -static void -dmar_flush_iotlb_psi(intel_iommu_state_t *iommu, uint_t domain_id, - uint64_t dvma, uint_t count, uint_t hint) -{ - uint_t am = 0; - uint_t max_am = 0; - uint64_t align = 0; - uint64_t dvma_pg = 0; - uint_t used_count = 0; - - /* choose page specified invalidation */ - if (IOMMU_CAP_GET_PSI(iommu->iu_capability)) { - /* MAMV is valid only if PSI is set */ - max_am = IOMMU_CAP_GET_MAMV(iommu->iu_capability); - while (count != 0) { - /* First calculate alignment of DVMA */ - dvma_pg = IOMMU_BTOP(dvma); - ASSERT(dvma_pg != NULL); - ASSERT(count >= 1); - for (align = 1; (dvma_pg & align) == 0; align <<= 1) - ; - /* truncate count to the nearest power of 2 */ - for (used_count = 1, am = 0; count >> used_count != 0; - used_count <<= 1, am++) - ; - if (am > max_am) { - am = max_am; - used_count = 1 << am; - } - if (align >= used_count) { - dmar_flush_iotlb_common(iommu, domain_id, - dvma, am, hint, TLB_INV_G_PAGE); - } else { - /* align < used_count */ - used_count = align; - for (am = 0; (1 << am) != used_count; am++) - ; - dmar_flush_iotlb_common(iommu, domain_id, - dvma, am, hint, TLB_INV_G_PAGE); - } - count -= used_count; - dvma = (dvma_pg + used_count) << IOMMU_PAGE_SHIFT; - } - /* choose domain invalidation */ - } else { - dmar_flush_iotlb_common(iommu, domain_id, dvma, - 0, 0, TLB_INV_G_DOMAIN); - } -} - -/* - * dmar_flush_iotlb_dsi() - * flush dsi iotlb - */ -static void -dmar_flush_iotlb_dsi(intel_iommu_state_t *iommu, uint_t domain_id) -{ - dmar_flush_iotlb_common(iommu, domain_id, 0, 0, 0, TLB_INV_G_DOMAIN); -} - -/* - * dmar_flush_iotlb_glb() - * flush global iotbl - */ -static void -dmar_flush_iotlb_glb(intel_iommu_state_t *iommu) -{ - dmar_flush_iotlb_common(iommu, 0, 0, 0, 0, TLB_INV_G_GLOBAL); -} - - -/* - * dmar_flush_context_cache() - * flush the context cache - */ -static void -dmar_flush_context_cache(intel_iommu_state_t *iommu, uint8_t function_mask, - uint16_t source_id, uint_t domain_id, ctt_inv_g_t type) -{ - uint64_t command = 0, status; - - /* - 
* define the command - */ - switch (type) { - case CTT_INV_G_DEVICE: - command |= CCMD_INV_ICC | CCMD_INV_DEVICE - | CCMD_INV_DID(domain_id) - | CCMD_INV_SID(source_id) | CCMD_INV_FM(function_mask); - break; - case CTT_INV_G_DOMAIN: - command |= CCMD_INV_ICC | CCMD_INV_DOMAIN - | CCMD_INV_DID(domain_id); - break; - case CTT_INV_G_GLOBAL: - command |= CCMD_INV_ICC | CCMD_INV_GLOBAL; - break; - default: - cmn_err(CE_WARN, "incorrect context cache flush type"); - return; - } - - mutex_enter(&(iommu->iu_reg_lock)); - /* verify there is no pending command */ - iommu_wait_completion(iommu, IOMMU_REG_CONTEXT_CMD, iommu_get_reg64, - !(status & CCMD_INV_ICC), status); - iommu_put_reg64(iommu, IOMMU_REG_CONTEXT_CMD, command); - iommu_wait_completion(iommu, IOMMU_REG_CONTEXT_CMD, iommu_get_reg64, - !(status & CCMD_INV_ICC), status); - mutex_exit(&(iommu->iu_reg_lock)); - - /* record the context cache statistics */ - atomic_inc_64(&(iommu->iu_statistics.st_context_cache)); -} - -/* - * dmar_flush_context_fsi() - * function based context cache flush - */ -static void -dmar_flush_context_fsi(intel_iommu_state_t *iommu, uint8_t function_mask, - uint16_t source_id, uint_t domain_id) -{ - dmar_flush_context_cache(iommu, function_mask, source_id, - domain_id, CTT_INV_G_DEVICE); -} - -/* - * dmar_flush_context_dsi() - * domain based context cache flush - */ -static void -dmar_flush_context_dsi(intel_iommu_state_t *iommu, uint_t domain_id) -{ - dmar_flush_context_cache(iommu, 0, 0, domain_id, CTT_INV_G_DOMAIN); -} - -/* - * dmar_flush_context_gbl() - * flush global context cache - */ -static void -dmar_flush_context_gbl(intel_iommu_state_t *iommu) -{ - dmar_flush_context_cache(iommu, 0, 0, 0, CTT_INV_G_GLOBAL); -} - -/* - * dmar_set_root_entry_table() - * set root entry table - */ -static void -dmar_set_root_table(intel_iommu_state_t *iommu) -{ - uint32_t status; - - mutex_enter(&(iommu->iu_reg_lock)); - iommu_put_reg64(iommu, IOMMU_REG_ROOTENTRY, - iommu->iu_root_entry_paddr); - iommu_put_reg32(iommu, IOMMU_REG_GLOBAL_CMD, - iommu->iu_global_cmd_reg | IOMMU_GCMD_SRTP); - iommu_wait_completion(iommu, IOMMU_REG_GLOBAL_STS, - iommu_get_reg32, (status & IOMMU_GSTS_RTPS), status); - mutex_exit(&(iommu->iu_reg_lock)); -} - -/* - * dmar_enable_unit() - * enable the dmar unit - */ -static void -dmar_enable_unit(intel_iommu_state_t *iommu) -{ - uint32_t status; - - mutex_enter(&(iommu->iu_reg_lock)); - iommu_put_reg32(iommu, IOMMU_REG_GLOBAL_CMD, - IOMMU_GCMD_TE); - iommu_wait_completion(iommu, IOMMU_REG_GLOBAL_STS, - iommu_get_reg32, (status & IOMMU_GSTS_TES), status); - mutex_exit(&(iommu->iu_reg_lock)); - iommu->iu_global_cmd_reg |= IOMMU_GCMD_TE; - iommu->iu_enabled |= DMAR_ENABLE; - cmn_err(CE_CONT, "?\t%s enabled\n", - ddi_node_name(iommu->iu_drhd->di_dip)); -} - -/* - * iommu_bringup_unit() - * the processes to bring up a dmar unit - */ -static void -iommu_bringup_unit(intel_iommu_state_t *iommu) -{ - /* - * flush the iommu write buffer - */ - iommu->iu_dmar_ops->do_flwb(iommu); - - /* - * set root entry table - */ - iommu->iu_dmar_ops->do_set_root_table(iommu); - - /* - * flush the context cache - */ - iommu->iu_dmar_ops->do_context_gbl(iommu); - - /* - * flush the iotlb cache - */ - iommu->iu_dmar_ops->do_iotlb_gbl(iommu); - - /* - * at last enable the unit - */ - iommu->iu_dmar_ops->do_enable(iommu); - - /* enable queued invalidation */ - if (iommu->iu_inv_queue) - iommu_qinv_enable(iommu); -} - -/* - * iommu_dvma_cache_get() - * get a dvma from the cache - */ -static uint64_t 
-iommu_dvma_cache_get(dmar_domain_state_t *domain, - size_t size, size_t align, size_t nocross) -{ - dvma_cache_node_t *cache_node = NULL; - dvma_cache_head_t *cache_head; - uint_t index = IOMMU_BTOP(size) - 1; - uint64_t ioaddr; - - if (index >= DVMA_CACHE_HEAD_CNT) - return (0); - - cache_head = &(domain->dm_dvma_cache[index]); - mutex_enter(&(cache_head->dch_free_lock)); - for_each_in_list(&(cache_head->dch_free_list), cache_node) { - if ((cache_node->dcn_align >= align) && - ((nocross == 0) || - ((cache_node->dcn_dvma ^ (cache_node->dcn_dvma + size - 1)) - < (nocross - 1)))) { - list_remove(&(cache_head->dch_free_list), - cache_node); - cache_head->dch_free_count--; - break; - } - } - mutex_exit(&(cache_head->dch_free_lock)); - - if (cache_node) { - ioaddr = cache_node->dcn_dvma; - mutex_enter(&(cache_head->dch_mem_lock)); - list_insert_head(&(cache_head->dch_mem_list), cache_node); - mutex_exit(&(cache_head->dch_mem_lock)); - return (ioaddr); - } - - return (0); -} - -/* - * iommu_dvma_cache_put() - * put a dvma to the cache after use - */ -static void -iommu_dvma_cache_put(dmar_domain_state_t *domain, uint64_t dvma, - size_t size, size_t align) -{ - dvma_cache_node_t *cache_node = NULL; - dvma_cache_head_t *cache_head; - uint_t index = IOMMU_BTOP(size) - 1; - boolean_t shrink = B_FALSE; - - /* out of cache range */ - if (index >= DVMA_CACHE_HEAD_CNT) { - vmem_xfree(domain->dm_dvma_map, - (void *)(intptr_t)dvma, size); - return; - } - - cache_head = &(domain->dm_dvma_cache[index]); - - /* get a node block */ - mutex_enter(&(cache_head->dch_mem_lock)); - cache_node = list_head(&(cache_head->dch_mem_list)); - if (cache_node) { - list_remove(&(cache_head->dch_mem_list), cache_node); - } - mutex_exit(&(cache_head->dch_mem_lock)); - - /* no cache, alloc one */ - if (cache_node == NULL) { - cache_node = kmem_alloc(sizeof (dvma_cache_node_t), KM_SLEEP); - } - - /* initialize this node */ - cache_node->dcn_align = align; - cache_node->dcn_dvma = dvma; - - /* insert into the free list */ - mutex_enter(&(cache_head->dch_free_lock)); - list_insert_head(&(cache_head->dch_free_list), cache_node); - - /* shrink the cache list */ - if (cache_head->dch_free_count++ > dvma_cache_high) { - cache_node = list_tail(&(cache_head->dch_free_list)); - list_remove(&(cache_head->dch_free_list), cache_node); - shrink = B_TRUE; - cache_head->dch_free_count--; - } - mutex_exit(&(cache_head->dch_free_lock)); - - if (shrink) { - ASSERT(cache_node); - vmem_xfree(domain->dm_dvma_map, - (void *)(intptr_t)(cache_node->dcn_dvma), size); - kmem_free(cache_node, sizeof (dvma_cache_node_t)); - } -} - -/* - * iommu_dvma_cache_flush() - * flush the dvma caches when vmem_xalloc() failed - */ -static void -iommu_dvma_cache_flush(dmar_domain_state_t *domain, dev_info_t *dip) -{ - dvma_cache_node_t *cache_node; - dvma_cache_head_t *cache_head; - uint_t index; - - cmn_err(CE_NOTE, "domain dvma cache for %s flushed", - ddi_node_name(dip)); - - for (index = 0; index < DVMA_CACHE_HEAD_CNT; index++) { - cache_head = &(domain->dm_dvma_cache[index]); - mutex_enter(&(cache_head->dch_free_lock)); - cache_node = list_head(&(cache_head->dch_free_list)); - while (cache_node) { - list_remove(&(cache_head->dch_free_list), cache_node); - vmem_xfree(domain->dm_dvma_map, - (void *)(intptr_t)(cache_node->dcn_dvma), - IOMMU_PTOB(index + 1)); - kmem_free(cache_node, sizeof (dvma_cache_node_t)); - cache_head->dch_free_count--; - cache_node = list_head(&(cache_head->dch_free_list)); - } - ASSERT(cache_head->dch_free_count == 0); - 
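/* - * Editorial note on the cache layout: bucket i of dm_dvma_cache - * holds free dvma ranges of exactly (i + 1) IOMMU pages, so - * assuming 4K IOMMU pages a three-page (0x3000-byte) range lives - * in dm_dvma_cache[2]; this flush walks every bucket and hands - * each cached range back to the vmem arena via vmem_xfree(). - */ - 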
mutex_exit(&(cache_head->dch_free_lock)); - } -} - -/* - * get_dvma_cookie_array() - * get a dvma cookie array from the cache or allocate - */ -static iommu_dvma_cookie_t * -get_dvma_cookie_array(uint_t array_size) -{ - dvma_cookie_head_t *cache_head; - iommu_dvma_cookie_t *cookie = NULL; - - if (array_size > MAX_COOKIE_CACHE_SIZE) { - return (kmem_alloc(sizeof (iommu_dvma_cookie_t) * array_size, - KM_SLEEP)); - } - - cache_head = &(cookie_cache[array_size - 1]); - mutex_enter(&(cache_head->dch_lock)); - /* LINTED E_EQUALITY_NOT_ASSIGNMENT */ - if (cookie = cache_head->dch_next) { - cache_head->dch_next = cookie->dc_next; - cache_head->dch_count--; - } - mutex_exit(&(cache_head->dch_lock)); - - if (cookie) { - return (cookie); - } else { - return (kmem_alloc(sizeof (iommu_dvma_cookie_t) * array_size, - KM_SLEEP)); - } -} - -/* - * put_dvma_cookie_array() - * put a dvma cookie array to the cache or free - */ -static void -put_dvma_cookie_array(iommu_dvma_cookie_t *dcookies, uint_t array_size) -{ - dvma_cookie_head_t *cache_head; - - if (array_size > MAX_COOKIE_CACHE_SIZE) { - kmem_free(dcookies, sizeof (iommu_dvma_cookie_t) * array_size); - return; - } - - cache_head = &(cookie_cache[array_size - 1]); - mutex_enter(&(cache_head->dch_lock)); - dcookies->dc_next = cache_head->dch_next; - cache_head->dch_next = dcookies; - cache_head->dch_count++; - mutex_exit(&(cache_head->dch_lock)); -} - -/* - * dmar_reg_plant_wait() - * the plant wait operation for register based cache invalidation - */ -static void -dmar_reg_plant_wait(intel_iommu_state_t *iommu, iommu_dvma_cookie_t *dcookies, - uint_t count, uint_t array_size) -{ - iotlb_pend_node_t *node = NULL; - iotlb_pend_head_t *head; - - head = &(iommu->iu_pend_head); - - /* get a node */ - mutex_enter(&(head->ich_mem_lock)); - node = list_head(&(head->ich_mem_list)); - if (node) { - list_remove(&(head->ich_mem_list), node); - } - mutex_exit(&(head->ich_mem_lock)); - - /* no cache, alloc one */ - if (node == NULL) { - node = kmem_alloc(sizeof (iotlb_pend_node_t), KM_SLEEP); - } - - /* initialize this node */ - node->icn_dcookies = dcookies; - node->icn_count = count; - node->icn_array_size = array_size; - - /* insert into the pend list */ - mutex_enter(&(head->ich_pend_lock)); - list_insert_tail(&(head->ich_pend_list), node); - head->ich_pend_count++; - mutex_exit(&(head->ich_pend_lock)); -} - -/* - * dmar_release_dvma_cookie() - * release the dvma cookie - */ -static void -dmar_release_dvma_cookie(iommu_dvma_cookie_t *dcookies, - uint_t count, uint_t array_size) -{ - uint_t i; - - /* free dvma */ - for (i = 0; i < count; i++) { - iommu_dvma_cache_put(dcookies[i].dc_domain, - dcookies[i].dc_addr, dcookies[i].dc_size, - dcookies[i].dc_align); - } - - /* free the cookie array */ - put_dvma_cookie_array(dcookies, array_size); -} - -/* - * dmar_reg_reap_wait() - * the reap wait operation for register based cache invalidation - */ -static void -dmar_reg_reap_wait(intel_iommu_state_t *iommu) -{ - iotlb_pend_node_t *node; - iotlb_pend_head_t *head; - - head = &(iommu->iu_pend_head); - mutex_enter(&(head->ich_pend_lock)); - node = list_head(&(head->ich_pend_list)); - if (node) { - list_remove(&(head->ich_pend_list), node); - head->ich_pend_count--; - } - mutex_exit(&(head->ich_pend_lock)); - - if (node) { - dmar_release_dvma_cookie(node->icn_dcookies, - node->icn_count, node->icn_array_size); - /* put the node into the node cache */ - mutex_enter(&(head->ich_mem_lock)); - list_insert_head(&(head->ich_mem_list), node); - mutex_exit(&(head->ich_mem_lock)); - 
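/* - * Editorial sketch of the plant/reap pairing: intel_iommu_unmap_sgl() - * below first reaps the oldest planted cookie set and then plants the - * cookies of the request being unbound, e.g. - * - * ops->do_reap_wait(iommu); - * ops->do_plant_wait(iommu, dcookies, sgl_size, max_pages); - * - * so a dvma range is recycled only after a later unbind has run, by - * which time the register-based invalidation covering it has - * completed. - */ - 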
} -} - -/* - * dmar_init_ops() - * init dmar ops - */ -static void -dmar_init_ops(intel_iommu_state_t *iommu) -{ - struct dmar_ops *ops; - - ASSERT(iommu); - ops = kmem_alloc(sizeof (struct dmar_ops), KM_SLEEP); - - /* initialize the dmar operations */ - ops->do_enable = dmar_enable_unit; - ops->do_fault = iommu_intr_handler; - - /* cpu clflush */ - if (iommu->iu_coherency) { - ops->do_clflush = (void (*)(caddr_t, uint_t))return_instr; - } else { - ASSERT(x86_feature & X86_CLFSH); - ops->do_clflush = cpu_clflush; - } - - /* Check for Mobile 4 Series Chipset */ - if (mobile4_cs && !IOMMU_CAP_GET_RWBF(iommu->iu_capability)) { - cmn_err(CE_WARN, - "Mobile 4 Series chipset present, activating quirks\n"); - iommu->iu_capability |= (1 << 4); - if (IOMMU_CAP_GET_RWBF(iommu->iu_capability)) - cmn_err(CE_WARN, "Setting RWBF forcefully\n"); - } - - /* write buffer */ - if (IOMMU_CAP_GET_RWBF(iommu->iu_capability)) { - ops->do_flwb = dmar_flush_write_buffer; - } else { - ops->do_flwb = (void (*)(intel_iommu_state_t *))return_instr; - } - - /* cache related functions */ - ops->do_iotlb_psi = dmar_flush_iotlb_psi; - ops->do_iotlb_dsi = dmar_flush_iotlb_dsi; - ops->do_iotlb_gbl = dmar_flush_iotlb_glb; - ops->do_context_fsi = dmar_flush_context_fsi; - ops->do_context_dsi = dmar_flush_context_dsi; - ops->do_context_gbl = dmar_flush_context_gbl; - ops->do_plant_wait = dmar_reg_plant_wait; - ops->do_reap_wait = dmar_reg_reap_wait; - - ops->do_set_root_table = dmar_set_root_table; - - - iommu->iu_dmar_ops = ops; -} - -/* - * create_iommu_state() - * alloc and setup the iommu state - */ -static int -create_iommu_state(drhd_info_t *drhd) -{ - intel_iommu_state_t *iommu; - int mgaw, sagaw, agaw; - int bitnum; - int ret; - - static ddi_device_acc_attr_t ioattr = { - DDI_DEVICE_ATTR_V0, - DDI_NEVERSWAP_ACC, - DDI_STRICTORDER_ACC, - }; - - iommu = kmem_zalloc(sizeof (intel_iommu_state_t), KM_SLEEP); - drhd->di_iommu = (void *)iommu; - iommu->iu_drhd = drhd; - - /* - * map the register address space - */ - ret = ddi_regs_map_setup(iommu->iu_drhd->di_dip, 0, - (caddr_t *)&(iommu->iu_reg_address), (offset_t)0, - (offset_t)IOMMU_REG_SIZE, &ioattr, - &(iommu->iu_reg_handle)); - - if (ret != DDI_SUCCESS) { - cmn_err(CE_WARN, "iommu register map failed: %d", ret); - kmem_free(iommu, sizeof (intel_iommu_state_t)); - return (DDI_FAILURE); - } - - mutex_init(&(iommu->iu_reg_lock), NULL, MUTEX_DRIVER, - (void *)ipltospl(IOMMU_INTR_IPL)); - mutex_init(&(iommu->iu_root_context_lock), NULL, MUTEX_DRIVER, NULL); - - /* - * get the register value - */ - iommu->iu_capability = iommu_get_reg64(iommu, IOMMU_REG_CAP); - iommu->iu_excapability = iommu_get_reg64(iommu, IOMMU_REG_EXCAP); - - /* - * if the hardware access is non-coherent, we need clflush - */ - if (IOMMU_ECAP_GET_C(iommu->iu_excapability)) { - iommu->iu_coherency = B_TRUE; - } else { - iommu->iu_coherency = B_FALSE; - if (!(x86_feature & X86_CLFSH)) { - cmn_err(CE_WARN, "drhd can't be enabled due to " - "missing clflush functionality"); - ddi_regs_map_free(&(iommu->iu_reg_handle)); - kmem_free(iommu, sizeof (intel_iommu_state_t)); - return (DDI_FAILURE); - } - } - - /* - * retrieve the maximum number of domains - */ - iommu->iu_max_domain = IOMMU_CAP_ND(iommu->iu_capability); - - /* - * setup the domain id allocator - * domain id 0 is reserved by the architecture - */ - iommu_rscs_init(1, iommu->iu_max_domain, &(iommu->iu_domain_id_hdl)); - - /* - * calculate the agaw - */ - mgaw = IOMMU_CAP_MGAW(iommu->iu_capability); - sagaw = IOMMU_CAP_SAGAW(iommu->iu_capability); - 
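/* - * Editorial worked example for the agaw selection below (values - * hypothetical): with mgaw = 48, calculate_agaw() yields agaw = 48 - * and bitnum = (48 - 30) / 9 = 2; if sagaw has bit 2 set, the unit - * gets iu_agaw = 30 + 2 * 9 = 48 and a 4-level (bitnum + 2) page - * table. - */ - 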
iommu->iu_gaw = mgaw; - agaw = calculate_agaw(iommu->iu_gaw); - bitnum = (agaw - 30) / 9; - - while (bitnum < 5) { - if (sagaw & (1 << bitnum)) - break; - else - bitnum++; - } - - if (bitnum >= 5) { - cmn_err(CE_PANIC, "can't determine agaw"); - /*NOTREACHED*/ - return (DDI_FAILURE); - } else { - iommu->iu_agaw = 30 + bitnum * 9; - if (iommu->iu_agaw > 64) - iommu->iu_agaw = 64; - iommu->iu_level = bitnum + 2; - } - - /* - * the iommu is orginally disabled - */ - iommu->iu_enabled = 0; - iommu->iu_global_cmd_reg = 0; - - /* - * init kstat - */ - (void) iommu_init_stats(iommu); - bzero(&(iommu->iu_statistics), sizeof (iommu_stat_t)); - - /* - * init dmar ops - */ - dmar_init_ops(iommu); - - /* - * alloc root entry table, this should put after init ops - */ - iommu->iu_root_entry_paddr = iommu_get_page(iommu, KM_SLEEP); - - /* - * init queued invalidation interface - */ - iommu->iu_inv_queue = NULL; - if (IOMMU_ECAP_GET_QI(iommu->iu_excapability) && !qinv_disable) { - if (iommu_qinv_init(iommu) != DDI_SUCCESS) { - cmn_err(CE_WARN, - "%s init queued invalidation interface failed\n", - ddi_node_name(iommu->iu_drhd->di_dip)); - } - } - - /* - * init intr remapping table state pointer - */ - iommu->iu_intr_remap_tbl = NULL; - - /* - * initialize the iotlb pending list and cache - */ - mutex_init(&(iommu->iu_pend_head.ich_pend_lock), NULL, - MUTEX_DRIVER, NULL); - list_create(&(iommu->iu_pend_head.ich_pend_list), - sizeof (iotlb_pend_node_t), - offsetof(iotlb_pend_node_t, node)); - iommu->iu_pend_head.ich_pend_count = 0; - - mutex_init(&(iommu->iu_pend_head.ich_mem_lock), NULL, - MUTEX_DRIVER, NULL); - list_create(&(iommu->iu_pend_head.ich_mem_list), - sizeof (iotlb_pend_node_t), - offsetof(iotlb_pend_node_t, node)); - - /* - * insert this iommu into the list - */ - list_insert_tail(&iommu_states, iommu); - - /* - * report this unit - */ - cmn_err(CE_CONT, "?\t%s state structure created\n", - ddi_node_name(iommu->iu_drhd->di_dip)); - - return (DDI_SUCCESS); -} - -/* - * match_dip_sbdf() - * walk function for get_dip_from_info() - */ -static int -match_dip_sbdf(dev_info_t *dip, void *arg) -{ - iommu_private_t *private; - pci_dev_info_t *info = arg; - - if (DEVI(dip)->devi_iommu_private == NULL && - iommu_set_private(dip) != DDI_SUCCESS) { - return (DDI_WALK_CONTINUE); - } - - private = DEVI(dip)->devi_iommu_private; - - ASSERT(private); - - if ((info->pdi_seg == private->idp_seg) && - (info->pdi_bus == private->idp_bus) && - (info->pdi_devfn == private->idp_devfn)) { - info->pdi_dip = dip; - return (DDI_WALK_TERMINATE); - } - return (DDI_WALK_CONTINUE); -} - -/* - * get_dip_from_info() - * get the dev_info structure by pass a bus/dev/func - */ -static int -get_dip_from_info(pci_dev_info_t *info) -{ - int count; - info->pdi_dip = NULL; - - ndi_devi_enter(root_devinfo, &count); - ddi_walk_devs(ddi_get_child(root_devinfo), - match_dip_sbdf, info); - ndi_devi_exit(root_devinfo, count); - - if (info->pdi_dip) - return (DDI_SUCCESS); - else - return (DDI_FAILURE); -} - -/* - * iommu_get_pci_top_bridge() - * get the top level bridge for a pci device - */ -static dev_info_t * -iommu_get_pci_top_bridge(dev_info_t *dip) -{ - iommu_private_t *private; - dev_info_t *tmp, *pdip; - - tmp = NULL; - pdip = ddi_get_parent(dip); - for (; pdip && pdip != root_devinfo; pdip = ddi_get_parent(pdip)) { - if (DEVI(pdip)->devi_iommu_private == NULL && - iommu_set_private(pdip) != DDI_SUCCESS) - continue; - - private = DEVI(pdip)->devi_iommu_private; - ASSERT(private); - - if ((private->idp_bbp_type == IOMMU_PPB_PCIE_PCI) 
|| - (private->idp_bbp_type == IOMMU_PPB_PCI_PCI)) - tmp = pdip; - } - - return (tmp); -} - -/* - * domain_vmem_init() - * initialize the domain vmem arena - */ -static void -domain_vmem_init(dmar_domain_state_t *domain) -{ - char vmem_name[64]; - static uint_t vmem_instance = 0; - struct memlist *mp; - uint64_t start, end; - void *vmem_ret; - - (void) snprintf(vmem_name, sizeof (vmem_name), - "domain_vmem_%d", vmem_instance++); - - memlist_read_lock(); - mp = phys_install; - end = (mp->ml_address + mp->ml_size); - - /* - * Skip page 0: vmem_create won't like it for obvious - * reasons. - */ - if (mp->ml_address == 0) { - start = IOMMU_PAGE_SIZE; - } else { - start = mp->ml_address; - } - - cmn_err(CE_CONT, "?Adding iova [0x%" PRIx64 - " - 0x%" PRIx64 "] to %s\n", start, end, - vmem_name); - - domain->dm_dvma_map = vmem_create(vmem_name, - (void *)(uintptr_t)start, /* base */ - end - start, /* size */ - IOMMU_PAGE_SIZE, /* quantum */ - NULL, /* afunc */ - NULL, /* ffunc */ - NULL, /* source */ - 0, /* qcache_max */ - VM_SLEEP); - - if (domain->dm_dvma_map == NULL) { - cmn_err(CE_PANIC, "Unable to initialize vmem map\n"); - } - - mp = mp->ml_next; - while (mp) { - vmem_ret = vmem_add(domain->dm_dvma_map, - (void *)((uintptr_t)mp->ml_address), - mp->ml_size, VM_NOSLEEP); - cmn_err(CE_CONT, "?Adding iova [0x%" PRIx64 - " - 0x%" PRIx64 "] to %s\n", mp->ml_address, - mp->ml_address + mp->ml_size, vmem_name); - if (!vmem_ret) - cmn_err(CE_PANIC, "Unable to initialize vmem map\n"); - mp = mp->ml_next; - } - - memlist_read_unlock(); -} - -/* - * iommu_domain_init() - * initialize a domain - */ -static int -iommu_domain_init(dmar_domain_state_t *domain) -{ - uint_t i; - - /* - * allocate the domain id - */ - if (iommu_rscs_alloc(domain->dm_iommu->iu_domain_id_hdl, - &(domain->dm_domain_id)) != DDI_SUCCESS) { - cmn_err(CE_WARN, "domain id exhausted %p, assign 1", - (void *)domain->dm_iommu); - domain->dm_domain_id = 1; - } - - /* - * record the domain statistics - */ - atomic_inc_64(&(domain->dm_iommu->iu_statistics.st_domain_alloc)); - - /* - * create vmem map - */ - domain_vmem_init(domain); - - /* - * create the first level page table - */ - domain->dm_page_table_paddr = iommu_get_page(domain->dm_iommu, - KM_SLEEP); - - mutex_init(&(domain->dm_pgtable_lock), NULL, MUTEX_DRIVER, NULL); - - /* - * init the CPU available page tables - */ - domain->dm_pt_tree.vp = kmem_zalloc(IOMMU_IOVPTE_TABLE_SIZE, KM_SLEEP); - domain->dm_pt_tree.pp = iommu_get_vaddr(domain->dm_iommu, - domain->dm_page_table_paddr); - domain->dm_identity = B_FALSE; - - /* - * init the dvma cache - */ - for (i = 0; i < DVMA_CACHE_HEAD_CNT; i++) { - /* init the free list */ - mutex_init(&(domain->dm_dvma_cache[i].dch_free_lock), - NULL, MUTEX_DRIVER, NULL); - list_create(&(domain->dm_dvma_cache[i].dch_free_list), - sizeof (dvma_cache_node_t), - offsetof(dvma_cache_node_t, node)); - domain->dm_dvma_cache[i].dch_free_count = 0; - - /* init the memory cache list */ - mutex_init(&(domain->dm_dvma_cache[i].dch_mem_lock), - NULL, MUTEX_DRIVER, NULL); - list_create(&(domain->dm_dvma_cache[i].dch_mem_list), - sizeof (dvma_cache_node_t), - offsetof(dvma_cache_node_t, node)); - } - - list_insert_tail(&domain_states, domain); - - return (DDI_SUCCESS); -} - -/* - * Get first ancestor with a non-NULL private struct - */ -static dev_info_t * -iommu_get_ancestor_private(dev_info_t *dip) -{ - dev_info_t *pdip; - - pdip = ddi_get_parent(dip); - for (; pdip && pdip != root_devinfo; pdip = ddi_get_parent(pdip)) { - if (DEVI(pdip)->devi_iommu_private == NULL && -
iommu_set_private(pdip) != DDI_SUCCESS) - continue; - ASSERT(DEVI(pdip)->devi_iommu_private); - return (pdip); - } - - return (NULL); -} - -/* - * dmar_check_sub() - * check to see if the device is under scope of a p2p bridge - */ -static boolean_t -dmar_check_sub(dev_info_t *dip, int seg, pci_dev_scope_t *devs) -{ - dev_info_t *pdip; - iommu_private_t *private; - int bus = devs->pds_bus; - int devfn = ((devs->pds_dev << 3) | devs->pds_func); - - ASSERT(dip != root_devinfo); - - pdip = ddi_get_parent(dip); - for (; pdip && pdip != root_devinfo; pdip = ddi_get_parent(pdip)) { - if (DEVI(pdip)->devi_iommu_private == NULL && - iommu_set_private(pdip) != DDI_SUCCESS) - continue; - private = DEVI(pdip)->devi_iommu_private; - ASSERT(private); - if ((private->idp_seg == seg) && - (private->idp_bus == bus) && - (private->idp_devfn == devfn)) - return (B_TRUE); - } - - return (B_FALSE); -} - -/* - * iommu_get_dmar() - * get the iommu structure for a device - */ -static intel_iommu_state_t * -iommu_get_dmar(dev_info_t *dip) -{ - iommu_private_t *private = NULL; - int seg, bus, dev, func; - pci_dev_scope_t *devs; - drhd_info_t *drhd; - - bus = dev = func = -1; - - seg = 0; - if (DEVI(dip)->devi_iommu_private || - iommu_set_private(dip) == DDI_SUCCESS) { - private = DEVI(dip)->devi_iommu_private; - ASSERT(private); - seg = private->idp_seg; - bus = private->idp_bus; - dev = GET_DEV(private->idp_devfn); - func = GET_FUNC(private->idp_devfn); - } - - /* - * walk the drhd list for a match - */ - for_each_in_list(&(dmar_info->dmari_drhd[seg]), drhd) { - - /* - * match the include all - */ - if (drhd->di_include_all) - return ((intel_iommu_state_t *) - drhd->di_iommu); - - /* - * try to match the device scope - */ - for_each_in_list(&(drhd->di_dev_list), devs) { - - /* - * get a perfect match - */ - if (private && - devs->pds_bus == bus && - devs->pds_dev == dev && - devs->pds_func == func) { - return ((intel_iommu_state_t *) - (drhd->di_iommu)); - } - - /* - * maybe under a scope of a p2p - */ - if (devs->pds_type == 0x2 && - dmar_check_sub(dip, seg, devs)) - return ((intel_iommu_state_t *) - (drhd->di_iommu)); - } - } - - /* - * This may happen with buggy versions of BIOSes. Just warn instead - * of panic as we don't want whole system to go down because of one - * device. 
- */ - cmn_err(CE_WARN, "can't match iommu for %s\n", - ddi_node_name(dip)); - - return (NULL); -} - -/* - * domain_set_root_context - * set root context for a single device - */ -static void -domain_set_root_context(dmar_domain_state_t *domain, - pci_dev_info_t *info, uint_t agaw) -{ - caddr_t root, context; - paddr_t paddr; - iorce_t rce; - uint_t bus, devfn; - intel_iommu_state_t *iommu; - uint_t aw_code; - - ASSERT(domain); - iommu = domain->dm_iommu; - ASSERT(iommu); - bus = info->pdi_bus; - devfn = info->pdi_devfn; - aw_code = (agaw - 30) / 9; - - /* - * set root entry - */ - root = iommu_get_vaddr(iommu, iommu->iu_root_entry_paddr); - rce = (iorce_t)root + bus; - mutex_enter(&(iommu->iu_root_context_lock)); - if (!ROOT_ENTRY_GET_P(rce)) { - paddr = iommu_get_page(iommu, KM_SLEEP); - ROOT_ENTRY_SET_P(rce); - ROOT_ENTRY_SET_CTP(rce, paddr); - iommu->iu_dmar_ops->do_clflush((caddr_t)rce, sizeof (*rce)); - context = iommu_get_vaddr(iommu, paddr); - } else { - paddr = ROOT_ENTRY_GET_CTP(rce); - context = iommu_get_vaddr(iommu, paddr); - } - - /* set context entry */ - rce = (iorce_t)context + devfn; - if (!CONT_ENTRY_GET_P(rce)) { - paddr = domain->dm_page_table_paddr; - CONT_ENTRY_SET_P(rce); - CONT_ENTRY_SET_ASR(rce, paddr); - CONT_ENTRY_SET_AW(rce, aw_code); - CONT_ENTRY_SET_DID(rce, domain->dm_domain_id); - iommu->iu_dmar_ops->do_clflush((caddr_t)rce, sizeof (*rce)); - } else if (CONT_ENTRY_GET_ASR(rce) != - domain->dm_page_table_paddr) { - cmn_err(CE_PANIC, "root context entries for" - " %d, %d, %d has been set", bus, - devfn >>3, devfn & 0x7); - /*NOTREACHED*/ - } - - mutex_exit(&(iommu->iu_root_context_lock)); - - /* cache mode set, flush context cache */ - if (IOMMU_CAP_GET_CM(iommu->iu_capability)) { - iommu->iu_dmar_ops->do_context_fsi(iommu, 0, - (bus << 8) | devfn, domain->dm_domain_id); - iommu->iu_dmar_ops->do_iotlb_dsi(iommu, domain->dm_domain_id); - /* cache mode not set, flush write buffer */ - } else { - iommu->iu_dmar_ops->do_flwb(iommu); - } -} - -/* - * setup_single_context() - * setup the root context entry - */ -static void -setup_single_context(dmar_domain_state_t *domain, - int seg, int bus, int devfn) -{ - pci_dev_info_t info; - - info.pdi_seg = seg; - info.pdi_bus = bus; - info.pdi_devfn = devfn; - - domain_set_root_context(domain, &info, - domain->dm_iommu->iu_agaw); -} - -/* - * setup_context_walk() - * the walk function to set up the possible context entries - */ -static int -setup_context_walk(dev_info_t *dip, void *arg) -{ - dmar_domain_state_t *domain = arg; - iommu_private_t *private; - - private = DEVI(dip)->devi_iommu_private; - if (private == NULL && iommu_set_private(dip) != DDI_SUCCESS) { - cmn_err(CE_PANIC, "setup_context_walk: cannot find private"); - /*NOTREACHED*/ - } - private = DEVI(dip)->devi_iommu_private; - ASSERT(private); - - setup_single_context(domain, private->idp_seg, - private->idp_bus, private->idp_devfn); - - return (DDI_WALK_PRUNECHILD); -} - -/* - * setup_possible_contexts() - * set up all the possible context entries for a device under ppb - */ -static void -setup_possible_contexts(dmar_domain_state_t *domain, dev_info_t *dip) -{ - int count; - iommu_private_t *private; - private = DEVI(dip)->devi_iommu_private; - - ASSERT(private); - - /* for pci-pci bridge */ - if (private->idp_bbp_type == IOMMU_PPB_PCI_PCI) { - setup_single_context(domain, private->idp_seg, - private->idp_bus, private->idp_devfn); - return; - } - - /* for pcie-pci bridge */ - setup_single_context(domain, private->idp_seg, - private->idp_bus, 
private->idp_devfn); - setup_single_context(domain, private->idp_seg, - private->idp_sec, 0); - - /* for functions under pcie-pci bridge */ - ndi_devi_enter(dip, &count); - ddi_walk_devs(ddi_get_child(dip), setup_context_walk, domain); - ndi_devi_exit(dip, count); -} - -/* - * iommu_alloc_domain() - * allocate a domain for the device; the result is returned in the - * domain parameter - */ -static int -iommu_alloc_domain(dev_info_t *dip, dmar_domain_state_t **domain) -{ - iommu_private_t *private, *b_private; - dev_info_t *bdip = NULL, *ldip = NULL; - dmar_domain_state_t *new; - pci_dev_info_t info; - uint_t need_to_set_parent = 0; - int count; - - private = DEVI(dip)->devi_iommu_private; - if (private == NULL) { - cmn_err(CE_PANIC, "iommu private is NULL (%s)\n", - ddi_node_name(dip)); - } - - /* - * check whether the domain has already been allocated, without the - * lock held. - */ - if (private->idp_intel_domain) { - *domain = INTEL_IOMMU_PRIVATE(private->idp_intel_domain); - return (DDI_SUCCESS); - } - - /* - * lock strategy for the dip->devi_iommu_private->idp_intel_domain - * field: - * 1) read access is allowed without the lock held. - * 2) write access is protected by ndi_devi_enter(dip, &count). The - * lock on dip protects itself and all descendants. - * 3) the lock is released once the in-kernel and iommu hardware data - * structures have been synchronized. - */ - ndi_hold_devi(dip); - bdip = iommu_get_pci_top_bridge(dip); - ASSERT(bdip == NULL || DEVI(bdip)->devi_iommu_private); - ldip = (bdip != NULL) ? bdip : dip; - ndi_devi_enter(ldip, &count); - - /* - * double-check whether the domain has already been created by - * another thread. - */ - if (private->idp_intel_domain) { - ndi_devi_exit(ldip, count); - ndi_rele_devi(dip); - *domain = INTEL_IOMMU_PRIVATE(private->idp_intel_domain); - return (DDI_SUCCESS); - } - - /* - * check to see if it is under a pci bridge - */ - if (bdip != NULL) { - b_private = DEVI(bdip)->devi_iommu_private; - ASSERT(b_private); - if (b_private->idp_intel_domain) { - new = INTEL_IOMMU_PRIVATE(b_private->idp_intel_domain); - goto get_domain_finish; - } else { - need_to_set_parent = 1; - } - } - - /* - * OK, we have to allocate a new domain - */ - new = kmem_alloc(sizeof (dmar_domain_state_t), KM_SLEEP); - new->dm_iommu = iommu_get_dmar(dip); - if (new->dm_iommu == NULL || iommu_domain_init(new) != DDI_SUCCESS) { - ndi_devi_exit(ldip, count); - ndi_rele_devi(dip); - kmem_free(new, sizeof (dmar_domain_state_t)); - *domain = NULL; - return (DDI_FAILURE); - } - -get_domain_finish: - /* - * setup root context entries - */ - if (bdip == NULL) { - info.pdi_seg = private->idp_seg; - info.pdi_bus = private->idp_bus; - info.pdi_devfn = private->idp_devfn; - domain_set_root_context(new, &info, new->dm_iommu->iu_agaw); - } else if (need_to_set_parent) { - setup_possible_contexts(new, bdip); - membar_producer(); - b_private->idp_intel_domain = (void *)new; - } - membar_producer(); - private->idp_intel_domain = (void *)new; - - ndi_devi_exit(ldip, count); - ndi_rele_devi(dip); - *domain = new; - - return (DDI_SUCCESS); -} - -/* - * iommu_get_domain() - * get an iommu domain for dip; the result is returned in domain - */ -static int -iommu_get_domain(dev_info_t *dip, dmar_domain_state_t **domain) -{ - iommu_private_t *private = DEVI(dip)->devi_iommu_private; - dev_info_t *pdip; - - ASSERT(domain); - - /* - * for isa devices attached under lpc - */ - pdip = ddi_get_parent(dip); - if (strcmp(ddi_node_name(pdip), "isa") == 0) { - if (lpc_devinfo) { - return (iommu_alloc_domain(lpc_devinfo, domain)); - } else { - *domain = NULL; 
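- /* - * (Editorial note: without a cached lpc_devinfo there is no - * devinfo node to anchor an isa-bridge domain on, so the bind - * fails here rather than guessing a unit.) - */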
- return (DDI_FAILURE); - } - } - - /* - * for gart, use the real graphic devinfo - */ - if (strcmp(ddi_node_name(dip), "agpgart") == 0) { - if (gfx_devinfo) { - return (iommu_alloc_domain(gfx_devinfo, domain)); - } else { - *domain = NULL; - return (DDI_FAILURE); - } - } - - /* - * if iommu private is NULL: - * 1. try to find a cached private - * 2. if that fails try to create a new one - * 3. if this fails as well, device is probably not - * PCI and shares domain with an ancestor. - */ - if (private == NULL && iommu_set_private(dip) != DDI_SUCCESS) { - if (pdip = iommu_get_ancestor_private(dip)) { - return (iommu_alloc_domain(pdip, domain)); - } - cmn_err(CE_WARN, "Cannot find ancestor private for " - "devinfo %s%d", ddi_node_name(dip), - ddi_get_instance(dip)); - *domain = NULL; - return (DDI_FAILURE); - } - - /* - * check if the domain has already allocated - */ - private = DEVI(dip)->devi_iommu_private; - ASSERT(private); - if (private->idp_intel_domain) { - *domain = INTEL_IOMMU_PRIVATE(private->idp_intel_domain); - return (DDI_SUCCESS); - } - - /* - * allocate a domain for this device - */ - return (iommu_alloc_domain(dip, domain)); -} - -/* - * helper functions to manipulate iommu pte - */ -static void -set_pte(iopte_t pte, uint_t rw, paddr_t addr) -{ - *pte |= (rw & 0x3); - *pte |= (addr & IOMMU_PAGE_MASK); -} - -static paddr_t -pte_get_paddr(iopte_t pte) -{ - return (*pte & IOMMU_PAGE_MASK); -} - -/* - * dvma_level_offset() - * get the page table offset by specifying a dvma and level - */ -static uint_t -dvma_level_offset(uint64_t dvma_pn, uint_t level) -{ - uint_t start_bit, offset; - - start_bit = (level - 1) * IOMMU_LEVEL_STRIDE; - offset = (dvma_pn >> start_bit) & IOMMU_LEVEL_OFFSET; - - return (offset); -} - -/* - * iommu_setup_level_table() - * setup the page table for a level - */ -static iovpte_t -iommu_setup_level_table(dmar_domain_state_t *domain, - iovpte_t pvpte, uint_t offset) -{ - iopte_t pte; - iovpte_t vpte; - paddr_t child; - caddr_t vp; - - vpte = (iovpte_t)(pvpte->vp) + offset; - pte = (iopte_t)(pvpte->pp) + offset; - - /* - * check whether pde already exists withoud lock held. - */ - if (vpte->pp != NULL) { - return (vpte); - } - - /* Speculatively allocate resources needed. */ - child = iommu_get_page(domain->dm_iommu, KM_SLEEP); - vp = kmem_zalloc(IOMMU_IOVPTE_TABLE_SIZE, KM_SLEEP); - mutex_enter(&(domain->dm_pgtable_lock)); - - /* - * double check whether pde already exists with lock held. - */ - if (vpte->pp != NULL) { - mutex_exit(&(domain->dm_pgtable_lock)); - kmem_free(vp, IOMMU_IOVPTE_TABLE_SIZE); - iommu_free_page(domain->dm_iommu, child); - return (vpte); - } - set_pte(pte, IOMMU_PAGE_PROP_RW, child); - domain->dm_iommu->iu_dmar_ops->do_clflush((caddr_t)pte, sizeof (*pte)); - vpte->vp = vp; - - /* make previous changes visible to other threads. 
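- * Readers check vpte->pp locklessly (see the unlocked test at the - * top of this function), so pp must be published only after the pte - * write and the vpte->vp store above are globally visible; the - * membar_producer() below enforces that ordering (editorial - * clarification). -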
*/ - membar_producer(); - vpte->pp = iommu_get_vaddr(domain->dm_iommu, child); - mutex_exit(&(domain->dm_pgtable_lock)); - - return (vpte); -} - -/* - * iommu_setup_page_table() - * setup the page table for a dvma - */ -static caddr_t -iommu_setup_page_table(dmar_domain_state_t *domain, uint64_t dvma) -{ - iovpte_t vpte; - uint_t level; - uint_t offset; - int i; - - level = domain->dm_iommu->iu_level; - vpte = &(domain->dm_pt_tree); - - for (i = level; i > 1; i--) { - offset = dvma_level_offset(IOMMU_BTOP(dvma), i); - vpte = iommu_setup_level_table(domain, vpte, offset); - } - - return (vpte->pp); -} - -/* - * iommu_map_page_range() - * map a range of pages for iommu translation - * - * domain: the device domain - * dvma: the start dvma for mapping - * start: the start physcial address - * end: the end physical address - * flags: misc flag - */ -static int -iommu_map_page_range(dmar_domain_state_t *domain, uint64_t dvma, - uint64_t start, uint64_t end, int flags) -{ - uint_t offset; - iopte_t pte; - caddr_t vaddr, dirt; - uint64_t paddr = start & IOMMU_PAGE_MASK; - uint64_t epaddr = end & IOMMU_PAGE_MASK; - uint64_t ioaddr = dvma & IOMMU_PAGE_MASK; - uint_t count; - - while (paddr <= epaddr) { - vaddr = iommu_setup_page_table(domain, ioaddr); - offset = dvma_level_offset(IOMMU_BTOP(ioaddr), 1); - - count = 0; - dirt = (caddr_t)((iopte_t)vaddr + offset); - while ((paddr <= epaddr) && (offset < IOMMU_PTE_MAX)) { - pte = (iopte_t)vaddr + offset; - if (*pte != NULL) { - if (pte_get_paddr(pte) != paddr) { - cmn_err(CE_WARN, "try to set " - "non-NULL pte"); - } - } else { - set_pte(pte, IOMMU_PAGE_PROP_RW, paddr); - } - paddr += IOMMU_PAGE_SIZE; - offset++; - count++; - } - - /* flush cpu and iotlb cache */ - domain->dm_iommu->iu_dmar_ops->do_clflush(dirt, - count * sizeof (uint64_t)); - - if (!(flags & IOMMU_PAGE_PROP_NOSYNC)) { - /* cache mode set, flush iotlb */ - if (IOMMU_CAP_GET_CM(domain->dm_iommu->iu_capability)) { - domain->dm_iommu->iu_dmar_ops-> - do_iotlb_psi(domain->dm_iommu, - 0, ioaddr, count, TLB_IVA_WHOLE); - /* cache mode not set, flush write buffer */ - } else { - domain->dm_iommu->iu_dmar_ops-> - do_flwb(domain->dm_iommu); - } - } - - ioaddr += IOMMU_PTOB(count); - } - - return (DDI_SUCCESS); -} - -/* - * iommu_vmem_walker() - */ -static void -iommu_vmem_walker(void *arg, void *base, size_t size) -{ - vmem_walk_arg_t *warg = (vmem_walk_arg_t *)arg; - rmrr_info_t *rmrr = warg->vwa_rmrr; - dmar_domain_state_t *domain = warg->vwa_domain; - dev_info_t *dip = warg->vwa_dip; - uint64_t start, end; - - start = MAX(rmrr->ri_baseaddr, (uint64_t)(intptr_t)base); - end = MIN(rmrr->ri_limiaddr + 1, (uint64_t)(intptr_t)base + size); - if (start < end) { - cmn_err(CE_WARN, "rmrr overlap with physmem [0x%" - PRIx64 " - 0x%" PRIx64 "] for %s", start, end, - ddi_node_name(dip)); - - (void) vmem_xalloc(domain->dm_dvma_map, - end - start, /* size */ - IOMMU_PAGE_SIZE, /* align/quantum */ - 0, /* phase */ - 0, /* nocross */ - (void *)(uintptr_t)start, /* minaddr */ - (void *)(uintptr_t)end, /* maxaddr */ - VM_NOSLEEP); - } -} - -/* - * build_single_rmrr_identity_map() - * build identity map for a single rmrr unit - */ -static void -build_single_rmrr_identity_map(rmrr_info_t *rmrr) -{ - pci_dev_scope_t *devs; - pci_dev_info_t info; - uint64_t start, end, size; - dmar_domain_state_t *domain; - vmem_walk_arg_t warg; - - info.pdi_seg = rmrr->ri_segment; - for_each_in_list(&(rmrr->ri_dev_list), devs) { - info.pdi_bus = devs->pds_bus; - info.pdi_devfn = (devs->pds_dev << 3) | - devs->pds_func; - - if 
(get_dip_from_info(&info) != DDI_SUCCESS) { - cmn_err(CE_NOTE, "RMRR: device [%x,%x,%x] listed in " - "ACPI DMAR table does not exist, ignoring", - info.pdi_bus, GET_DEV(info.pdi_devfn), - GET_FUNC(info.pdi_devfn)); - continue; - } - - if (iommu_get_domain(info.pdi_dip, &domain) != DDI_SUCCESS) { - cmn_err(CE_WARN, "rmrr: get domain for %s failed", - ddi_node_name(info.pdi_dip)); - continue; - } - - start = rmrr->ri_baseaddr; - end = rmrr->ri_limiaddr; - size = end - start + 1; - - if (!address_in_memlist(bios_rsvd, start, size)) { - cmn_err(CE_WARN, "bios issue: " - "rmrr [0x%" PRIx64 " - 0x%" PRIx64 "]\n" - "is not in reserved memory range\n", - start, end); - } - - (void) iommu_map_page_range(domain, - start, start, end, - DDI_DMA_READ | DDI_DMA_WRITE | - IOMMU_PAGE_PROP_NOSYNC); - - /* - * rmrr should never overlap phy_mem - */ - warg.vwa_rmrr = rmrr; - warg.vwa_domain = domain; - warg.vwa_dip = info.pdi_dip; - vmem_walk(domain->dm_dvma_map, VMEM_SPAN | VMEM_REENTRANT, - iommu_vmem_walker, &warg); - } -} - -/* - * build_rmrr_identity_map() - * build identity mapping for devices under rmrr scopes - */ -static void -build_rmrr_identity_map(void) -{ - rmrr_info_t *rmrr; - int i; - - for (i = 0; i < DMAR_MAX_SEGMENT; i++) { - if (list_is_empty(&(dmar_info->dmari_rmrr[i]))) - break; - for_each_in_list(&(dmar_info->dmari_rmrr[i]), rmrr) { - list_insert_tail(&rmrr_states, rmrr); - build_single_rmrr_identity_map(rmrr); - } - } -} - -/* - * drhd_only_for_gfx() - * return TRUE, if the drhd is only for gfx - */ -static boolean_t -drhd_only_for_gfx(intel_iommu_state_t *iommu) -{ - drhd_info_t *drhd = iommu->iu_drhd; - pci_dev_scope_t *devs; - pci_dev_info_t info; - int dev_num; - - if (drhd->di_include_all) - return (B_FALSE); - - /* get the device number attached to this drhd */ - dev_num = 0; - for_each_in_list(&(drhd->di_dev_list), devs) { - dev_num++; - } - - if (dev_num == 1) { - iommu_private_t *private; - devs = list_head(&(drhd->di_dev_list)); - info.pdi_seg = drhd->di_segment; - info.pdi_bus = devs->pds_bus; - info.pdi_devfn = (devs->pds_dev << 3) + - (devs->pds_func & 0x7); - - if (get_dip_from_info(&info) != DDI_SUCCESS) { - return (B_FALSE); - } - - private = DEVI(info.pdi_dip)->devi_iommu_private; - ASSERT(private); - if (private->idp_is_display) - return (B_TRUE); - } - - return (B_FALSE); -} - -/* - * build_dev_identity_map() - * build identity map for a device - */ -static void -build_dev_identity_map(dev_info_t *dip) -{ - struct memlist *mp; - dmar_domain_state_t *domain; - - if (iommu_get_domain(dip, &domain) != DDI_SUCCESS) { - cmn_err(CE_WARN, "build identity map for %s failed," - "this device may not be functional", - ddi_node_name(dip)); - return; - } - - memlist_read_lock(); - mp = phys_install; - while (mp != NULL) { - (void) iommu_map_page_range(domain, - mp->ml_address & IOMMU_PAGE_MASK, - mp->ml_address & IOMMU_PAGE_MASK, - (mp->ml_address + mp->ml_size - 1) & IOMMU_PAGE_MASK, - DDI_DMA_READ | DDI_DMA_WRITE | - IOMMU_PAGE_PROP_NOSYNC); - mp = mp->ml_next; - } - - memlist_read_unlock(); - - /* - * record the identity map for domain, any device - * which uses this domain will needn't any further - * map - */ - domain->dm_identity = B_TRUE; -} - -/* - * build dma map for bios reserved memspace - */ -static void -map_bios_rsvd_mem_pool(dev_info_t *dip) -{ - struct memlist *mp; - dmar_domain_state_t *domain; - - if (iommu_get_domain(dip, &domain) != DDI_SUCCESS) { - cmn_err(CE_WARN, "get domain for %s failed", - ddi_node_name(dip)); - return; - } - - mp = bios_rsvd; - while 
(mp != 0) { - (void) iommu_map_page_range(domain, - mp->ml_address & IOMMU_PAGE_MASK, - mp->ml_address & IOMMU_PAGE_MASK, - (mp->ml_address + mp->ml_size - 1) & IOMMU_PAGE_MASK, - DDI_DMA_READ | DDI_DMA_WRITE | - IOMMU_PAGE_PROP_NOSYNC); - cmn_err(CE_CONT, "?Mapping Reservd [0x%" PRIx64 - " - 0x%" PRIx64 "]\n", mp->ml_address, - (mp->ml_address + mp->ml_size)); - mp = mp->ml_next; - } -} - -/* - * build_isa_gfx_identity_walk() - * the walk function for build_isa_gfx_identity_map() - */ -static int -build_isa_gfx_identity_walk(dev_info_t *dip, void *arg) -{ - dmar_domain_state_t *domain; - _NOTE(ARGUNUSED(arg)) - - iommu_private_t *private; - - if (DEVI(dip)->devi_iommu_private == NULL && - iommu_set_private(dip) != DDI_SUCCESS) { - /* ignore devices which cannot have private struct */ - return (DDI_WALK_CONTINUE); - } - - private = DEVI(dip)->devi_iommu_private; - - ASSERT(private); - - /* fix the gfx and fd */ - if (private->idp_is_display) { - gfx_devinfo = dip; - build_dev_identity_map(dip); - return (DDI_WALK_CONTINUE); - } else if (private->idp_is_lpc) { - lpc_devinfo = dip; - return (DDI_WALK_CONTINUE); - } - - if (!(usb_rmrr_quirk || usb_page0_quirk || usb_fullpa_quirk)) { - return (DDI_WALK_CONTINUE); - } - - if (!((strcmp(ddi_driver_name(dip), "uhci") == 0) || - (strcmp(ddi_driver_name(dip), "ehci") == 0) || - (strcmp(ddi_driver_name(dip), "ohci") == 0))) { - return (DDI_WALK_CONTINUE); - } - - /* workaround for usb leagcy emulation mode */ - if (usb_rmrr_quirk) { - map_bios_rsvd_mem_pool(dip); - cmn_err(CE_CONT, - "?Workaround for %s USB rmrr\n", - ddi_node_name(dip)); - } - - /* - * Identify usb ehci and uhci controllers - */ - if (usb_fullpa_quirk) { - build_dev_identity_map(dip); - cmn_err(CE_CONT, - "?Workaround for %s USB phys install mem\n", - ddi_node_name(dip)); - return (DDI_WALK_CONTINUE); - } - - if (usb_page0_quirk) { - if (iommu_get_domain(dip, &domain) != DDI_SUCCESS) { - cmn_err(CE_WARN, - "Unable to setup usb-quirk for %s failed," - "this device may not be functional", - ddi_node_name(dip)); - return (DDI_WALK_CONTINUE); - } - (void) iommu_map_page_range(domain, - 0, 0, 0, DDI_DMA_READ | DDI_DMA_WRITE | - IOMMU_PAGE_PROP_NOSYNC); - cmn_err(CE_CONT, "?Workaround for %s USB [0-4k]\n", - ddi_node_name(dip)); - } - - return (DDI_WALK_CONTINUE); -} - -/* - * build_isa_gfx_identity_map() - * build identity map for isa and gfx devices - */ -static void -build_isa_gfx_identity_map(void) -{ - int count; - - /* - * walk through the entire device tree - */ - ndi_devi_enter(root_devinfo, &count); - ddi_walk_devs(ddi_get_child(root_devinfo), - build_isa_gfx_identity_walk, NULL); - ndi_devi_exit(root_devinfo, count); -} - -/* - * dmar_check_boot_option() - * check the intel iommu boot option - */ -static void -dmar_check_boot_option(char *opt, int *var) -{ - int len; - char *boot_option; - - if ((len = do_bsys_getproplen(NULL, opt)) > 0) { - boot_option = kmem_alloc(len, KM_SLEEP); - (void) do_bsys_getprop(NULL, opt, boot_option); - if (strcmp(boot_option, "yes") == 0 || - strcmp(boot_option, "true") == 0) { - cmn_err(CE_CONT, "\"%s=true\" was set\n", - opt); - *var = 1; - } else if (strcmp(boot_option, "no") == 0 || - strcmp(boot_option, "false") == 0) { - cmn_err(CE_CONT, "\"%s=false\" was set\n", - opt); - *var = 0; - } - kmem_free(boot_option, len); - } -} - -extern void (*rootnex_iommu_init)(void); - -/* - * intel_iommu_attach_dmar_nodes() - * attach intel iommu nodes - */ -int -intel_iommu_attach_dmar_nodes(void) -{ - drhd_info_t *drhd; - intel_iommu_state_t *iommu; - int 
i; - - /* - * retrieve the dmar boot options - */ - cmn_err(CE_CONT, "?Start to check dmar related boot options\n"); - dmar_check_boot_option("dmar-gfx-disable", &gfx_drhd_disable); - dmar_check_boot_option("dmar-drhd-disable", &dmar_drhd_disable); - dmar_check_boot_option("usb-page0-quirk", &usb_page0_quirk); - dmar_check_boot_option("usb-fullpa-quirk", &usb_fullpa_quirk); - dmar_check_boot_option("usb-rmrr-quirk", &usb_rmrr_quirk); - dmar_check_boot_option("qinv-disable", &qinv_disable); - dmar_check_boot_option("intrr-disable", &intrr_disable); - - /* - * init the lists - */ - list_create(&iommu_states, sizeof (intel_iommu_state_t), - offsetof(intel_iommu_state_t, node)); - list_create(&domain_states, sizeof (dmar_domain_state_t), - offsetof(dmar_domain_state_t, node)); - list_create(&rmrr_states, sizeof (rmrr_info_t), - offsetof(rmrr_info_t, node4states)); - - root_devinfo = ddi_root_node(); - ASSERT(root_devinfo); - - check_hwquirk(); - - iommu_page_init(); - - /* - * initiate each iommu unit - */ - cmn_err(CE_CONT, "?Start to create iommu state structures\n"); - for (i = 0; i < DMAR_MAX_SEGMENT; i++) { - for_each_in_list(&(dmar_info->dmari_drhd[i]), drhd) { - if (create_iommu_state(drhd) != DDI_SUCCESS) - goto iommu_init_fail; - } - } - - /* - * register interrupt remap ops - */ - if ((dmar_info->dmari_intr_remap == B_TRUE) && !intrr_disable) { - psm_vt_ops = &intr_remap_ops; - } - - /* - * build identity map for devices in the rmrr scope - */ - cmn_err(CE_CONT, "?Start to prepare identity map for rmrr\n"); - build_rmrr_identity_map(); - - /* - * build identity map for isa and gfx devices - */ - cmn_err(CE_CONT, "?Start to prepare identity map for gfx\n"); - build_isa_gfx_identity_map(); - - /* - * initialize the dvma cookie cache - */ - for (i = 0; i < MAX_COOKIE_CACHE_SIZE; i++) { - mutex_init(&(cookie_cache[i].dch_lock), NULL, - MUTEX_DRIVER, NULL); - cookie_cache[i].dch_count = 0; - cookie_cache[i].dch_next = NULL; - } - - /* - * regist the intr add function - */ - rootnex_iommu_init = intel_iommu_init; - - return (DDI_SUCCESS); - -iommu_init_fail: - - /* - * free iommu state structure - */ - while (iommu = list_head(&iommu_states)) { - list_remove(&iommu_states, iommu); - destroy_iommu_state(iommu); - } - list_destroy(&iommu_states); - - return (DDI_FAILURE); -} - -/* - * get_level_table() - * get level n page table, NULL is returned if - * failure encountered - */ -static caddr_t -get_level_table(dmar_domain_state_t *domain, - uint64_t dvma_pn, uint_t n) -{ - iovpte_t vpte; - uint_t level; - uint_t i, offset; - - level = domain->dm_iommu->iu_level; - ASSERT(level >= n); - vpte = &(domain->dm_pt_tree); - - /* walk to the level n page table */ - for (i = level; i > n; i--) { - offset = dvma_level_offset(dvma_pn, i); - vpte = (iovpte_t)(vpte->vp) + offset; - } - - return (vpte->pp); -} - -/* - * iommu_alloc_cookie_array() - * allocate the cookie array which is needed by map sgl - */ -static int -iommu_alloc_cookie_array(rootnex_dma_t *dma, - struct ddi_dma_req *dmareq, uint_t prealloc) -{ - int kmflag; - rootnex_sglinfo_t *sinfo = &(dma->dp_sglinfo); - - /* figure out the rough estimate of array size */ - sinfo->si_max_pages = - (dmareq->dmar_object.dmao_size + IOMMU_PAGE_OFFSET) / - sinfo->si_max_cookie_size + 1; - - /* the preallocated buffer fit this size */ - if (sinfo->si_max_pages <= prealloc) { - dma->dp_cookies = (ddi_dma_cookie_t *)dma->dp_prealloc_buffer; - dma->dp_need_to_free_cookie = B_FALSE; - /* we need to allocate new array */ - } else { - /* convert the sleep 
flags */ - if (dmareq->dmar_fp == DDI_DMA_SLEEP) { - kmflag = KM_SLEEP; - } else { - kmflag = KM_NOSLEEP; - } - - dma->dp_cookie_size = sinfo->si_max_pages * - sizeof (ddi_dma_cookie_t); - dma->dp_cookies = kmem_alloc(dma->dp_cookie_size, kmflag); - if (dma->dp_cookies == NULL) { - return (IOMMU_SGL_NORESOURCES); - } - dma->dp_need_to_free_cookie = B_TRUE; - } - - /* allocate the dvma cookie array */ - dma->dp_dvma_cookies = get_dvma_cookie_array(sinfo->si_max_pages); - - return (IOMMU_SGL_SUCCESS); -} - -/* - * iommu_alloc_dvma() - * alloc a dvma range for the caller - */ -static int -iommu_alloc_dvma(dmar_domain_state_t *domain, uint_t size, - ddi_dma_impl_t *hp, uint64_t *dvma, uint_t cnt) -{ - rootnex_dma_t *dma; - ddi_dma_attr_t *dma_attr; - iommu_dvma_cookie_t *dcookie; - uint64_t ioaddr; - size_t xsize, align, nocross; - uint64_t minaddr, maxaddr; - - /* shotcuts */ - dma = (rootnex_dma_t *)hp->dmai_private; - dma_attr = &(hp->dmai_attr); - dcookie = dma->dp_dvma_cookies; - - /* parameters */ - xsize = (size + IOMMU_PAGE_OFFSET) & IOMMU_PAGE_MASK; - align = MAX((size_t)(dma_attr->dma_attr_align), IOMMU_PAGE_SIZE); - nocross = (size_t)(dma_attr->dma_attr_seg + 1); - minaddr = dma_attr->dma_attr_addr_lo; - maxaddr = dma_attr->dma_attr_addr_hi + 1; - - /* handle the rollover cases */ - if (maxaddr < dma_attr->dma_attr_addr_hi) { - maxaddr = dma_attr->dma_attr_addr_hi; - } - - /* get from cache first */ - ioaddr = iommu_dvma_cache_get(domain, xsize, align, nocross); - - if (ioaddr == NULL) { - /* allocate from vmem arena */ - ioaddr = (uint64_t)(uintptr_t)vmem_xalloc(domain->dm_dvma_map, - xsize, align, 0, nocross, - (void *)(uintptr_t)minaddr, - (void *)(uintptr_t)maxaddr, - VM_NOSLEEP); - - /* if xalloc failed, we have to flush the cache and retry */ - if (ioaddr == NULL) { - iommu_dvma_cache_flush(domain, dma->dp_dip); - ioaddr = (uint64_t)(uintptr_t)vmem_xalloc( - domain->dm_dvma_map, - xsize, align, 0, nocross, - (void *)(uintptr_t)minaddr, - (void *)(uintptr_t)maxaddr, - VM_NOSLEEP); - ASSERT(ioaddr); - } - } - - ASSERT(ioaddr >= minaddr); - ASSERT(ioaddr + size - 1 < maxaddr); - - *dvma = ioaddr; - - /* - * save the dvma range in the device dvma cookie - */ - dcookie[cnt].dc_addr = ioaddr; - dcookie[cnt].dc_size = xsize; - dcookie[cnt].dc_domain = domain; - dcookie[cnt].dc_align = align; - - return (DDI_SUCCESS); -} - -/* - * iommu_map_dvma() - * map dvma to the physical addresses, the actual - * mapped dvma page number is returned - */ -static int -iommu_map_dvma(dmar_domain_state_t *domain, uint64_t dvma, - uint64_t paddr, uint_t psize, struct ddi_dma_req *dmareq) -{ - uint64_t start, end; - int flags; - - start = paddr & IOMMU_PAGE_MASK; - end = (paddr + psize - 1) & IOMMU_PAGE_MASK; - flags = dmareq->dmar_flags & DDI_DMA_RDWR; - - /* map each physical address */ - (void) iommu_map_page_range(domain, dvma, start, end, flags); - return (IOMMU_BTOP(end - start) + 1); -} - -/* - * intel_iommu_map_sgl() - * called from rootnex_dma_bindhdl(), to build dma - * cookies when iommu is enabled - */ -int -intel_iommu_map_sgl(ddi_dma_handle_t handle, - struct ddi_dma_req *dmareq, uint_t prealloc) -{ - ddi_dma_atyp_t buftype; - uint64_t offset; - page_t **pparray; - uint64_t paddr; - uint64_t dvma; - uint_t psize; - uint_t size; - uint64_t maxseg; - caddr_t vaddr; - uint_t pcnt, cnt; - page_t *page; - ddi_dma_cookie_t *sgl; - rootnex_sglinfo_t *sglinfo; - ddi_dma_obj_t *dmar_object; - ddi_dma_impl_t *hp; - rootnex_dma_t *dma; - dmar_domain_state_t *domain; - int e; - - hp = (ddi_dma_impl_t 
*)handle; - dma = (rootnex_dma_t *)hp->dmai_private; - sglinfo = &(dma->dp_sglinfo); - dmar_object = &(dmareq->dmar_object); - maxseg = sglinfo->si_max_cookie_size; - pparray = dmar_object->dmao_obj.virt_obj.v_priv; - vaddr = dmar_object->dmao_obj.virt_obj.v_addr; - buftype = dmar_object->dmao_type; - size = dmar_object->dmao_size; - - /* get domain for the dma request */ - if (iommu_get_domain(dma->dp_dip, &domain) != DDI_SUCCESS) { - cmn_err(CE_WARN, "get domain for %s failed", - ddi_node_name(dma->dp_dip)); - return (IOMMU_SGL_NORESOURCES); - } - - /* return directly if the drhd is disabled */ - if (!(domain->dm_iommu->iu_enabled & DMAR_ENABLE) || - domain->dm_identity) - return (IOMMU_SGL_DISABLE); - - /* - * allocate the cookie arrays; if the pre-allocated - * space is not enough, it is reallocated here - */ - if (iommu_alloc_cookie_array(dma, dmareq, prealloc) - != IOMMU_SGL_SUCCESS) - return (IOMMU_SGL_NORESOURCES); - hp->dmai_cookie = dma->dp_cookies; - sgl = dma->dp_cookies; - - pcnt = 0; - cnt = 0; - - /* retrieve paddr, psize, offset from dmareq */ - if (buftype == DMA_OTYP_PAGES) { - page = dmar_object->dmao_obj.pp_obj.pp_pp; - ASSERT(!PP_ISFREE(page) && PAGE_LOCKED(page)); - offset = dmar_object->dmao_obj.pp_obj.pp_offset & - MMU_PAGEOFFSET; - paddr = pfn_to_pa(page->p_pagenum) + offset; - psize = MIN((MMU_PAGESIZE - offset), size); - sglinfo->si_asp = NULL; - page = page->p_next; - } else { - ASSERT((buftype == DMA_OTYP_VADDR) || - (buftype == DMA_OTYP_BUFVADDR)); - sglinfo->si_asp = dmar_object->dmao_obj.virt_obj.v_as; - if (sglinfo->si_asp == NULL) { - sglinfo->si_asp = &kas; - } - offset = (uintptr_t)vaddr & MMU_PAGEOFFSET; - - if (pparray != NULL) { - ASSERT(!PP_ISFREE(pparray[pcnt])); - paddr = pfn_to_pa(pparray[pcnt]->p_pagenum) + offset; - psize = MIN((MMU_PAGESIZE - offset), size); - pcnt++; - } else { - paddr = pfn_to_pa(hat_getpfnum(sglinfo->si_asp->a_hat, - vaddr)) + offset; - psize = MIN(size, (MMU_PAGESIZE - offset)); - vaddr += psize; - } - } - - /* save the iommu page offset */ - sglinfo->si_buf_offset = offset & IOMMU_PAGE_OFFSET; - - /* - * allocate the dvma and map [paddr, paddr+psize) - */ - e = iommu_alloc_dvma(domain, MIN(size + sglinfo->si_buf_offset, - maxseg), hp, &dvma, cnt); - if (e != DDI_SUCCESS) - return (IOMMU_SGL_NORESOURCES); - e = iommu_map_dvma(domain, dvma, paddr, psize, dmareq); - - /* - * set up the first cookie with the dvma of the page - * and its size; we don't account for the - * offset into the first page yet - */ - sgl[cnt].dmac_laddress = dvma; - sgl[cnt].dmac_size = psize + sglinfo->si_buf_offset; - sgl[cnt].dmac_type = 0; - dvma += IOMMU_PTOB(e); - - size -= psize; - while (size > 0) { - /* get the size for this page (i.e. 
partial or full page) */ - psize = MIN(size, MMU_PAGESIZE); - if (buftype == DMA_OTYP_PAGES) { - /* get the paddr from the page_t */ - ASSERT(!PP_ISFREE(page) && PAGE_LOCKED(page)); - paddr = pfn_to_pa(page->p_pagenum); - page = page->p_next; - } else if (pparray != NULL) { - /* index into the array of page_t's to get the paddr */ - ASSERT(!PP_ISFREE(pparray[pcnt])); - paddr = pfn_to_pa(pparray[pcnt]->p_pagenum); - pcnt++; - } else { - /* call into the VM to get the paddr */ - paddr = pfn_to_pa(hat_getpfnum - (sglinfo->si_asp->a_hat, vaddr)); - vaddr += psize; - } - - /* - * check to see if this page would put us - * over the max cookie size - */ - if ((sgl[cnt].dmac_size + psize) > maxseg) { - /* use the next cookie */ - cnt++; - - /* allocate the dvma and map [paddr, paddr+psize) */ - e = iommu_alloc_dvma(domain, MIN(size, maxseg), - hp, &dvma, cnt); - if (e != DDI_SUCCESS) - return (IOMMU_SGL_NORESOURCES); - e = iommu_map_dvma(domain, dvma, paddr, psize, dmareq); - - /* save the cookie information */ - sgl[cnt].dmac_laddress = dvma; - sgl[cnt].dmac_size = psize; - sgl[cnt].dmac_type = 0; - dvma += IOMMU_PTOB(e); - - /* - * we can add this page in the current cookie - */ - } else { - e = iommu_map_dvma(domain, dvma, paddr, psize, dmareq); - sgl[cnt].dmac_size += psize; - dvma += IOMMU_PTOB(e); - } - - size -= psize; - } - - /* take account in the offset into the first page */ - sgl[0].dmac_laddress += sglinfo->si_buf_offset; - sgl[0].dmac_size -= sglinfo->si_buf_offset; - - /* save away how many cookies we have */ - sglinfo->si_sgl_size = cnt + 1; - - return (IOMMU_SGL_SUCCESS); -} - -/* - * iommu_clear_leaf_pte() - * clear a single leaf pte - */ -static void -iommu_clear_leaf_pte(dmar_domain_state_t *domain, uint64_t dvma, uint64_t size) -{ - iopte_t pte; - uint_t offset; - caddr_t leaf_table, dirt; - uint64_t csize = 0; - uint64_t cdvma = dvma & IOMMU_PAGE_MASK; - int count; - - while (csize < size) { - - /* retrieve the leaf page table */ - leaf_table = get_level_table(domain, IOMMU_BTOP(cdvma), 1); - if (!leaf_table) { - cmn_err(CE_WARN, "get level 1 table for 0x%" - PRIx64 "failed", cdvma); - return; - } - - /* map the leaf page and walk to the pte */ - offset = dvma_level_offset(IOMMU_BTOP(cdvma), 1); - - /* clear the ptes */ - count = 0; - dirt = (caddr_t)((iopte_t)leaf_table + offset); - while ((csize < size) && - (offset < IOMMU_PTE_MAX)) { - pte = (iopte_t)leaf_table + offset; - if (!*pte) { - cmn_err(CE_WARN, "try to clear NULL pte"); - } else { - *pte = 0; - } - csize += IOMMU_PAGE_SIZE; - offset++; - count++; - } - - /* flush cpu and iotlb cache */ - domain->dm_iommu->iu_dmar_ops->do_clflush(dirt, - count * sizeof (uint64_t)); - domain->dm_iommu->iu_dmar_ops->do_iotlb_psi(domain->dm_iommu, - domain->dm_domain_id, cdvma, count, TLB_IVA_WHOLE); - - /* unmap the leaf page */ - cdvma += IOMMU_PTOB(count); - } -} - -/* - * intel_iommu_unmap_sgl() - * called from rootnex_dma_unbindhdl(), to unbind dma - * cookies when iommu is enabled - */ -void -intel_iommu_unmap_sgl(ddi_dma_handle_t handle) -{ - ddi_dma_impl_t *hp; - rootnex_dma_t *dma; - dmar_domain_state_t *domain; - iommu_dvma_cookie_t *dcookies; - rootnex_sglinfo_t *sinfo; - uint64_t i; - - hp = (ddi_dma_impl_t *)handle; - dma = (rootnex_dma_t *)hp->dmai_private; - dcookies = dma->dp_dvma_cookies; - sinfo = &(dma->dp_sglinfo); - - /* get the device domain, no return check needed here */ - (void) iommu_get_domain(dma->dp_dip, &domain); - - /* if the drhd is disabled, nothing will be done */ - if 
(!(domain->dm_iommu->iu_enabled & DMAR_ENABLE) || - domain->dm_identity) - return; - - /* the drhd is enabled */ - for (i = 0; i < sinfo->si_sgl_size; i++) { - /* clear leaf ptes */ - iommu_clear_leaf_pte(domain, dcookies[i].dc_addr, - dcookies[i].dc_size); - } - - domain->dm_iommu->iu_dmar_ops->do_reap_wait(domain->dm_iommu); - domain->dm_iommu->iu_dmar_ops->do_plant_wait(domain->dm_iommu, - dcookies, sinfo->si_sgl_size, sinfo->si_max_pages); -} - -/* - * initialize invalidation request queue structure. - * call ddi_dma_mem_alloc to allocate physical contigous - * pages for invalidation queue table - */ -static int -iommu_qinv_init(intel_iommu_state_t *iommu) -{ - inv_queue_state_t *inv_queue; - size_t size; - - ddi_dma_attr_t inv_queue_dma_attr = { - DMA_ATTR_V0, - 0U, - 0xffffffffU, - 0xffffffffU, - MMU_PAGESIZE, /* page aligned */ - 0x1, - 0x1, - 0xffffffffU, - 0xffffffffU, - 1, - 4, - 0 - }; - - ddi_device_acc_attr_t inv_queue_acc_attr = { - DDI_DEVICE_ATTR_V0, - DDI_NEVERSWAP_ACC, - DDI_STRICTORDER_ACC - }; - - if (qinv_iqa_qs > QINV_MAX_QUEUE_SIZE) - qinv_iqa_qs = QINV_MAX_QUEUE_SIZE; - - inv_queue = (inv_queue_state_t *) - kmem_zalloc(sizeof (inv_queue_state_t), KM_SLEEP); - - /* set devi_ops in dev info structure for ddi_dma_mem_alloc */ - DEVI(iommu->iu_drhd->di_dip)->devi_ops = - DEVI(ddi_root_node())->devi_ops; - - /* - * set devi_bus_dma_allochdl in dev info structure for - * ddi_dma_free_handle - */ - DEVI(iommu->iu_drhd->di_dip)->devi_bus_dma_allochdl = - DEVI(ddi_root_node()); - - if (ddi_dma_alloc_handle(iommu->iu_drhd->di_dip, - &inv_queue_dma_attr, - DDI_DMA_SLEEP, - NULL, - &(inv_queue->iq_table.dma_hdl)) != DDI_SUCCESS) { - cmn_err(CE_WARN, - "alloc invalidation queue table handler failed\n"); - goto queue_table_handle_failed; - } - - if (ddi_dma_alloc_handle(iommu->iu_drhd->di_dip, - &inv_queue_dma_attr, - DDI_DMA_SLEEP, - NULL, - &(inv_queue->iq_sync.dma_hdl)) != DDI_SUCCESS) { - cmn_err(CE_WARN, - "alloc invalidation queue sync mem handler failed\n"); - goto sync_table_handle_failed; - } - - inv_queue->iq_table.size = (1 << (qinv_iqa_qs + 8)); - size = inv_queue->iq_table.size * QINV_ENTRY_SIZE; - - /* alloc physical contiguous pages for invalidation queue */ - if (ddi_dma_mem_alloc(inv_queue->iq_table.dma_hdl, - size, - &inv_queue_acc_attr, - DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED, - DDI_DMA_SLEEP, - NULL, - &(inv_queue->iq_table.vaddr), - &size, - &(inv_queue->iq_table.acc_hdl)) != DDI_SUCCESS) { - cmn_err(CE_WARN, - "alloc invalidation queue table failed\n"); - goto queue_table_mem_failed; - } - - ASSERT(!((uintptr_t)inv_queue->iq_table.vaddr & MMU_PAGEOFFSET)); - bzero(inv_queue->iq_table.vaddr, size); - - /* get the base physical address of invalidation request queue */ - inv_queue->iq_table.paddr = pfn_to_pa( - hat_getpfnum(kas.a_hat, inv_queue->iq_table.vaddr)); - - inv_queue->iq_table.head = inv_queue->iq_table.tail = 0; - - inv_queue->iq_sync.size = inv_queue->iq_table.size; - size = inv_queue->iq_sync.size * QINV_SYNC_DATA_SIZE; - - /* alloc status memory for invalidation wait descriptor */ - if (ddi_dma_mem_alloc(inv_queue->iq_sync.dma_hdl, - size, - &inv_queue_acc_attr, - DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED, - DDI_DMA_SLEEP, - NULL, - &(inv_queue->iq_sync.vaddr), - &size, - &(inv_queue->iq_sync.acc_hdl)) != DDI_SUCCESS) { - cmn_err(CE_WARN, - "alloc invalidation queue sync mem failed\n"); - goto sync_table_mem_failed; - } - - ASSERT(!((uintptr_t)inv_queue->iq_sync.vaddr & MMU_PAGEOFFSET)); - bzero(inv_queue->iq_sync.vaddr, size); - 
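/* - * Editorial sizing sketch (example values): with qinv_iqa_qs = 0 the - * request queue holds 1 << (0 + 8) = 256 descriptors of - * QINV_ENTRY_SIZE bytes each; iommu_qinv_enable() later programs the - * IQA register with this table's page-aligned base paddr OR'ed with - * the queue-size field. - */ - 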
inv_queue->iq_sync.paddr = pfn_to_pa( - hat_getpfnum(kas.a_hat, inv_queue->iq_sync.vaddr)); - - inv_queue->iq_sync.head = inv_queue->iq_sync.tail = 0; - - mutex_init(&(inv_queue->iq_table.lock), NULL, MUTEX_DRIVER, NULL); - mutex_init(&(inv_queue->iq_sync.lock), NULL, MUTEX_DRIVER, NULL); - - /* - * init iotlb pend node for submitting invalidation iotlb - * queue request - */ - inv_queue->iotlb_pend_node = (iotlb_pend_node_t **) - kmem_zalloc(inv_queue->iq_sync.size - * sizeof (iotlb_pend_node_t *), KM_SLEEP); - - /* set invalidation queue structure */ - iommu->iu_inv_queue = inv_queue; - - return (DDI_SUCCESS); - -sync_table_mem_failed: - ddi_dma_mem_free(&(inv_queue->iq_table.acc_hdl)); - -queue_table_mem_failed: - ddi_dma_free_handle(&(inv_queue->iq_sync.dma_hdl)); - -sync_table_handle_failed: - ddi_dma_free_handle(&(inv_queue->iq_table.dma_hdl)); - -queue_table_handle_failed: - kmem_free(inv_queue, sizeof (inv_queue_state_t)); - - return (ENOMEM); -} - -/* destroy invalidation queue structure */ -static void -iommu_qinv_fini(intel_iommu_state_t *iommu) -{ - inv_queue_state_t *inv_queue; - - inv_queue = iommu->iu_inv_queue; - kmem_free(inv_queue->iotlb_pend_node, - inv_queue->iq_sync.size - * sizeof (iotlb_pend_node_t *)); - ddi_dma_mem_free(&(inv_queue->iq_sync.acc_hdl)); - ddi_dma_mem_free(&(inv_queue->iq_table.acc_hdl)); - ddi_dma_free_handle(&(inv_queue->iq_sync.dma_hdl)); - ddi_dma_free_handle(&(inv_queue->iq_table.dma_hdl)); - mutex_destroy(&(inv_queue->iq_table.lock)); - mutex_destroy(&(inv_queue->iq_sync.lock)); - kmem_free(inv_queue, sizeof (inv_queue_state_t)); -} - -/* enable queued invalidation interface */ -static void -iommu_qinv_enable(intel_iommu_state_t *iommu) -{ - inv_queue_state_t *inv_queue; - uint64_t iqa_reg_value; - uint32_t status; - - struct dmar_ops *dmar_ops; - - inv_queue = iommu->iu_inv_queue; - - iqa_reg_value = inv_queue->iq_table.paddr | qinv_iqa_qs; - - mutex_enter(&iommu->iu_reg_lock); - /* Initialize the Invalidation Queue Tail register to zero */ - iommu_put_reg64(iommu, IOMMU_REG_INVAL_QT, 0); - - /* set invalidation queue base address register */ - iommu_put_reg64(iommu, IOMMU_REG_INVAL_QAR, iqa_reg_value); - - /* enable queued invalidation interface */ - iommu_put_reg32(iommu, IOMMU_REG_GLOBAL_CMD, - iommu->iu_global_cmd_reg | IOMMU_GCMD_QIE); - iommu_wait_completion(iommu, IOMMU_REG_GLOBAL_STS, - iommu_get_reg32, (status & IOMMU_GSTS_QIES), status); - mutex_exit(&iommu->iu_reg_lock); - - iommu->iu_global_cmd_reg |= IOMMU_GCMD_QIE; - iommu->iu_enabled |= QINV_ENABLE; - - /* set new queued invalidation interface */ - dmar_ops = iommu->iu_dmar_ops; - - dmar_ops->do_context_fsi = qinv_cc_fsi; - dmar_ops->do_context_dsi = qinv_cc_dsi; - dmar_ops->do_context_gbl = qinv_cc_gbl; - dmar_ops->do_iotlb_psi = qinv_iotlb_psi; - dmar_ops->do_iotlb_dsi = qinv_iotlb_dsi; - dmar_ops->do_iotlb_gbl = qinv_iotlb_gbl; - dmar_ops->do_plant_wait = qinv_plant_wait; - dmar_ops->do_reap_wait = qinv_reap_wait; -} - -/* submit invalidation request descriptor to invalidation queue */ -static void -qinv_submit_inv_dsc(intel_iommu_state_t *iommu, inv_dsc_t *dsc) -{ - inv_queue_state_t *inv_queue; - inv_queue_mem_t *iq_table; - uint_t tail; - - inv_queue = iommu->iu_inv_queue; - iq_table = &(inv_queue->iq_table); - - mutex_enter(&iq_table->lock); - tail = iq_table->tail; - iq_table->tail++; - - if (iq_table->tail == iq_table->size) - iq_table->tail = 0; - - while (iq_table->head == iq_table->tail) { - /* - * inv queue table exhausted, wait for hardware to fetch - * the next descriptor 
- */ - iq_table->head = QINV_IQA_HEAD( - iommu_get_reg64(iommu, IOMMU_REG_INVAL_QH)); - } - - bcopy(dsc, iq_table->vaddr + tail * QINV_ENTRY_SIZE, - QINV_ENTRY_SIZE); - - iommu_put_reg64(iommu, IOMMU_REG_INVAL_QT, - iq_table->tail << QINV_IQA_TAIL_SHIFT); - - mutex_exit(&iq_table->lock); -} - -/* queued invalidation interface -- invalidate context cache */ -static void -qinv_cc_common(intel_iommu_state_t *iommu, uint8_t function_mask, - uint16_t source_id, uint_t domain_id, ctt_inv_g_t type) -{ - inv_dsc_t dsc; - - dsc.lo = CC_INV_DSC_LOW(function_mask, source_id, domain_id, type); - dsc.hi = CC_INV_DSC_HIGH; - - qinv_submit_inv_dsc(iommu, &dsc); - - /* record the context cache statistics */ - atomic_inc_64(&(iommu->iu_statistics.st_context_cache)); -} - -/* queued invalidation interface -- invalidate iotlb */ -static void -qinv_iotlb_common(intel_iommu_state_t *iommu, uint_t domain_id, - uint64_t addr, uint_t am, uint_t hint, tlb_inv_g_t type) -{ - inv_dsc_t dsc; - uint8_t dr = 0; - uint8_t dw = 0; - - if (IOMMU_CAP_GET_DRD(iommu->iu_capability)) - dr = 1; - if (IOMMU_CAP_GET_DWD(iommu->iu_capability)) - dw = 1; - - switch (type) { - case TLB_INV_G_PAGE: - if (!IOMMU_CAP_GET_PSI(iommu->iu_capability) || - am > IOMMU_CAP_GET_MAMV(iommu->iu_capability) || - addr & IOMMU_PAGE_OFFSET) { - type = TLB_INV_G_DOMAIN; - goto qinv_ignore_psi; - } - dsc.lo = IOTLB_INV_DSC_LOW(domain_id, dr, dw, type); - dsc.hi = IOTLB_INV_DSC_HIGH(addr, hint, am); - break; - - qinv_ignore_psi: - case TLB_INV_G_DOMAIN: - dsc.lo = IOTLB_INV_DSC_LOW(domain_id, dr, dw, type); - dsc.hi = 0; - break; - - case TLB_INV_G_GLOBAL: - dsc.lo = IOTLB_INV_DSC_LOW(0, dr, dw, type); - dsc.hi = 0; - break; - default: - cmn_err(CE_WARN, "incorrect iotlb flush type"); - return; - } - - qinv_submit_inv_dsc(iommu, &dsc); - - /* - * check the result and record the statistics - */ - switch (type) { - /* global */ - case TLB_INV_G_GLOBAL: - atomic_inc_64(&(iommu->iu_statistics.st_iotlb_global)); - break; - /* domain */ - case TLB_INV_G_DOMAIN: - atomic_inc_64(&(iommu->iu_statistics.st_iotlb_domain)); - break; - /* psi */ - case TLB_INV_G_PAGE: - atomic_inc_64(&(iommu->iu_statistics.st_iotlb_psi)); - break; - default: - break; - } -} - -/* queued invalidation interface -- invalidate dev_iotlb */ -static void -qinv_dev_iotlb_common(intel_iommu_state_t *iommu, uint16_t sid, - uint64_t addr, uint_t size, uint_t max_invs_pd) -{ - inv_dsc_t dsc; - - dsc.lo = DEV_IOTLB_INV_DSC_LOW(sid, max_invs_pd); - dsc.hi = DEV_IOTLB_INV_DSC_HIGH(addr, size); - - qinv_submit_inv_dsc(iommu, &dsc); -} - -/* queued invalidation interface -- invalidate interrupt entry cache */ -static void -qinv_iec_common(intel_iommu_state_t *iommu, uint_t iidx, uint_t im, uint_t g) -{ - inv_dsc_t dsc; - - dsc.lo = IEC_INV_DSC_LOW(iidx, im, g); - dsc.hi = IEC_INV_DSC_HIGH; - - qinv_submit_inv_dsc(iommu, &dsc); -} - -/* queued invalidation interface -- global invalidate interrupt entry cache */ -static void -qinv_iec_global(intel_iommu_state_t *iommu) -{ - qinv_iec_common(iommu, 0, 0, IEC_INV_GLOBAL); - qinv_wait_sync(iommu); -} - -/* queued invalidation interface -- invalidate single interrupt entry cache */ -static void -qinv_iec_single(intel_iommu_state_t *iommu, uint_t iidx) -{ - qinv_iec_common(iommu, iidx, 0, IEC_INV_INDEX); - qinv_wait_sync(iommu); -} - -/* queued invalidation interface -- invalidate interrupt entry caches */ -static void -qinv_iec(intel_iommu_state_t *iommu, uint_t iidx, uint_t cnt) -{ - uint_t i, mask = 0; - - ASSERT(cnt != 0); - - /* requested interrupt 
count is not a power of 2 */ - if (!ISP2(cnt)) { - for (i = 0; i < cnt; i++) { - qinv_iec_common(iommu, iidx + i, 0, IEC_INV_INDEX); - } - qinv_wait_sync(iommu); - return; - } - - while ((2 << mask) < cnt) { - mask++; - } - - if (mask > IOMMU_ECAP_GET_MHMV(iommu->iu_excapability)) { - for (i = 0; i < cnt; i++) { - qinv_iec_common(iommu, iidx + i, 0, IEC_INV_INDEX); - } - qinv_wait_sync(iommu); - return; - } - - qinv_iec_common(iommu, iidx, mask, IEC_INV_INDEX); - - qinv_wait_sync(iommu); -} - -/* - * alloc free entry from sync status table - */ -static uint_t -qinv_alloc_sync_mem_entry(intel_iommu_state_t *iommu) -{ - inv_queue_mem_t *sync_mem; - uint_t tail; - - sync_mem = &iommu->iu_inv_queue->iq_sync; - -sync_mem_exhausted: - mutex_enter(&sync_mem->lock); - tail = sync_mem->tail; - sync_mem->tail++; - if (sync_mem->tail == sync_mem->size) - sync_mem->tail = 0; - - if (sync_mem->head == sync_mem->tail) { - /* should never happen */ - cmn_err(CE_WARN, "sync mem exhausted\n"); - sync_mem->tail = tail; - mutex_exit(&sync_mem->lock); - delay(IOMMU_ALLOC_RESOURCE_DELAY); - goto sync_mem_exhausted; - } - mutex_exit(&sync_mem->lock); - - return (tail); -} - -/* - * queued invalidation interface -- invalidation wait descriptor - * fence flag not set, need status data to indicate the invalidation - * wait descriptor completion - */ -static void -qinv_wait_async_unfence(intel_iommu_state_t *iommu, iotlb_pend_node_t *node) -{ - inv_dsc_t dsc; - inv_queue_mem_t *sync_mem; - uint64_t saddr; - uint_t tail; - - sync_mem = &iommu->iu_inv_queue->iq_sync; - tail = qinv_alloc_sync_mem_entry(iommu); - - /* plant an iotlb pending node */ - iommu->iu_inv_queue->iotlb_pend_node[tail] = node; - - saddr = sync_mem->paddr + tail * QINV_SYNC_DATA_SIZE; - - /* - * sdata = QINV_SYNC_DATA_UNFENCE, fence = 0, sw = 1, if = 0 - * indicate the invalidation wait descriptor completion by - * performing a coherent DWORD write to the status address, - * not by generating an invalidation completion event - */ - dsc.lo = INV_WAIT_DSC_LOW(QINV_SYNC_DATA_UNFENCE, 0, 1, 0); - dsc.hi = INV_WAIT_DSC_HIGH(saddr); - - qinv_submit_inv_dsc(iommu, &dsc); -} - -/* - * queued invalidation interface -- invalidation wait descriptor - * fence flag set, indicating that descriptors following the invalidation - * wait descriptor must be processed by hardware only after the - * invalidation wait descriptor completes. 
- */ -static void -qinv_wait_async_fence(intel_iommu_state_t *iommu) -{ - inv_dsc_t dsc; - - /* sw = 0, fence = 1, iflag = 0 */ - dsc.lo = INV_WAIT_DSC_LOW(0, 1, 0, 0); - dsc.hi = 0; - qinv_submit_inv_dsc(iommu, &dsc); -} - -/* - * queued invalidation interface -- invalidation wait descriptor - * wait until the invalidation request has finished - */ -static void -qinv_wait_sync(intel_iommu_state_t *iommu) -{ - inv_dsc_t dsc; - inv_queue_mem_t *sync_mem; - uint64_t saddr; - uint_t tail; - volatile uint32_t *status; - - sync_mem = &iommu->iu_inv_queue->iq_sync; - tail = qinv_alloc_sync_mem_entry(iommu); - saddr = sync_mem->paddr + tail * QINV_SYNC_DATA_SIZE; - status = (uint32_t *)(sync_mem->vaddr + tail * QINV_SYNC_DATA_SIZE); - - /* - * sdata = QINV_SYNC_DATA_FENCE, fence = 1, sw = 1, if = 0 - * indicate the invalidation wait descriptor completion by - * performing a coherent DWORD write to the status address, - * not by generating an invalidation completion event - */ - dsc.lo = INV_WAIT_DSC_LOW(QINV_SYNC_DATA_FENCE, 1, 1, 0); - dsc.hi = INV_WAIT_DSC_HIGH(saddr); - - qinv_submit_inv_dsc(iommu, &dsc); - - while ((*status) != QINV_SYNC_DATA_FENCE) - iommu_cpu_nop(); - *status = QINV_SYNC_DATA_UNFENCE; -} - -/* get already completed invalidation wait requests */ -static int -qinv_wait_async_finish(intel_iommu_state_t *iommu, int *cnt) -{ - inv_queue_mem_t *sync_mem; - int index; - volatile uint32_t *value; - - ASSERT((*cnt) == 0); - - sync_mem = &iommu->iu_inv_queue->iq_sync; - - mutex_enter(&sync_mem->lock); - index = sync_mem->head; - value = (uint32_t *)(sync_mem->vaddr + index - * QINV_SYNC_DATA_SIZE); - while (*value == QINV_SYNC_DATA_UNFENCE) { - *value = 0; - (*cnt)++; - sync_mem->head++; - if (sync_mem->head == sync_mem->size) { - sync_mem->head = 0; - value = (uint32_t *)(sync_mem->vaddr); - } else - value = (uint32_t *)((char *)value + - QINV_SYNC_DATA_SIZE); - } - - mutex_exit(&sync_mem->lock); - if ((*cnt) > 0) - return (index); - else - return (-1); -} - -/* - * queued invalidation interface - * function based context cache invalidation - */ -static void -qinv_cc_fsi(intel_iommu_state_t *iommu, uint8_t function_mask, - uint16_t source_id, uint_t domain_id) -{ - qinv_cc_common(iommu, function_mask, source_id, - domain_id, CTT_INV_G_DEVICE); - qinv_wait_sync(iommu); -} - -/* - * queued invalidation interface - * domain based context cache invalidation - */ -static void -qinv_cc_dsi(intel_iommu_state_t *iommu, uint_t domain_id) -{ - qinv_cc_common(iommu, 0, 0, domain_id, CTT_INV_G_DOMAIN); - qinv_wait_sync(iommu); -} - -/* - * queued invalidation interface - * invalidate global context cache - */ -static void -qinv_cc_gbl(intel_iommu_state_t *iommu) -{ - qinv_cc_common(iommu, 0, 0, 0, CTT_INV_G_GLOBAL); - qinv_wait_sync(iommu); -} - -/* - * queued invalidation interface - * page based iotlb invalidation - */ -static void -qinv_iotlb_psi(intel_iommu_state_t *iommu, uint_t domain_id, - uint64_t dvma, uint_t count, uint_t hint) -{ - uint_t am = 0; - uint_t max_am; - - max_am = IOMMU_CAP_GET_MAMV(iommu->iu_capability); - - /* choose page specific invalidation */ - if (IOMMU_CAP_GET_PSI(iommu->iu_capability)) { - while (am <= max_am) { - if ((ADDR_AM_OFFSET(IOMMU_BTOP(dvma), am) + count) - <= ADDR_AM_MAX(am)) { - qinv_iotlb_common(iommu, domain_id, - dvma, am, hint, TLB_INV_G_PAGE); - break; - } - am++; - } - if (am > max_am) { - qinv_iotlb_common(iommu, domain_id, - dvma, 0, hint, TLB_INV_G_DOMAIN); - } - - /* choose domain invalidation */ - } else { - qinv_iotlb_common(iommu, 
domain_id, dvma, - 0, hint, TLB_INV_G_DOMAIN); - } -} - -/* - * queued invalidation interface - * domain based iotlb invalidation - */ -static void -qinv_iotlb_dsi(intel_iommu_state_t *iommu, uint_t domain_id) -{ - qinv_iotlb_common(iommu, domain_id, 0, 0, 0, TLB_INV_G_DOMAIN); - qinv_wait_sync(iommu); -} - -/* - * queued invalidation interface - * global iotlb invalidation - */ -static void -qinv_iotlb_gbl(intel_iommu_state_t *iommu) -{ - qinv_iotlb_common(iommu, 0, 0, 0, 0, TLB_INV_G_GLOBAL); - qinv_wait_sync(iommu); -} - -/* - * the plant wait operation for queued invalidation interface - */ -static void -qinv_plant_wait(intel_iommu_state_t *iommu, iommu_dvma_cookie_t *dcookies, - uint_t count, uint_t array_size) -{ - iotlb_pend_node_t *node = NULL; - iotlb_pend_head_t *head; - - head = &(iommu->iu_pend_head); - mutex_enter(&(head->ich_mem_lock)); - node = list_head(&(head->ich_mem_list)); - if (node) { - list_remove(&(head->ich_mem_list), node); - } - mutex_exit(&(head->ich_mem_lock)); - - /* no cache, alloc one */ - if (node == NULL) { - node = kmem_zalloc(sizeof (iotlb_pend_node_t), KM_SLEEP); - } - node->icn_dcookies = dcookies; - node->icn_count = count; - node->icn_array_size = array_size; - - /* plant an invalidation wait descriptor; do not wait for its completion */ - qinv_wait_async_unfence(iommu, node); -} - -/* - * the reap wait operation for queued invalidation interface - */ -static void -qinv_reap_wait(intel_iommu_state_t *iommu) -{ - int index, cnt = 0; - iotlb_pend_node_t *node; - iotlb_pend_head_t *head; - - head = &(iommu->iu_pend_head); - - index = qinv_wait_async_finish(iommu, &cnt); - - while (cnt--) { - node = iommu->iu_inv_queue->iotlb_pend_node[index]; - if (node == NULL) - continue; - dmar_release_dvma_cookie(node->icn_dcookies, - node->icn_count, node->icn_array_size); - - mutex_enter(&(head->ich_mem_lock)); - list_insert_head(&(head->ich_mem_list), node); - mutex_exit(&(head->ich_mem_lock)); - iommu->iu_inv_queue->iotlb_pend_node[index] = NULL; - index++; - if (index == iommu->iu_inv_queue->iq_sync.size) - index = 0; - } -} - -/* init interrupt remapping table */ -static int -intr_remap_init_unit(intel_iommu_state_t *iommu) -{ - intr_remap_tbl_state_t *intr_remap_tbl; - size_t size; - - ddi_dma_attr_t intrr_dma_attr = { - DMA_ATTR_V0, - 0U, - 0xffffffffU, - 0xffffffffU, - MMU_PAGESIZE, /* page aligned */ - 0x1, - 0x1, - 0xffffffffU, - 0xffffffffU, - 1, - 4, - 0 - }; - - ddi_device_acc_attr_t intrr_acc_attr = { - DDI_DEVICE_ATTR_V0, - DDI_NEVERSWAP_ACC, - DDI_STRICTORDER_ACC - }; - - if (intrr_apic_mode == LOCAL_X2APIC) { - if (!IOMMU_ECAP_GET_EIM(iommu->iu_excapability)) { - return (DDI_FAILURE); - } - } - - if (intrr_irta_s > INTRR_MAX_IRTA_SIZE) { - intrr_irta_s = INTRR_MAX_IRTA_SIZE; - } - - intr_remap_tbl = (intr_remap_tbl_state_t *) - kmem_zalloc(sizeof (intr_remap_tbl_state_t), KM_SLEEP); - - if (ddi_dma_alloc_handle(iommu->iu_drhd->di_dip, - &intrr_dma_attr, - DDI_DMA_SLEEP, - NULL, - &(intr_remap_tbl->dma_hdl)) != DDI_SUCCESS) { - goto intrr_tbl_handle_failed; - } - - intr_remap_tbl->size = 1 << (intrr_irta_s + 1); - size = intr_remap_tbl->size * INTRR_RTE_SIZE; - if (ddi_dma_mem_alloc(intr_remap_tbl->dma_hdl, - size, - &intrr_acc_attr, - DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED, - DDI_DMA_SLEEP, - NULL, - &(intr_remap_tbl->vaddr), - &size, - &(intr_remap_tbl->acc_hdl)) != DDI_SUCCESS) { - goto intrr_tbl_mem_failed; - - } - - ASSERT(!((uintptr_t)intr_remap_tbl->vaddr & MMU_PAGEOFFSET)); - bzero(intr_remap_tbl->vaddr, size); - intr_remap_tbl->paddr = 
pfn_to_pa( - hat_getpfnum(kas.a_hat, intr_remap_tbl->vaddr)); - - mutex_init(&(intr_remap_tbl->lock), NULL, MUTEX_DRIVER, NULL); - bitset_init(&intr_remap_tbl->map); - bitset_resize(&intr_remap_tbl->map, intr_remap_tbl->size); - intr_remap_tbl->free = 0; - - iommu->iu_intr_remap_tbl = intr_remap_tbl; - - return (DDI_SUCCESS); - -intrr_tbl_mem_failed: - ddi_dma_free_handle(&(intr_remap_tbl->dma_hdl)); - -intrr_tbl_handle_failed: - kmem_free(intr_remap_tbl, sizeof (intr_remap_tbl_state_t)); - - return (ENOMEM); -} - -/* destroy interrupt remapping table */ -static void -intr_remap_fini_unit(intel_iommu_state_t *iommu) -{ - intr_remap_tbl_state_t *intr_remap_tbl; - - intr_remap_tbl = iommu->iu_intr_remap_tbl; - bitset_fini(&intr_remap_tbl->map); - ddi_dma_mem_free(&(intr_remap_tbl->acc_hdl)); - ddi_dma_free_handle(&(intr_remap_tbl->dma_hdl)); - kmem_free(intr_remap_tbl, sizeof (intr_remap_tbl_state_t)); -} - -/* enable interrupt remapping hardware unit */ -static void -intr_remap_enable_unit(intel_iommu_state_t *iommu) -{ - uint32_t status; - uint64_t irta_reg; - intr_remap_tbl_state_t *intr_remap_tbl; - - intr_remap_tbl = iommu->iu_intr_remap_tbl; - - irta_reg = intr_remap_tbl->paddr | intrr_irta_s; - - if (intrr_apic_mode == LOCAL_X2APIC) - irta_reg |= (0x1 << 11); - - /* set interrupt remap table pointer */ - mutex_enter(&(iommu->iu_reg_lock)); - iommu_put_reg64(iommu, IOMMU_REG_IRTAR, irta_reg); - iommu_put_reg32(iommu, IOMMU_REG_GLOBAL_CMD, - iommu->iu_global_cmd_reg | IOMMU_GCMD_SIRTP); - iommu_wait_completion(iommu, IOMMU_REG_GLOBAL_STS, - iommu_get_reg32, (status & IOMMU_GSTS_IRTPS), status); - mutex_exit(&(iommu->iu_reg_lock)); - - /* global flush intr entry cache */ - qinv_iec_global(iommu); - - /* enable interrupt remapping */ - mutex_enter(&(iommu->iu_reg_lock)); - iommu_put_reg32(iommu, IOMMU_REG_GLOBAL_CMD, - iommu->iu_global_cmd_reg | IOMMU_GCMD_IRE); - iommu_wait_completion(iommu, IOMMU_REG_GLOBAL_STS, - iommu_get_reg32, (status & IOMMU_GSTS_IRES), - status); - iommu->iu_global_cmd_reg |= IOMMU_GCMD_IRE; - - /* set compatible mode */ - iommu_put_reg32(iommu, IOMMU_REG_GLOBAL_CMD, - iommu->iu_global_cmd_reg | IOMMU_GCMD_CFI); - iommu_wait_completion(iommu, IOMMU_REG_GLOBAL_STS, - iommu_get_reg32, (status & IOMMU_GSTS_CFIS), - status); - iommu->iu_global_cmd_reg |= IOMMU_GCMD_CFI; - mutex_exit(&(iommu->iu_reg_lock)); - - iommu->iu_enabled |= INTRR_ENABLE; -} - -/* - * helper function to find a free interrupt remapping - * table entry - */ -static uint_t -bitset_find_free(bitset_t *b, uint_t post) -{ - uint_t i; - uint_t cap = bitset_capacity(b); - - if (post == cap) - post = 0; - - ASSERT(post < cap); - - for (i = post; i < cap; i++) { - if (!bitset_in_set(b, i)) - return (i); - } - - for (i = 0; i < post; i++) { - if (!bitset_in_set(b, i)) - return (i); - } - - return (INTRR_IIDX_FULL); /* no free index */ -} - -/* - * helper function to find 'count' contiguous free - * interrupt remapping table entries - */ -static uint_t -bitset_find_multi_free(bitset_t *b, uint_t post, uint_t count) -{ - uint_t i, j; - uint_t cap = bitset_capacity(b); - - if (post == INTRR_IIDX_FULL) { - return (INTRR_IIDX_FULL); - } - - if (count > cap) - return (INTRR_IIDX_FULL); - - ASSERT(post < cap); - - for (i = post; (i + count) <= cap; i++) { - for (j = 0; j < count; j++) { - if (bitset_in_set(b, (i + j))) { - i = i + j; - break; - } - if (j == count - 1) - return (i); - } - } - - for (i = 0; (i < post) && ((i + count) <= cap); i++) { - for (j = 0; j < count; j++) { - if (bitset_in_set(b, (i + j))) 
{ - i = i + j; - break; - } - if (j == count - 1) - return (i); - } - } - - return (INTRR_IIDX_FULL); /* no free index */ -} - -/* alloc one interrupt remapping table entry */ -static int -intrr_tbl_alloc_entry(intr_remap_tbl_state_t *intr_remap_tbl) -{ - uint32_t iidx; - -retry_alloc_iidx: - mutex_enter(&intr_remap_tbl->lock); - iidx = intr_remap_tbl->free; - if (iidx == INTRR_IIDX_FULL) { - /* no free intr entry, use compatible format intr */ - mutex_exit(&intr_remap_tbl->lock); - if (intrr_apic_mode == LOCAL_X2APIC) { - /* - * x2apic mode does not allow compatible - * interrupts - */ - delay(IOMMU_ALLOC_RESOURCE_DELAY); - goto retry_alloc_iidx; - } - } else { - bitset_add(&intr_remap_tbl->map, iidx); - intr_remap_tbl->free = bitset_find_free(&intr_remap_tbl->map, - iidx + 1); - mutex_exit(&intr_remap_tbl->lock); - } - - return (iidx); -} - -/* alloc 'cnt' contiguous interrupt remapping table entries */ -static int -intrr_tbl_alloc_multi_entries(intr_remap_tbl_state_t *intr_remap_tbl, - uint_t cnt) -{ - uint_t iidx, pos, i; - -retry_alloc_iidxs: - mutex_enter(&intr_remap_tbl->lock); - pos = intr_remap_tbl->free; - iidx = bitset_find_multi_free(&intr_remap_tbl->map, pos, cnt); - if (iidx != INTRR_IIDX_FULL) { - if (iidx <= pos && pos < (iidx + cnt)) { - intr_remap_tbl->free = bitset_find_free( - &intr_remap_tbl->map, iidx + cnt); - } - for (i = 0; i < cnt; i++) { - bitset_add(&intr_remap_tbl->map, iidx + i); - } - mutex_exit(&intr_remap_tbl->lock); - } else { - mutex_exit(&intr_remap_tbl->lock); - if (intrr_apic_mode == LOCAL_X2APIC) { - /* x2apic mode does not allow compatible interrupts */ - delay(IOMMU_ALLOC_RESOURCE_DELAY); - goto retry_alloc_iidxs; - } - } - - return (iidx); -} - -/* get ioapic source id and iommu structure for ioapics */ -static void -get_ioapic_iommu_info(void) -{ - ioapic_drhd_info_t *ioapic_dinfo; - uint_t i; - - for_each_in_list(&ioapic_drhd_infos, ioapic_dinfo) { - for (i = 0; i < MAX_IO_APIC; i++) { - if (ioapic_dinfo->ioapic_id == apic_io_id[i]) { - ioapic_iommu_infos[i] = kmem_zalloc( - sizeof (ioapic_iommu_info_t), KM_SLEEP); - ioapic_iommu_infos[i]->sid = ioapic_dinfo->sid; - ioapic_iommu_infos[i]->iommu = - (intel_iommu_state_t *) - ioapic_dinfo->drhd->di_iommu; - break; - } - } - } -} - -/* initialize interrupt remapping */ -static int -intr_remap_init(int apic_mode) -{ - intel_iommu_state_t *iommu; - int intrr_all_disable = 1; - - intrr_apic_mode = apic_mode; - - for_each_in_list(&iommu_states, iommu) { - if ((iommu->iu_enabled & QINV_ENABLE) && - IOMMU_ECAP_GET_IR(iommu->iu_excapability)) { - if (intr_remap_init_unit(iommu) == DDI_SUCCESS) { - intrr_all_disable = 0; - } - } - } - - if (intrr_all_disable) { - /* - * if all drhd units have intr remapping disabled, - * return FAILURE - */ - return (DDI_FAILURE); - } else { - return (DDI_SUCCESS); - } -} - -/* enable interrupt remapping */ -static void -intr_remap_enable(int suppress_brdcst_eoi) -{ - intel_iommu_state_t *iommu; - - intrr_suppress_brdcst_eoi = suppress_brdcst_eoi; - - for_each_in_list(&iommu_states, iommu) { - if (iommu->iu_intr_remap_tbl) - intr_remap_enable_unit(iommu); - } - - /* get iommu structure and interrupt source id for ioapic */ - get_ioapic_iommu_info(); -} - -/* alloc remapping entry for the interrupt */ -static void -intr_remap_alloc_entry(apic_irq_t *irq_ptr) -{ - intel_iommu_state_t *iommu; - intr_remap_tbl_state_t *intr_remap_tbl; - uint32_t iidx, cnt, i; - uint_t vector, irqno; - uint32_t sid_svt_sq; - - if (AIRQ_PRIVATE(irq_ptr) == INTRR_DISABLE || - AIRQ_PRIVATE(irq_ptr) != NULL) { - 
return; - } - - AIRQ_PRIVATE(irq_ptr) = - kmem_zalloc(sizeof (intr_remap_private_t), KM_SLEEP); - - intr_remap_get_iommu(irq_ptr); - - iommu = INTRR_PRIVATE(irq_ptr)->ir_iommu; - if (iommu == NULL) { - goto intr_remap_disable; - } - - intr_remap_tbl = iommu->iu_intr_remap_tbl; - - if (irq_ptr->airq_mps_intr_index == MSI_INDEX) { - cnt = irq_ptr->airq_intin_no; - } else { - cnt = 1; - } - - if (cnt == 1) { - iidx = intrr_tbl_alloc_entry(intr_remap_tbl); - } else { - iidx = intrr_tbl_alloc_multi_entries(intr_remap_tbl, cnt); - } - - if (iidx == INTRR_IIDX_FULL) { - goto intr_remap_disable; - } - - INTRR_PRIVATE(irq_ptr)->ir_iidx = iidx; - - intr_remap_get_sid(irq_ptr); - - if (cnt == 1) { - if (IOMMU_CAP_GET_CM(iommu->iu_capability)) { - qinv_iec_single(iommu, iidx); - } else { - iommu->iu_dmar_ops->do_flwb(iommu); - } - return; - } - - sid_svt_sq = INTRR_PRIVATE(irq_ptr)->ir_sid_svt_sq; - - vector = irq_ptr->airq_vector; - - for (i = 1; i < cnt; i++) { - irqno = apic_vector_to_irq[vector + i]; - irq_ptr = apic_irq_table[irqno]; - - ASSERT(irq_ptr); - - AIRQ_PRIVATE(irq_ptr) = - kmem_zalloc(sizeof (intr_remap_private_t), KM_SLEEP); - - INTRR_PRIVATE(irq_ptr)->ir_iommu = iommu; - INTRR_PRIVATE(irq_ptr)->ir_sid_svt_sq = sid_svt_sq; - INTRR_PRIVATE(irq_ptr)->ir_iidx = iidx + i; - } - - if (IOMMU_CAP_GET_CM(iommu->iu_capability)) { - qinv_iec(iommu, iidx, cnt); - } else { - iommu->iu_dmar_ops->do_flwb(iommu); - } - - return; - -intr_remap_disable: - kmem_free(AIRQ_PRIVATE(irq_ptr), sizeof (intr_remap_private_t)); - AIRQ_PRIVATE(irq_ptr) = INTRR_DISABLE; -} - -/* helper function to get iommu structure */ -static void intr_remap_get_iommu(apic_irq_t *irq_ptr) -{ - intel_iommu_state_t *iommu = NULL; - - ASSERT(INTRR_PRIVATE(irq_ptr)->ir_iommu == NULL); - - if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) { - /* for fixed interrupt */ - uint_t ioapic_index = irq_ptr->airq_ioapicindex; - if (ioapic_iommu_infos[ioapic_index]) - iommu = ioapic_iommu_infos[ioapic_index]->iommu; - } else { - if (irq_ptr->airq_dip != NULL) { - iommu = iommu_get_dmar(irq_ptr->airq_dip); - } - } - - if ((iommu != NULL) && (iommu->iu_enabled & INTRR_ENABLE)) { - INTRR_PRIVATE(irq_ptr)->ir_iommu = iommu; - } -} - -/* helper function to get interrupt request source id */ -static void -intr_remap_get_sid(apic_irq_t *irq_ptr) -{ - dev_info_t *dip, *pdip; - iommu_private_t *private; - uint16_t sid; - uchar_t svt, sq; - - if (!intrr_enable_sid_verify) { - return; - } - - if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) { - /* for interrupt through I/O APIC */ - uint_t ioapic_index = irq_ptr->airq_ioapicindex; - - sid = ioapic_iommu_infos[ioapic_index]->sid; - - svt = SVT_ALL_VERIFY; - sq = SQ_VERIFY_ALL; - } else { - /* MSI/MSI-X interrupt */ - dip = irq_ptr->airq_dip; - ASSERT(dip); - pdip = iommu_get_pci_top_bridge(dip); - if (pdip == NULL) { - /* pcie device */ - private = DEVI(dip)->devi_iommu_private; - ASSERT(private); - sid = (private->idp_bus << 8) | private->idp_devfn; - svt = SVT_ALL_VERIFY; - sq = SQ_VERIFY_ALL; - } else { - private = DEVI(pdip)->devi_iommu_private; - ASSERT(private); - - if (private->idp_bbp_type == IOMMU_PPB_PCIE_PCI) { - /* device behind pcie to pci bridge */ - sid = (private->idp_bus << 8) | \ - private->idp_sec; - svt = SVT_BUS_VERIFY; - sq = SQ_VERIFY_ALL; - } else { - /* device behind pci to pci bridge */ - sid = (private->idp_bus << 8) | \ - private->idp_devfn; - svt = SVT_ALL_VERIFY; - sq = SQ_VERIFY_ALL; - } - } - } - - INTRR_PRIVATE(irq_ptr)->ir_sid_svt_sq = sid | 
(svt << 18) | (sq << 16); -} - -/* remap the interrupt */ -static void -intr_remap_map_entry(apic_irq_t *irq_ptr, void *intr_data) -{ - intel_iommu_state_t *iommu; - intr_remap_tbl_state_t *intr_remap_tbl; - ioapic_rdt_t *irdt = (ioapic_rdt_t *)intr_data; - msi_regs_t *mregs = (msi_regs_t *)intr_data; - intr_rte_t irte; - uint_t iidx, i, cnt; - uint32_t dst, sid_svt_sq; - uchar_t vector, dlm, tm, rh, dm; - - if (AIRQ_PRIVATE(irq_ptr) == INTRR_DISABLE) { - return; - } - - if (irq_ptr->airq_mps_intr_index == MSI_INDEX) { - cnt = irq_ptr->airq_intin_no; - } else { - cnt = 1; - } - - iidx = INTRR_PRIVATE(irq_ptr)->ir_iidx; - iommu = INTRR_PRIVATE(irq_ptr)->ir_iommu; - intr_remap_tbl = iommu->iu_intr_remap_tbl; - sid_svt_sq = INTRR_PRIVATE(irq_ptr)->ir_sid_svt_sq; - vector = irq_ptr->airq_vector; - - if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) { - dm = RDT_DM(irdt->ir_lo); - rh = 0; - tm = RDT_TM(irdt->ir_lo); - dlm = RDT_DLM(irdt->ir_lo); - dst = irdt->ir_hi; - - /* - * Mark the IRTE's TM as Edge to suppress broadcast EOI. - */ - if (intrr_suppress_brdcst_eoi) { - tm = TRIGGER_MODE_EDGE; - } - } else { - dm = MSI_ADDR_DM_PHYSICAL; - rh = MSI_ADDR_RH_FIXED; - tm = TRIGGER_MODE_EDGE; - dlm = 0; - dst = mregs->mr_addr; - } - - if (intrr_apic_mode == LOCAL_APIC) - dst = (dst & 0xFF) << 8; - - if (cnt == 1) { - irte.lo = IRTE_LOW(dst, vector, dlm, tm, rh, dm, 0, 1); - irte.hi = IRTE_HIGH(sid_svt_sq); - - /* set interrupt remapping table entry */ - bcopy(&irte, intr_remap_tbl->vaddr + - iidx * INTRR_RTE_SIZE, - INTRR_RTE_SIZE); - - qinv_iec_single(iommu, iidx); - - } else { - vector = irq_ptr->airq_vector; - for (i = 0; i < cnt; i++) { - irte.lo = IRTE_LOW(dst, vector, dlm, tm, rh, dm, 0, 1); - irte.hi = IRTE_HIGH(sid_svt_sq); - - /* set interrupt remapping table entry */ - bcopy(&irte, intr_remap_tbl->vaddr + - (iidx + i) * INTRR_RTE_SIZE, - INTRR_RTE_SIZE); - vector++; - } - - qinv_iec(iommu, iidx, cnt); - } -} - -/* free the remapping entry */ -static void -intr_remap_free_entry(apic_irq_t *irq_ptr) -{ - intel_iommu_state_t *iommu; - intr_remap_tbl_state_t *intr_remap_tbl; - uint32_t iidx; - - if (AIRQ_PRIVATE(irq_ptr) == INTRR_DISABLE) { - AIRQ_PRIVATE(irq_ptr) = NULL; - return; - } - - iommu = INTRR_PRIVATE(irq_ptr)->ir_iommu; - intr_remap_tbl = iommu->iu_intr_remap_tbl; - iidx = INTRR_PRIVATE(irq_ptr)->ir_iidx; - - bzero(intr_remap_tbl->vaddr + iidx * INTRR_RTE_SIZE, - INTRR_RTE_SIZE); - - qinv_iec_single(iommu, iidx); - - mutex_enter(&intr_remap_tbl->lock); - bitset_del(&intr_remap_tbl->map, iidx); - if (intr_remap_tbl->free == INTRR_IIDX_FULL) { - intr_remap_tbl->free = iidx; - } - mutex_exit(&intr_remap_tbl->lock); - - kmem_free(AIRQ_PRIVATE(irq_ptr), sizeof (intr_remap_private_t)); - AIRQ_PRIVATE(irq_ptr) = NULL; -} - -/* record the ioapic rdt entry */ -static void -intr_remap_record_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt) -{ - uint32_t rdt_entry, tm, pol, iidx, vector; - - rdt_entry = irdt->ir_lo; - - if (INTRR_PRIVATE(irq_ptr) != NULL) { - iidx = INTRR_PRIVATE(irq_ptr)->ir_iidx; - tm = RDT_TM(rdt_entry); - pol = RDT_POL(rdt_entry); - vector = irq_ptr->airq_vector; - irdt->ir_lo = (tm << INTRR_IOAPIC_TM_SHIFT) | - (pol << INTRR_IOAPIC_POL_SHIFT) | - ((iidx >> 15) << INTRR_IOAPIC_IIDX15_SHIFT) | - vector; - irdt->ir_hi = (iidx << INTRR_IOAPIC_IIDX_SHIFT) | - (1 << INTRR_IOAPIC_FORMAT_SHIFT); - } else { - irdt->ir_hi <<= APIC_ID_BIT_OFFSET; - } -} - -/* record the msi interrupt structure */ -/*ARGSUSED*/ -static void -intr_remap_record_msi(apic_irq_t *irq_ptr, 
msi_regs_t *mregs) -{ - uint_t iidx; - - if (INTRR_PRIVATE(irq_ptr) != NULL) { - iidx = INTRR_PRIVATE(irq_ptr)->ir_iidx; - - mregs->mr_data = 0; - mregs->mr_addr = MSI_ADDR_HDR | - ((iidx & 0x7fff) << INTRR_MSI_IIDX_SHIFT) | - (1 << INTRR_MSI_FORMAT_SHIFT) | (1 << INTRR_MSI_SHV_SHIFT) | - ((iidx >> 15) << INTRR_MSI_IIDX15_SHIFT); - } else { - mregs->mr_addr = MSI_ADDR_HDR | - (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) | - (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) | - (mregs->mr_addr << MSI_ADDR_DEST_SHIFT); - mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) | - mregs->mr_data; - } -}
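For reference, the queued-invalidation submit path deleted above (qinv_submit_inv_dsc() and its callers) is a single-producer descriptor ring shared with hardware: software copies a 16-byte descriptor into the slot at the tail, advances the tail register, and spins when the ring is full until hardware advances the head. The following is a minimal user-space model of that protocol, not driver code; qi_desc_t, qi_queue_t and qi_hw_fetch() are illustrative names, and the full test is simplified to the conventional next == head check.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define	QI_ENTRIES	256	/* the driver sizes this as 1 << (qinv_iqa_qs + 8) */

typedef struct qi_desc {
	uint64_t lo;		/* descriptor type and attribute fields */
	uint64_t hi;		/* address/mask fields */
} qi_desc_t;

typedef struct qi_queue {
	qi_desc_t ring[QI_ENTRIES];
	unsigned hw_head;	/* driver: read back from the queue head register */
	unsigned sw_tail;	/* driver: mirrored to the queue tail register */
} qi_queue_t;

/* stand-in for hardware fetching one descriptor and advancing the head */
static void
qi_hw_fetch(qi_queue_t *q)
{
	if (q->hw_head != q->sw_tail)
		q->hw_head = (q->hw_head + 1) % QI_ENTRIES;
}

static void
qi_submit(qi_queue_t *q, const qi_desc_t *d)
{
	unsigned next = (q->sw_tail + 1) % QI_ENTRIES;

	/* ring full: wait for hardware to fetch the next descriptor */
	while (next == q->hw_head)
		qi_hw_fetch(q);	/* driver: re-read the head register and spin */

	memcpy(&q->ring[q->sw_tail], d, sizeof (*d));
	q->sw_tail = next;	/* driver: write the tail register last */
}

int
main(void)
{
	static qi_queue_t q;
	qi_desc_t d = { 0x3, 0 };	/* e.g. an iotlb-invalidate type code */

	qi_submit(&q, &d);
	(void) printf("tail=%u head=%u\n", q.sw_tail, q.hw_head);
	return (0);
}

Exposing the tail position only after the descriptor body is in place is what makes the handoff safe; the driver gets the equivalent ordering from its uncached mapping of the queue memory.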
--- a/usr/src/uts/i86pc/io/iommu_rscs.c Sat Jan 30 15:04:39 2010 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,392 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - - -#include <sys/conf.h> -#include <sys/autoconf.h> -#include <sys/sysmacros.h> -#include <sys/debug.h> -#include <sys/psw.h> -#include <sys/ddidmareq.h> -#include <sys/kmem.h> -#include <sys/cmn_err.h> -#include <vm/seg.h> -#include <vm/seg_kmem.h> -#include <vm/seg_kpm.h> -#include <vm/seg_dev.h> -#include <sys/vmem.h> -#include <vm/hat.h> -#include <vm/as.h> -#include <vm/page.h> -#include <sys/avintr.h> -#include <sys/errno.h> -#include <sys/modctl.h> -#include <sys/ddi_impldefs.h> -#include <sys/sunddi.h> -#include <sys/sunndi.h> -#include <sys/mach_intr.h> -#include <vm/hat_i86.h> -#include <sys/machsystm.h> -#include <sys/iommu_rscs.h> -#include <sys/intel_iommu.h> - -ddi_dma_attr_t page_dma_attr = { - DMA_ATTR_V0, - 0U, - 0xffffffffU, - 0xffffffffU, - MMU_PAGESIZE, /* page aligned */ - 0x1, - 0x1, - 0xffffffffU, - 0xffffffffU, - 1, - 4, - 0 -}; - -ddi_device_acc_attr_t page_acc_attr = { - DDI_DEVICE_ATTR_V0, - DDI_NEVERSWAP_ACC, - DDI_STRICTORDER_ACC -}; - -typedef struct iommu_rscs_s { - /* - * Bounds of resource allocation. We will start allocating at rs_min - * and rollover at rs_max+1 (rs_max is included). e.g. for rs_min=0 - * and rs_max=7, we will have 8 total resources which can be alloced. - */ - uint_t rs_min; - uint_t rs_max; - - /* - * rs_free points to an array of 64-bit values used to track resource - * allocation. rs_free_size is the free buffer size in bytes. - */ - uint64_t *rs_free; - uint_t rs_free_size; - - /* - * last tracks the last alloc'd resource. This allows us to do a round - * robin allocation. - */ - uint_t rs_last; - - kmutex_t rs_mutex; -} iommu_rscs_state_t; - -static uint_t -iommu_pghdl_hash_func(paddr_t paddr) -{ - return (paddr % IOMMU_PGHDL_HASH_SIZE); -} - -/* - * iommu_page_alloc() - * - */ -iommu_pghdl_t * -iommu_page_alloc(intel_iommu_state_t *iommu, int kmflag) -{ - size_t actual_size = 0; - iommu_pghdl_t *pghdl; - caddr_t vaddr; - uint_t idx; - - ASSERT(kmflag == KM_SLEEP || kmflag == KM_NOSLEEP); - - pghdl = kmem_zalloc(sizeof (*pghdl), kmflag); - if (pghdl == NULL) { - return (0); - } - - if (ddi_dma_alloc_handle(ddi_root_node(), &page_dma_attr, DDI_DMA_SLEEP, - NULL, &pghdl->dma_hdl) != DDI_SUCCESS) { - kmem_free(pghdl, sizeof (*pghdl)); - return (0); - } - - if (ddi_dma_mem_alloc(pghdl->dma_hdl, PAGESIZE, &page_acc_attr, - DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED, - (kmflag == KM_SLEEP) ? 
DDI_DMA_SLEEP : DDI_DMA_DONTWAIT, - NULL, &vaddr, &actual_size, &pghdl->mem_hdl) != DDI_SUCCESS) { - ddi_dma_free_handle(&pghdl->dma_hdl); - kmem_free(pghdl, sizeof (*pghdl)); - return (0); - } - - ASSERT(actual_size == PAGESIZE); - - if (actual_size != PAGESIZE) { - ddi_dma_mem_free(&pghdl->mem_hdl); - ddi_dma_free_handle(&pghdl->dma_hdl); - kmem_free(pghdl, sizeof (*pghdl)); - return (0); - - } - - pghdl->paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr)); - pghdl->vaddr = vaddr; - - idx = iommu_pghdl_hash_func(pghdl->paddr); - pghdl->next = iommu->iu_pghdl_hash[idx]; - if (pghdl->next) - pghdl->next->prev = pghdl; - iommu->iu_pghdl_hash[idx] = pghdl; - - return (pghdl); -} - -/* - * iommu_page_free() - */ -void -iommu_page_free(intel_iommu_state_t *iommu, paddr_t paddr) -{ - uint_t idx; - iommu_pghdl_t *pghdl; - - idx = iommu_pghdl_hash_func(paddr); - pghdl = iommu->iu_pghdl_hash[idx]; - while (pghdl && pghdl->paddr != paddr) - pghdl = pghdl->next; - if (pghdl == NULL) { - cmn_err(CE_PANIC, - "Freeing a free IOMMU page: paddr=0x%" PRIx64, - paddr); - /*NOTREACHED*/ - } - if (pghdl->prev == NULL) - iommu->iu_pghdl_hash[idx] = pghdl->next; - else - pghdl->prev->next = pghdl->next; - if (pghdl->next) - pghdl->next->prev = pghdl->prev; - - ddi_dma_mem_free(&pghdl->mem_hdl); - ddi_dma_free_handle(&pghdl->dma_hdl); - kmem_free(pghdl, sizeof (*pghdl)); -} - -/* - * iommu_get_vaddr() - */ -caddr_t -iommu_get_vaddr(intel_iommu_state_t *iommu, paddr_t paddr) -{ - uint_t idx; - iommu_pghdl_t *pghdl; - - idx = iommu_pghdl_hash_func(paddr); - pghdl = iommu->iu_pghdl_hash[idx]; - while (pghdl && pghdl->paddr != paddr) - pghdl = pghdl->next; - if (pghdl == NULL) { - return (0); - } - return (pghdl->vaddr); -} - - -/* - * iommu_rscs_init() - * Initialize the resource structure. init() returns a handle to be - * used for the rest of the resource functions. This code is written assuming - * that min_val will be close to 0. Therefore, we will allocate the free - * buffer only taking max_val into account. - */ -void -iommu_rscs_init(uint_t min_val, uint_t max_val, iommu_rscs_t *handle) -{ - iommu_rscs_state_t *rstruct; - uint_t array_size; - uint_t index; - - - ASSERT(handle != NULL); - ASSERT(min_val < max_val); - - /* alloc space for resource structure */ - rstruct = kmem_alloc(sizeof (iommu_rscs_state_t), KM_SLEEP); - - /* - * Test to see if the max value is 64-bit aligned. If so, we don't need - * to allocate an extra 64-bit word. alloc space for free buffer - * (8 bytes per uint64_t). - */ - if ((max_val & 0x3F) == 0) { - rstruct->rs_free_size = (max_val >> 6) * 8; - } else { - rstruct->rs_free_size = ((max_val >> 6) + 1) * 8; - } - rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP); - - /* Initialize resource structure */ - rstruct->rs_min = min_val; - rstruct->rs_last = min_val; - rstruct->rs_max = max_val; - mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL); - - /* Mark all resources as free */ - array_size = rstruct->rs_free_size >> 3; - for (index = 0; index < array_size; index++) { - rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF; - } - - /* setup handle which is returned from this function */ - *handle = rstruct; -} - - -/* - * iommu_rscs_fini() - * Frees up the space allocated in init(). Notice that a pointer to the - * handle is used for the parameter. fini() will set the handle to NULL - * before returning. 
- */ -void -iommu_rscs_fini(iommu_rscs_t *handle) -{ - iommu_rscs_state_t *rstruct; - - - ASSERT(handle != NULL); - - rstruct = (iommu_rscs_state_t *)*handle; - - mutex_destroy(&rstruct->rs_mutex); - kmem_free(rstruct->rs_free, rstruct->rs_free_size); - kmem_free(rstruct, sizeof (iommu_rscs_state_t)); - - /* set handle to null. This helps catch bugs. */ - *handle = NULL; -} - - -/* - * iommu_rscs_alloc() - * alloc a resource. If alloc fails, we are out of resources. - */ -int -iommu_rscs_alloc(iommu_rscs_t handle, uint_t *resource) -{ - iommu_rscs_state_t *rstruct; - uint_t array_idx; - uint64_t free; - uint_t index; - uint_t last; - uint_t min; - uint_t max; - - - ASSERT(handle != NULL); - ASSERT(resource != NULL); - - rstruct = (iommu_rscs_state_t *)handle; - - mutex_enter(&rstruct->rs_mutex); - min = rstruct->rs_min; - max = rstruct->rs_max; - - /* - * Find a free resource. This will return out of the loop once it finds - * a free resource. There are a total of 'max'-'min'+1 resources. - * Performs a round robin allocation. - */ - for (index = min; index <= max; index++) { - - array_idx = rstruct->rs_last >> 6; - free = rstruct->rs_free[array_idx]; - last = rstruct->rs_last & 0x3F; - - /* if the next resource to check is free */ - if ((free & ((uint64_t)1 << last)) != 0) { - /* we are using this resource */ - *resource = rstruct->rs_last; - - /* take it out of the free list */ - rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last); - - /* - * increment the last count so we start checking the - * next resource on the next alloc(). Note the rollover - * at 'max'+1. - */ - rstruct->rs_last++; - if (rstruct->rs_last > max) { - rstruct->rs_last = rstruct->rs_min; - } - - /* unlock the resource structure */ - mutex_exit(&rstruct->rs_mutex); - - return (DDI_SUCCESS); - } - - /* - * This resource is not free, lets go to the next one. Note the - * rollover at 'max'. - */ - rstruct->rs_last++; - if (rstruct->rs_last > max) { - rstruct->rs_last = rstruct->rs_min; - } - } - - mutex_exit(&rstruct->rs_mutex); - - return (DDI_FAILURE); -} - - -/* - * iommu_rscs_free() - * Free the previously alloc'd resource. Once a resource has been free'd, - * it can be used again when alloc is called. - */ -void -iommu_rscs_free(iommu_rscs_t handle, uint_t resource) -{ - iommu_rscs_state_t *rstruct; - uint_t array_idx; - uint_t offset; - - - ASSERT(handle != NULL); - - rstruct = (iommu_rscs_state_t *)handle; - ASSERT(resource >= rstruct->rs_min); - ASSERT(resource <= rstruct->rs_max); - - mutex_enter(&rstruct->rs_mutex); - - /* Put the resource back in the free list */ - array_idx = resource >> 6; - offset = resource & 0x3F; - rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset); - - mutex_exit(&rstruct->rs_mutex); -}
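The file removed above implements two small allocators: a hash of page handles keyed by physical address, and the iommu_rscs_* round-robin bitmap allocator (one bit per resource, a rolling rs_last cursor, at most max-min+1 probes per allocation). Below is a compact model of the bitmap search only, under the simplifying assumptions that min is 0 and the map fits a fixed array; rscs_t and the function names are illustrative, not the removed interfaces.

#include <stdint.h>
#include <stdio.h>

#define	RS_MAX	127	/* resources 0 .. RS_MAX inclusive */

typedef struct rscs {
	unsigned last;			/* round-robin cursor */
	uint64_t free[(RS_MAX >> 6) + 1];
} rscs_t;

static void
rscs_init(rscs_t *r)
{
	unsigned i;

	r->last = 0;
	for (i = 0; i < (RS_MAX >> 6) + 1u; i++)
		r->free[i] = ~(uint64_t)0;	/* all resources start free */
}

static int
rscs_alloc(rscs_t *r, unsigned *res)
{
	unsigned tries;

	/* probe at most RS_MAX + 1 slots, starting after the last alloc */
	for (tries = 0; tries <= RS_MAX; tries++) {
		unsigned word = r->last >> 6, bit = r->last & 0x3F;

		if (r->free[word] & ((uint64_t)1 << bit)) {
			r->free[word] &= ~((uint64_t)1 << bit);
			*res = r->last;
			r->last = (r->last == RS_MAX) ? 0 : r->last + 1;
			return (0);
		}
		r->last = (r->last == RS_MAX) ? 0 : r->last + 1;
	}
	return (-1);	/* exhausted */
}

static void
rscs_free(rscs_t *r, unsigned res)
{
	r->free[res >> 6] |= (uint64_t)1 << (res & 0x3F);
}

int
main(void)
{
	rscs_t r;
	unsigned a, b;

	rscs_init(&r);
	(void) rscs_alloc(&r, &a);
	(void) rscs_alloc(&r, &b);
	rscs_free(&r, a);
	(void) printf("a=%u b=%u\n", a, b);	/* a=0 b=1 */
	return (0);
}

Round-robin probing matters here because identifiers are recycled: resuming the search after the last allocation spreads reuse across the space rather than immediately re-handing out a just-freed identifier.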
--- a/usr/src/uts/i86pc/io/mp_platform_common.c Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/i86pc/io/mp_platform_common.c Sat Jan 30 18:23:16 2010 -0800 @@ -1759,7 +1759,7 @@ } #if !defined(__xpv) - apic_vt_ops->apic_intrr_free_entry(irqptr); + apic_vt_ops->apic_intrmap_free_entry(irqptr); #endif /* @@ -2966,10 +2966,10 @@ #if !defined(__xpv) irdt.ir_hi = AV_TOALL >> APIC_ID_BIT_OFFSET; - apic_vt_ops->apic_intrr_alloc_entry(irq_ptr); - apic_vt_ops->apic_intrr_map_entry( + apic_vt_ops->apic_intrmap_alloc_entry(irq_ptr); + apic_vt_ops->apic_intrmap_map_entry( irq_ptr, (void *)&irdt); - apic_vt_ops->apic_intrr_record_rdt(irq_ptr, &irdt); + apic_vt_ops->apic_intrmap_record_rdt(irq_ptr, &irdt); /* Write the RDT entry -- no specific CPU binding */ WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapicindex, intin_no, @@ -3010,9 +3010,9 @@ irdt.ir_hi = cpu_infop->aci_local_id; #if !defined(__xpv) - apic_vt_ops->apic_intrr_alloc_entry(irq_ptr); - apic_vt_ops->apic_intrr_map_entry(irq_ptr, (void *)&irdt); - apic_vt_ops->apic_intrr_record_rdt(irq_ptr, &irdt); + apic_vt_ops->apic_intrmap_alloc_entry(irq_ptr); + apic_vt_ops->apic_intrmap_map_entry(irq_ptr, (void *)&irdt); + apic_vt_ops->apic_intrmap_record_rdt(irq_ptr, &irdt); /* Write the RDT entry -- bind to a specific CPU: */ WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapicindex, intin_no,
--- a/usr/src/uts/i86pc/io/pcplusmp/apic.c Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/i86pc/io/pcplusmp/apic.c Sat Jan 30 18:23:16 2010 -0800 @@ -108,7 +108,7 @@ static void apic_timer_enable(void); static void apic_timer_disable(void); static void apic_post_cyclic_setup(void *arg); -static void apic_intrr_init(int apic_mode); +static void apic_intrmap_init(int apic_mode); static void apic_record_ioapic_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt); static void apic_record_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs); @@ -366,7 +366,7 @@ uint32_t apic_divide_reg_init = 0; /* 0 - divide by 2 */ /* default apic ops without interrupt remapping */ -static apic_intrr_ops_t apic_nointrr_ops = { +static apic_intrmap_ops_t apic_nointrmap_ops = { (int (*)(int))return_instr, (void (*)(int))return_instr, (void (*)(apic_irq_t *))return_instr, @@ -376,7 +376,7 @@ apic_record_msi, }; -apic_intrr_ops_t *apic_vt_ops = &apic_nointrr_ops; +apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops; /* * This is the loadable module wrapper @@ -759,7 +759,7 @@ * Initialize and enable interrupt remapping before apic * hardware initialization */ - apic_intrr_init(apic_mode); + apic_intrmap_init(apic_mode); /* * On UniSys Model 6520, the BIOS leaves vector 0x20 isr @@ -2591,7 +2591,7 @@ } static void -apic_intrr_init(int apic_mode) +apic_intrmap_init(int apic_mode) { int suppress_brdcst_eoi = 0; @@ -2602,8 +2602,9 @@ * documentation (yet)), initialize interrupt remapping * support before initializing the X2APIC unit. */ - if (((apic_intrr_ops_t *)psm_vt_ops)->apic_intrr_init(apic_mode) - == DDI_SUCCESS) { + if (((apic_intrmap_ops_t *)psm_vt_ops)-> + apic_intrmap_init(apic_mode) == DDI_SUCCESS) { + apic_vt_ops = psm_vt_ops; /* @@ -2615,7 +2616,7 @@ suppress_brdcst_eoi = 1; } - apic_vt_ops->apic_intrr_enable(suppress_brdcst_eoi); + apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi); if (apic_detect_x2apic()) { apic_enable_x2apic();
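The apic_intrr_* to apic_intrmap_* rename above leaves the dispatch pattern itself intact: apic_vt_ops points at a table of no-op stubs until a provider's init routine succeeds, so call sites never have to test for NULL. A stripped-down sketch of that pattern follows, with an illustrative two-member ops table standing in for the real seven-member one.

#include <stdio.h>

typedef struct intrmap_ops {
	int	(*imap_init)(int apic_mode);
	void	(*imap_enable)(int flag);
} intrmap_ops_t;

/* default stubs: always present, so callers never need a NULL check */
static int	noop_init(int mode)	{ (void)mode; return (-1); }
static void	noop_enable(int f)	{ (void)f; }
static intrmap_ops_t noop_ops = { noop_init, noop_enable };

/* a real provider, as registered by some platform module */
static int	real_init(int mode)	{ (void)mode; return (0); }
static void	real_enable(int f)	{ (void) printf("remap enabled %d\n", f); }
static intrmap_ops_t real_ops = { real_init, real_enable };

static intrmap_ops_t *vt_ops = &noop_ops;	/* like apic_vt_ops */

int
main(void)
{
	intrmap_ops_t *provider = &real_ops;	/* like psm_vt_ops */

	/* swap in the provider only when its init succeeds */
	if (provider->imap_init(0) == 0)
		vt_ops = provider;

	vt_ops->imap_enable(1);	/* safe to call either way */
	return (0);
}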
--- a/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c Sat Jan 30 18:23:16 2010 -0800 @@ -94,9 +94,9 @@ msi_regs.mr_data = vector; msi_regs.mr_addr = target_apic_id; - apic_vt_ops->apic_intrr_alloc_entry(irq_ptr); - apic_vt_ops->apic_intrr_map_entry(irq_ptr, (void *)&msi_regs); - apic_vt_ops->apic_intrr_record_msi(irq_ptr, &msi_regs); + apic_vt_ops->apic_intrmap_alloc_entry(irq_ptr); + apic_vt_ops->apic_intrmap_map_entry(irq_ptr, (void *)&msi_regs); + apic_vt_ops->apic_intrmap_record_msi(irq_ptr, &msi_regs); /* MSI Address */ msi_addr = msi_regs.mr_addr;
--- a/usr/src/uts/i86pc/io/rootnex.c Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/i86pc/io/rootnex.c Sat Jan 30 18:23:16 2010 -0800 @@ -67,8 +67,10 @@ #include <sys/hypervisor.h> #include <sys/bootconf.h> #include <vm/kboot_mmu.h> -#else -#include <sys/intel_iommu.h> +#endif + +#if defined(__amd64) && !defined(__xpv) +#include <sys/immu.h> #endif @@ -90,6 +92,8 @@ int rootnex_sync_check_parms = 0; #endif +boolean_t rootnex_dmar_not_setup; + /* Master Abort and Target Abort panic flag */ int rootnex_fm_ma_ta_panic_flag = 0; @@ -220,7 +224,7 @@ ddi_dma_cookie_t *cookiep, uint_t *ccountp); static int rootnex_coredma_unbindhdl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle); -#if !defined(__xpv) +#if defined(__amd64) && !defined(__xpv) static void rootnex_coredma_reset_cookies(dev_info_t *dip, ddi_dma_handle_t handle); static int rootnex_coredma_get_cookies(dev_info_t *dip, ddi_dma_handle_t handle, @@ -271,6 +275,7 @@ static int rootnex_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); static int rootnex_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); +static int rootnex_quiesce(dev_info_t *dip); static struct dev_ops rootnex_ops = { DEVO_REV, @@ -284,7 +289,7 @@ &rootnex_cb_ops, &rootnex_bus_ops, NULL, - ddi_quiesce_not_needed, /* quiesce */ + rootnex_quiesce, /* quiesce */ }; static struct modldrv rootnex_modldrv = { @@ -299,7 +304,7 @@ NULL }; -#if !defined(__xpv) +#if defined(__amd64) && !defined(__xpv) static iommulib_nexops_t iommulib_nexops = { IOMMU_NEXOPS_VERSION, "Rootnex IOMMU ops Vers 1.1", @@ -437,7 +442,11 @@ case DDI_ATTACH: break; case DDI_RESUME: +#if defined(__amd64) && !defined(__xpv) + return (immu_unquiesce()); +#else return (DDI_SUCCESS); +#endif default: return (DDI_FAILURE); } @@ -453,7 +462,6 @@ rootnex_state->r_err_ibc = (ddi_iblock_cookie_t)ipltospl(15); rootnex_state->r_reserved_msg_printed = B_FALSE; rootnex_cnt = &rootnex_state->r_counters[0]; - rootnex_state->r_intel_iommu_enabled = B_FALSE; /* * Set minimum fm capability level for i86pc platforms and then @@ -481,21 +489,7 @@ /* Initialize rootnex event handle */ i_ddi_rootnex_init_events(dip); -#if !defined(__xpv) -#if defined(__amd64) - /* probe intel iommu */ - intel_iommu_probe_and_parse(); - - /* attach the iommu nodes */ - if (intel_iommu_support) { - if (intel_iommu_attach_dmar_nodes() == DDI_SUCCESS) { - rootnex_state->r_intel_iommu_enabled = B_TRUE; - } else { - intel_iommu_release_dmar_info(); - } - } -#endif - +#if defined(__amd64) && !defined(__xpv) e = iommulib_nexus_register(dip, &iommulib_nexops, &rootnex_state->r_iommulib_handle); @@ -516,12 +510,16 @@ { switch (cmd) { case DDI_SUSPEND: - break; +#if defined(__amd64) && !defined(__xpv) + return (immu_quiesce()); +#else + return (DDI_SUCCESS); +#endif default: return (DDI_FAILURE); } - - return (DDI_SUCCESS); + /*NOTREACHED*/ + } @@ -1746,7 +1744,7 @@ rootnex_dma_allochdl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr, int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *handlep) { -#if !defined(__xpv) +#if defined(__amd64) && !defined(__xpv) uint_t error = ENOTSUP; int retval; @@ -1806,7 +1804,7 @@ static int rootnex_dma_freehdl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle) { -#if !defined(__xpv) +#if defined(__amd64) && !defined(__xpv) if (IOMMU_USED(rdip)) { return (iommulib_nexdma_freehdl(dip, rdip, handle)); } @@ -1814,7 +1812,6 @@ return (rootnex_coredma_freehdl(dip, rdip, handle)); } - /*ARGSUSED*/ static int rootnex_coredma_bindhdl(dev_info_t *dip, dev_info_t *rdip, @@ -1828,7 +1825,6 @@ int kmflag; int 
e; - hp = (ddi_dma_impl_t *)handle; dma = (rootnex_dma_t *)hp->dmai_private; sinfo = &dma->dp_sglinfo; @@ -1879,36 +1875,25 @@ /* save away the original bind info */ dma->dp_dma = dmareq->dmar_object; -#if !defined(__xpv) - if (rootnex_state->r_intel_iommu_enabled) { - e = intel_iommu_map_sgl(handle, dmareq, - rootnex_state->r_prealloc_cookies); - - switch (e) { - case IOMMU_SGL_SUCCESS: - goto rootnex_sgl_end; - - case IOMMU_SGL_DISABLE: - goto rootnex_sgl_start; - - case IOMMU_SGL_NORESOURCES: - cmn_err(CE_WARN, "iommu map sgl failed for %s", - ddi_node_name(dma->dp_dip)); - rootnex_clean_dmahdl(hp); - return (DDI_DMA_NORESOURCES); - - default: - cmn_err(CE_WARN, - "undefined value returned from" - " intel_iommu_map_sgl: %d", - e); - rootnex_clean_dmahdl(hp); - return (DDI_DMA_NORESOURCES); - } +#if defined(__amd64) && !defined(__xpv) + e = immu_map_sgl(hp, dmareq, rootnex_prealloc_cookies, rdip); + switch (e) { + case DDI_DMA_MAPPED: + goto out; + case DDI_DMA_USE_PHYSICAL: + break; + case DDI_DMA_PARTIAL: + ddi_err(DER_PANIC, rdip, "Partial DVMA map"); + e = DDI_DMA_NORESOURCES; + /*FALLTHROUGH*/ + default: + ddi_err(DER_MODE, rdip, "DVMA map failed"); + ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]); + rootnex_clean_dmahdl(hp); + return (e); } #endif -rootnex_sgl_start: /* * Figure out a rough estimate of what maximum number of pages this * buffer could use (a high estimate of course). @@ -1963,15 +1948,15 @@ /* * Get the real sgl. rootnex_get_sgl will fill in cookie array while - * looking at the contraints in the dma structure. It will then put some - * additional state about the sgl in the dma struct (i.e. is the sgl - * clean, or do we need to do some munging; how many pages need to be - * copied, etc.) + * looking at the constraints in the dma structure. It will then put + * some additional state about the sgl in the dma struct (i.e. is + * the sgl clean, or do we need to do some munging; how many pages + * need to be copied, etc.) */ rootnex_get_sgl(&dmareq->dmar_object, dma->dp_cookies, &dma->dp_sglinfo); -rootnex_sgl_end: +out: ASSERT(sinfo->si_sgl_size <= sinfo->si_max_pages); /* if we don't need a copy buffer, we don't need to sync */ if (sinfo->si_copybuf_req == 0) { @@ -2008,11 +1993,12 @@ *ccountp = sinfo->si_sgl_size; hp->dmai_cookie++; hp->dmai_rflags &= ~DDI_DMA_PARTIAL; - hp->dmai_nwin = 1; - ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS]); - ROOTNEX_DPROBE3(rootnex__bind__fast, dev_info_t *, rdip, - uint64_t, rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS], uint_t, - dma->dp_dma.dmao_size); + ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS]); + DTRACE_PROBE3(rootnex__bind__fast, dev_info_t *, rdip, + uint64_t, rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS], + uint_t, dma->dp_dma.dmao_size); + + return (DDI_DMA_MAPPED); } @@ -2055,6 +2041,7 @@ if (e == DDI_DMA_MAPPED) { hp->dmai_rflags &= ~DDI_DMA_PARTIAL; *ccountp = sinfo->si_sgl_size; + hp->dmai_nwin = 1; } else { hp->dmai_rflags |= DDI_DMA_PARTIAL; *ccountp = dma->dp_window[dma->dp_current_win].wd_cookie_cnt; @@ -2070,7 +2057,6 @@ return (e); } - /* * rootnex_dma_bindhdl() * called from ddi_dma_addr_bind_handle() and ddi_dma_buf_bind_handle(). 
@@ -2080,7 +2066,7 @@ ddi_dma_handle_t handle, struct ddi_dma_req *dmareq, ddi_dma_cookie_t *cookiep, uint_t *ccountp) { -#if !defined(__xpv) +#if defined(__amd64) && !defined(__xpv) if (IOMMU_USED(rdip)) { return (iommulib_nexdma_bindhdl(dip, rdip, handle, dmareq, cookiep, ccountp)); @@ -2090,6 +2076,8 @@ cookiep, ccountp)); } + + /*ARGSUSED*/ static int rootnex_coredma_unbindhdl(dev_info_t *dip, dev_info_t *rdip, @@ -2136,12 +2124,13 @@ rootnex_teardown_copybuf(dma); rootnex_teardown_windows(dma); -#if !defined(__xpv) +#if defined(__amd64) && !defined(__xpv) /* - * If intel iommu enabled, clean up the page tables and free the dvma + * Clean up the page tables and free the dvma */ - if (rootnex_state->r_intel_iommu_enabled) { - intel_iommu_unmap_sgl(handle); + e = immu_unmap_sgl(hp, rdip); + if (e != DDI_DMA_USE_PHYSICAL && e != DDI_SUCCESS) { + return (e); } #endif @@ -2178,7 +2167,7 @@ rootnex_dma_unbindhdl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle) { -#if !defined(__xpv) +#if defined(__amd64) && !defined(__xpv) if (IOMMU_USED(rdip)) { return (iommulib_nexdma_unbindhdl(dip, rdip, handle)); } @@ -2186,7 +2175,7 @@ return (rootnex_coredma_unbindhdl(dip, rdip, handle)); } -#if !defined(__xpv) +#if defined(__amd64) && !defined(__xpv) static int rootnex_coredma_get_sleep_flags(ddi_dma_handle_t handle) @@ -2491,7 +2480,6 @@ return (DDI_SUCCESS); } - /* * rootnex_valid_bind_parms() * Called in ddi_dma_*_bind_handle path to validate its parameters. @@ -2794,7 +2782,6 @@ } } - /* * rootnex_bind_slowpath() * Call in the bind path if the calling driver can't use the sgl without @@ -4229,7 +4216,7 @@ rootnex_dma_sync(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle, off_t off, size_t len, uint_t cache_flags) { -#if !defined(__xpv) +#if defined(__amd64) && !defined(__xpv) if (IOMMU_USED(rdip)) { return (iommulib_nexdma_sync(dip, rdip, handle, off, len, cache_flags)); @@ -4516,7 +4503,7 @@ uint_t win, off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep, uint_t *ccountp) { -#if !defined(__xpv) +#if defined(__amd64) && !defined(__xpv) if (IOMMU_USED(rdip)) { return (iommulib_nexdma_win(dip, rdip, handle, win, offp, lenp, cookiep, ccountp)); @@ -4916,8 +4903,8 @@ end_addr = start_addr + csize; /* - * if the faulted address is within the physical address - * range of the cookie, return DDI_FM_NONFATAL. + * if the faulted address is within the physical address + * range of the cookie, return DDI_FM_NONFATAL. */ if ((fault_addr >= start_addr) && (fault_addr <= end_addr)) { @@ -4929,3 +4916,34 @@ /* fault_addr not within this DMA handle */ return (DDI_FM_UNKNOWN); } + +/*ARGSUSED*/ +static int +rootnex_quiesce(dev_info_t *dip) +{ +#if defined(__amd64) && !defined(__xpv) + return (immu_quiesce()); +#else + return (DDI_SUCCESS); +#endif +} + +#if defined(__xpv) +void +immu_init(void) +{ + ; +} + +void +immu_startup(void) +{ + ; +} +/*ARGSUSED*/ +void +immu_physmem_update(uint64_t addr, uint64_t size) +{ + ; +} +#endif
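Every rootnex DMA entry point touched above now follows one shape: if an IOMMU driver has taken over the requesting device, forward the operation through iommulib; otherwise fall through to the core rootnex implementation. A schematic of that shape, where struct devinfo and the iommu_used flag stand in for the real dev_info_t and the IOMMU_USED() test:

#include <stdio.h>

struct devinfo {
	int iommu_used;	/* stands in for the IOMMU_USED(rdip) test */
};

/* core path: direct physical addresses, no remapping */
static int
coredma_bindhdl(struct devinfo *rdip)
{
	(void)rdip;
	(void) printf("core bind path\n");
	return (0);
}

/* IOMMU path: the operation is routed to the registered IOMMU driver */
static int
iommulib_bindhdl(struct devinfo *rdip)
{
	(void)rdip;
	(void) printf("iommulib bind path\n");
	return (0);
}

static int
dma_bindhdl(struct devinfo *rdip)
{
	if (rdip->iommu_used)
		return (iommulib_bindhdl(rdip));
	return (coredma_bindhdl(rdip));
}

int
main(void)
{
	struct devinfo direct = { 0 }, remapped = { 1 };

	(void) dma_bindhdl(&direct);
	(void) dma_bindhdl(&remapped);
	return (0);
}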
--- a/usr/src/uts/i86pc/os/ddi_impl.c Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/i86pc/os/ddi_impl.c Sat Jan 30 18:23:16 2010 -0800 @@ -101,6 +101,10 @@ static int kmem_override_cache_attrs(caddr_t, size_t, uint_t); +#if defined(__amd64) && !defined(__xpv) +extern void immu_init(void); +#endif + #define CTGENTRIES 15 static struct ctgas { @@ -202,6 +206,18 @@ /* reprogram devices not set up by firmware (BIOS) */ impl_bus_reprobe(); +#if defined(__amd64) && !defined(__xpv) + /* + * Setup but don't startup the IOMMU + * Startup happens later via a direct call + * to IOMMU code by boot code. + * At this point, all PCI bus renumbering + * is done, so safe to init the IMMU + * AKA Intel IOMMU. + */ + immu_init(); +#endif + /* * attach the isa nexus to get ACPI resource usage * isa is "kind of" a pseudo node
--- a/usr/src/uts/i86pc/os/fakebop.c Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/i86pc/os/fakebop.c Sat Jan 30 18:23:16 2010 -0800 @@ -58,7 +58,6 @@ #endif #include <vm/kboot_mmu.h> #include <vm/hat_pte.h> -#include <sys/dmar_acpi.h> #include <sys/kobj.h> #include <sys/kobj_lex.h> #include <sys/pci_cfgspace_impl.h> @@ -2225,13 +2224,6 @@ tp->number * tp->number); } -static void -process_dmar(struct dmar *tp) -{ - bsetprop(DMAR_TABLE_PROPNAME, strlen(DMAR_TABLE_PROPNAME), - tp, tp->hdr.len); -} - #else /* __xpv */ static void enumerate_xen_cpus() @@ -2274,8 +2266,6 @@ if (slit_ptr = (struct slit *)find_fw_table("SLIT")) process_slit(slit_ptr); - if (tp = find_fw_table("DMAR")) - process_dmar((struct dmar *)tp); tp = find_fw_table("MCFG"); #else /* __xpv */ enumerate_xen_cpus();
--- a/usr/src/uts/i86pc/os/startup.c Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/i86pc/os/startup.c Sat Jan 30 18:23:16 2010 -0800 @@ -151,6 +151,10 @@ void *gfx_devinfo_list; +#if defined(__amd64) && !defined(__xpv) +extern void immu_startup(void); +#endif + /* * XXX make declaration below "static" when drivers no longer use this * interface. @@ -171,10 +175,6 @@ static void startup_end(void); static void layout_kernel_va(void); -#if !defined(__xpv) -void (*rootnex_iommu_init)(void) = NULL; -#endif - /* * Declare these as initialized data so we can patch them. */ @@ -2137,11 +2137,14 @@ xs_domu_init(); #endif -#if !defined(__xpv) - if (rootnex_iommu_init != NULL) { - rootnex_iommu_init(); - } +#if defined(__amd64) && !defined(__xpv) + /* + * Intel IOMMU has been setup/initialized in ddi_impl.c + * Start it up now. + */ + immu_startup(); #endif + PRM_POINT("Enabling interrupts"); (*picinitf)(); sti();
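Taken together, the ddi_impl.c and startup.c hunks above split IOMMU bring-up into two phases: immu_init() runs once PCI bus renumbering is complete (parse tables, build state, translation still off), and immu_startup() enables the hardware later, just before interrupts come up. A sketch of that ordering contract, with illustrative model_* names and flags rather than the real immu interfaces:

#include <stdio.h>

static int immu_setup;		/* tables parsed, state allocated */
static int immu_running;	/* translation actually enabled */

/* phase 1: called after PCI bus renumbering, well before interrupts */
static void
model_immu_init(void)
{
	/* parse ACPI DMAR, size structures; do not touch DMA yet */
	immu_setup = 1;
}

/* phase 2: called from startup, just before enabling interrupts */
static void
model_immu_startup(void)
{
	if (!immu_setup)
		return;		/* nothing was set up; run untranslated */
	immu_running = 1;	/* program the units and turn translation on */
}

int
main(void)
{
	model_immu_init();
	model_immu_startup();
	(void) printf("setup=%d running=%d\n", immu_setup, immu_running);
	return (0);
}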
--- a/usr/src/uts/i86pc/rootnex/Makefile Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/i86pc/rootnex/Makefile Sat Jan 30 18:23:16 2010 -0800 @@ -20,7 +20,7 @@ # # # uts/i86pc/rootnex/Makefile -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # This makefile drives the production of the rootnex driver @@ -85,7 +85,7 @@ modlintlib: $(MODLINTLIB_DEPS) -clean.lint: $(CLEAN_LINT_DEPS) +clean.lint: $(CLEAN_DEPS) install: $(INSTALL_DEPS) $(CONF_INSTALL_DEPS)
--- a/usr/src/uts/i86pc/sys/apic.h Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/i86pc/sys/apic.h Sat Jan 30 18:23:16 2010 -0800 @@ -486,7 +486,7 @@ uint_t airq_busy; /* How frequently did clock find */ /* us in this */ struct apic_irq *airq_next; /* chain of intpts sharing a vector */ - void *airq_intrr_private; /* intr remap private data */ + void *airq_intrmap_private; /* intr remap private data */ } apic_irq_t; #define IRQ_USER_BOUND 0x80000000 /* user requested bind if set in airq_cpu */ @@ -556,15 +556,15 @@ /* * APIC ops to support intel interrupt remapping */ -typedef struct apic_intrr_ops { - int (*apic_intrr_init)(int); - void (*apic_intrr_enable)(int); - void (*apic_intrr_alloc_entry)(apic_irq_t *); - void (*apic_intrr_map_entry)(apic_irq_t *, void *); - void (*apic_intrr_free_entry)(apic_irq_t *); - void (*apic_intrr_record_rdt)(apic_irq_t *, ioapic_rdt_t *); - void (*apic_intrr_record_msi)(apic_irq_t *, msi_regs_t *); -} apic_intrr_ops_t; +typedef struct apic_intrmap_ops { + int (*apic_intrmap_init)(int); + void (*apic_intrmap_enable)(int); + void (*apic_intrmap_alloc_entry)(apic_irq_t *); + void (*apic_intrmap_map_entry)(apic_irq_t *, void *); + void (*apic_intrmap_free_entry)(apic_irq_t *); + void (*apic_intrmap_record_rdt)(apic_irq_t *, ioapic_rdt_t *); + void (*apic_intrmap_record_msi)(apic_irq_t *, msi_regs_t *); +} apic_intrmap_ops_t; /* * Various poweroff methods and ports & bits for them @@ -862,7 +862,7 @@ extern void apic_set_directed_EOI_handler(); extern int apic_directed_EOI_supported(); -extern apic_intrr_ops_t *apic_vt_ops; +extern apic_intrmap_ops_t *apic_vt_ops; #ifdef __cplusplus }
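The rename from apic_intrr_ops to apic_intrmap_ops is mechanical, but the pattern it carries is worth spelling out: the IOMMU code installs one ops vector (apic_vt_ops) and the APIC code calls through it without knowing whether interrupt remapping is active. A user-land stand-in with a reduced set of operations (all types here are fakes, not the kernel's):

#include <stdio.h>

typedef struct fake_irq { int vector; void *priv; } fake_irq_t;

typedef struct intrmap_ops {
    int  (*init)(int apic_mode);
    void (*alloc_entry)(fake_irq_t *);
    void (*free_entry)(fake_irq_t *);
} intrmap_ops_t;

static int  noop_init(int m)          { (void) m; return (0); }
static void noop_alloc(fake_irq_t *i) { i->priv = NULL; }
static void noop_free(fake_irq_t *i)  { i->priv = NULL; }

/* default table used when no IOMMU provides remapping */
static intrmap_ops_t noop_ops = { noop_init, noop_alloc, noop_free };
static intrmap_ops_t *vt_ops = &noop_ops;       /* cf. apic_vt_ops */

int
main(void)
{
    fake_irq_t irq = { 0x20, NULL };

    (void) vt_ops->init(0);
    vt_ops->alloc_entry(&irq);
    printf("irq vector 0x%x mapped, priv=%p\n", irq.vector, irq.priv);
    vt_ops->free_entry(&irq);
    return (0);
}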
--- a/usr/src/uts/i86pc/sys/dmar_acpi.h Sat Jan 30 15:04:39 2010 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,236 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Portions Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright (c) 2008, Intel Corporation. - * All rights reserved. - */ - -#ifndef _SYS_DMAR_ACPI_H -#define _SYS_DMAR_ACPI_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define DMAR_TABLE_PROPNAME "dmar-table" - -#define DMAR_UNIT_TYPE_DRHD 0 -#define DMAR_UNIT_TYPE_RMRR 1 -#define DMAR_UNIT_TYPE_ATSR 2 - -#define DEV_SCOPE_ENDPOINT 1 -#define DEV_SCOPE_P2P 2 -#define DEV_SCOPE_IOAPIC 3 -#define DEV_SCOPE_HPET 4 - -#define INCLUDE_PCI_ALL 0x01 -#define DMAR_MAX_SEGMENT 1 - -#define IOMMU_PAGE_SIZE_4K (1UL << 12) -#define IOMMU_REG_SIZE (1UL << 12) -#define PARSE_DMAR_SUCCESS 1 -#define PARSE_DMAR_FAIL 0 - -#define for_each_in_list(list, node) \ - for (node = list_head(list); node != NULL; \ - node = list_next(list, node)) - -/* - * The following structure describes the formate of - * DMAR ACPI table format. They are used to parse - * DMAR ACPI table. - * - * Read the spec for the meaning of each member. - */ - -/* DMAR ACPI table header */ -typedef struct dmar_acpi_head { - char dh_sig[4]; - uint32_t dh_len; - uint8_t dh_rev; - uint8_t dh_checksum; - char dh_oemid[6]; - char dh_oemtblid[8]; - uint32_t dh_oemrev; - char dh_asl[4]; - uint32_t dh_aslrev; - uint8_t dh_haw; - uint8_t dh_flags; - uint8_t dh_reserved[10]; -} dmar_acpi_head_t; - -/* Remapping structure header */ -typedef struct dmar_acpi_unit_head { - uint16_t uh_type; - uint16_t uh_length; -} dmar_acpi_unit_head_t; - -/* DRHD unit structure */ -typedef struct dmar_acpi_drhd { - dmar_acpi_unit_head_t dr_header; - uint8_t dr_flags; - uint8_t dr_reserved; - uint16_t dr_segment; - uint64_t dr_baseaddr; -} dmar_acpi_drhd_t; - -/* Device scope structure */ -typedef struct dmar_acpi_dev_scope { - uint8_t ds_type; - uint8_t ds_length; - uint8_t ds_reserved[2]; - uint8_t ds_enumid; - uint8_t ds_sbusnum; -} dmar_acpi_dev_scope_t; - -/* RMRR unit structure */ -typedef struct dmar_acpi_rmrr { - dmar_acpi_unit_head_t rm_header; - uint8_t rm_reserved[2]; - uint16_t rm_segment; - uint64_t rm_baseaddr; - uint64_t rm_limiaddr; -} dmar_acpi_rmrr_t; - -/* - * The following structures describes kernel recorded - * information about the DRHD and RMRR. 
- */ - -/* - * DRHD information structure - * - * node - the drhd info structure is inserted in the - * list embedded in the intel_dmar_info - * di_segment - the pci segment associated with this drhd - * di_reg_base - base address of the register set, the size - * of this set is 4K - * di_include_all - is it an include_all unit - * di_dev_list - the dev_info list get from the device scope, - * the node of this list is pci_dev_info_t, - * which present a single pci device - * di_dip - pointer to the dev_info for this drhd in the - * device tree - * di_iommu - link to the iommu state structure - */ -typedef struct drhd_info { - list_node_t node; - uint16_t di_segment; - uint64_t di_reg_base; - boolean_t di_include_all; - list_t di_dev_list; - dev_info_t *di_dip; - void *di_iommu; -} drhd_info_t; - -/* - * RMRR information structure - * - * node - the rmrr info structure is inserted in the - * list embedded in the intel_dmar_info - * ri_segment - the pci segment associated with this rmrr - * ri_baseaddr - the low address of the reserved range - * ri_limiaddr - the high address of the reserved range - * ri_dev_list - the dev_info list get from the device scope, - * the node of this list is pci_dev_info_t, w- - * hich present a single pci device - */ -typedef struct rmrr_info { - list_node_t node; - list_node_t node4states; - uint16_t ri_segment; - uint64_t ri_baseaddr; - uint64_t ri_limiaddr; - list_t ri_dev_list; -} rmrr_info_t; - -/* - * Intel IOMMU information structure - * - * dmari_haw - haw (host address width) indicates the max- - * imum DMA physical addressability by this - * platform. - * dmari_intr_remap - does this platform support intr remapping - * dmari_drhd - the list array of drhd units with the - * segment number as the index into this array - * dmari_rmrr - list array for the rmrr - */ -typedef struct intel_dmar_info { - uint8_t dmari_haw; - boolean_t dmari_intr_remap; - list_t dmari_drhd[DMAR_MAX_SEGMENT]; - list_t dmari_rmrr[DMAR_MAX_SEGMENT]; -} intel_dmar_info_t; - -/* - * The pci device node in the dev_list of drhd_info and - * rmrr_info - * - * node - list node - * bus, dev, func - bus, device and function number of - * - this pci device - * pdi_type - type of this device, includes - * 0x01 : pci endpoint - * 0x02 : pci p2p bridge - * 0x03 : ioapci - * 0x04 : msi capable hpet - * pdi_sec_bus - record the bus number of the PCI bus - * segment to which the secondary interface - * of the bridge is connected - * pdi_sub_bus - record the bus number of the highest - * numbered PCI bus segment which is behind - * (or subordinate to) the bridge - */ -typedef struct pci_dev_scope { - list_node_t node; - uint8_t pds_bus; - uint8_t pds_dev; - uint8_t pds_func; - uint8_t pds_type; -} pci_dev_scope_t; - -extern boolean_t intel_iommu_support; -extern intel_dmar_info_t *dmar_info; -extern void intel_iommu_release_dmar_info(void); -extern void intel_iommu_probe_and_parse(void); - -/* - * interrupt source id and drhd info for ioapic - */ -typedef struct ioapic_drhd_info { - list_node_t node; - uchar_t ioapic_id; /* ioapic id */ - uint16_t sid; /* ioapic source id */ - drhd_info_t *drhd; -} ioapic_drhd_info_t; - -extern list_t ioapic_drhd_infos; - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_DMAR_ACPI_H */
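The deleted dmar_acpi_unit_head_t earlier in this removed header reflects how the DMAR table is laid out: variable-length remapping structures (DRHD, RMRR, ATSR) packed one after another past the fixed header, each walked by its own length field. A self-contained sketch of that walk over a toy little-endian buffer (x86 byte order assumed):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct unit_head {
    uint16_t uh_type;
    uint16_t uh_length;
} unit_head_t;

static void
walk_units(const uint8_t *buf, size_t len)
{
    size_t off = 0;
    unit_head_t uh;

    while (off + sizeof (uh) <= len) {
        memcpy(&uh, buf + off, sizeof (uh));
        if (uh.uh_length < sizeof (uh))
            break;              /* malformed record: stop */
        printf("unit type %u, %u bytes\n", uh.uh_type, uh.uh_length);
        off += uh.uh_length;    /* advance by the record's own length */
    }
}

int
main(void)
{
    /* two toy records: type 0 (DRHD) 8 bytes, type 1 (RMRR) 6 bytes */
    uint8_t buf[14] = {
        0, 0, 8, 0,   0, 0, 0, 0,
        1, 0, 6, 0,   0, 0,
    };

    walk_units(buf, sizeof (buf));
    return (0);
}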
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/i86pc/sys/immu.h Sat Jan 30 18:23:16 2010 -0800 @@ -0,0 +1,835 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Portions Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2008, Intel Corporation. + * All rights reserved. + */ + +#ifndef _SYS_INTEL_IOMMU_H +#define _SYS_INTEL_IOMMU_H + +/* + * Intel IOMMU implementation specific state + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> +#include <sys/bitset.h> +#include <sys/kstat.h> +#include <sys/vmem.h> +#include <sys/rootnex.h> + +/* + * Some ON drivers have bugs. Keep this define until all such drivers + * have been fixed + */ +#define BUGGY_DRIVERS 1 + +/* PD(T)E entries */ +typedef uint64_t hw_pdte_t; + +#define IMMU_MAXNAMELEN (64) +#define IMMU_MAXSEG (1) +#define IMMU_REGSZ (1UL << 12) +#define IMMU_PAGESIZE (4096) +#define IMMU_PAGESHIFT (12) +#define IMMU_PAGEOFFSET (IMMU_PAGESIZE - 1) +#define IMMU_PAGEMASK (~IMMU_PAGEOFFSET) +#define IMMU_BTOP(b) (((uint64_t)b) >> IMMU_PAGESHIFT) +#define IMMU_PTOB(p) (((uint64_t)p) << IMMU_PAGESHIFT) +#define IMMU_PGTABLE_MAX_LEVELS (6) +#define IMMU_ROUNDUP(size) (((size) + IMMU_PAGEOFFSET) & ~IMMU_PAGEOFFSET) +#define IMMU_ROUNDOWN(addr) ((addr) & ~IMMU_PAGEOFFSET) +#define IMMU_PGTABLE_LEVEL_STRIDE (9) +#define IMMU_PGTABLE_LEVEL_MASK ((1<<IMMU_PGTABLE_LEVEL_STRIDE) - 1) +#define IMMU_PGTABLE_OFFSHIFT (IMMU_PAGESHIFT - IMMU_PGTABLE_LEVEL_STRIDE) +#define IMMU_PGTABLE_MAXIDX ((IMMU_PAGESIZE / sizeof (hw_pdte_t)) - 1) + +#define IMMU_ROUNDUP(size) (((size) + IMMU_PAGEOFFSET) & ~IMMU_PAGEOFFSET) +#define IMMU_ROUNDOWN(addr) ((addr) & ~IMMU_PAGEOFFSET) + +/* + * DMAR global defines + */ +#define DMAR_TABLE "dmar-table" +#define DMAR_INTRMAP_SUPPORT (0x01) + +/* DMAR unit types */ +#define DMAR_DRHD 0 +#define DMAR_RMRR 1 +#define DMAR_ATSR 2 +#define DMAR_RHSA 3 + +/* DRHD flag values */ +#define DMAR_INCLUDE_ALL (0x01) + +/* Device scope types */ +#define DMAR_ENDPOINT 1 +#define DMAR_SUBTREE 2 +#define DMAR_IOAPIC 3 +#define DMAR_HPET 4 + + +/* Forward declarations for IOMMU state structure and DVMA domain struct */ +struct immu; +struct domain; + +/* + * The following structure describes the formate of DMAR ACPI table format. + * They are used to parse DMAR ACPI table. Read the spec for the meaning + * of each member. 
+ */ + +/* lengths of various strings */ +#define DMAR_SIG_LEN (4) /* table signature */ +#define DMAR_OEMID_LEN (6) /* OEM ID */ +#define DMAR_TBLID_LEN (8) /* OEM table ID */ +#define DMAR_ASL_LEN (4) /* ASL len */ + +typedef struct dmar_table { + kmutex_t tbl_lock; + uint8_t tbl_haw; + boolean_t tbl_intrmap; + list_t tbl_drhd_list[IMMU_MAXSEG]; + list_t tbl_rmrr_list[IMMU_MAXSEG]; + char *tbl_oem_id; + char *tbl_oem_tblid; + uint32_t tbl_oem_rev; + caddr_t tbl_raw; + int tbl_rawlen; +} dmar_table_t; + +typedef struct drhd { + kmutex_t dr_lock; /* protects the dmar field */ + struct immu *dr_immu; + dev_info_t *dr_dip; + uint16_t dr_seg; + uint64_t dr_regs; + boolean_t dr_include_all; + list_t dr_scope_list; + list_node_t dr_node; +} drhd_t; + +typedef struct rmrr { + kmutex_t rm_lock; + uint16_t rm_seg; + uint64_t rm_base; + uint64_t rm_limit; + list_t rm_scope_list; + list_node_t rm_node; +} rmrr_t; + +/* + * Macros based on PCI spec + */ +#define IMMU_PCI_DEV(devfunc) ((uint64_t)devfunc >> 3) /* from devfunc */ +#define IMMU_PCI_FUNC(devfunc) (devfunc & 7) /* get func from devfunc */ +#define IMMU_PCI_DEVFUNC(d, f) (((d) << 3) | (f)) /* create devfunc */ + +typedef struct scope { + uint8_t scp_type; + uint8_t scp_enumid; + uint8_t scp_bus; + uint8_t scp_dev; + uint8_t scp_func; + list_node_t scp_node; +} scope_t; + +/* + * interrupt source id and drhd info for ioapic + */ +typedef struct ioapic_drhd { + uchar_t ioapic_ioapicid; + uint16_t ioapic_sid; /* ioapic source id */ + drhd_t *ioapic_drhd; + list_node_t ioapic_node; +} ioapic_drhd_t; + +typedef struct memrng { + uint64_t mrng_start; + uint64_t mrng_npages; +} memrng_t; + +typedef enum immu_flags { + IMMU_FLAGS_NONE = 0x1, + IMMU_FLAGS_SLEEP = 0x1, + IMMU_FLAGS_NOSLEEP = 0x2, + IMMU_FLAGS_READ = 0x4, + IMMU_FLAGS_WRITE = 0x8, + IMMU_FLAGS_DONTPASS = 0x10, + IMMU_FLAGS_ALLOC = 0x20, + IMMU_FLAGS_MUST_MATCH = 0x40, + IMMU_FLAGS_PAGE1 = 0x80, + IMMU_FLAGS_UNITY = 0x100, + IMMU_FLAGS_DMAHDL = 0x200, + IMMU_FLAGS_MEMRNG = 0x400 +} immu_flags_t; + +typedef enum cont_avail { + IMMU_CONT_BAD = 0x0, + IMMU_CONT_UNINITED = 0x1, + IMMU_CONT_INITED = 0x2 +} cont_avail_t; + +/* Size of root and context tables and their entries */ +#define IMMU_ROOT_TBLSZ (4096) +#define IMMU_CONT_TBLSZ (4096) +#define IMMU_ROOT_NUM (256) +#define IMMU_CONT_NUM (256) + +/* register offset */ +#define IMMU_REG_VERSION (0x00) /* Version Rigister, 32 bit */ +#define IMMU_REG_CAP (0x08) /* Capability Register, 64 bit */ +#define IMMU_REG_EXCAP (0x10) /* Extended Capability Reg, 64 bit */ +#define IMMU_REG_GLOBAL_CMD (0x18) /* Global Command Register, 32 bit */ +#define IMMU_REG_GLOBAL_STS (0x1C) /* Global Status Register, 32 bit */ +#define IMMU_REG_ROOTENTRY (0x20) /* Root-Entry Table Addr Reg, 64 bit */ +#define IMMU_REG_CONTEXT_CMD (0x28) /* Context Comand Register, 64 bit */ +#define IMMU_REG_FAULT_STS (0x34) /* Fault Status Register, 32 bit */ +#define IMMU_REG_FEVNT_CON (0x38) /* Fault Event Control Reg, 32 bit */ +#define IMMU_REG_FEVNT_DATA (0x3C) /* Fault Event Data Register, 32 bit */ +#define IMMU_REG_FEVNT_ADDR (0x40) /* Fault Event Address Reg, 32 bit */ +#define IMMU_REG_FEVNT_UADDR (0x44) /* Fault Event Upper Addr Reg, 32 bit */ +#define IMMU_REG_AFAULT_LOG (0x58) /* Advanced Fault Log Reg, 64 bit */ +#define IMMU_REG_PMER (0x64) /* Protected Memory Enble Reg, 32 bit */ +#define IMMU_REG_PLMBR (0x68) /* Protected Low Mem Base Reg, 32 bit */ +#define IMMU_REG_PLMLR (0x6C) /* Protected Low Mem Lim Reg, 32 bit */ +#define IMMU_REG_PHMBR (0X70) /* 
Protectd High Mem Base Reg, 64 bit */ +#define IMMU_REG_PHMLR (0x78) /* Protected High Mem Lim Reg, 64 bit */ +#define IMMU_REG_INVAL_QH (0x80) /* Invalidation Queue Head, 64 bit */ +#define IMMU_REG_INVAL_QT (0x88) /* Invalidation Queue Tail, 64 bit */ +#define IMMU_REG_INVAL_QAR (0x90) /* Invalidtion Queue Addr Reg, 64 bit */ +#define IMMU_REG_INVAL_CSR (0x9C) /* Inval Compl Status Reg, 32 bit */ +#define IMMU_REG_INVAL_CECR (0xA0) /* Inval Compl Evnt Ctrl Reg, 32 bit */ +#define IMMU_REG_INVAL_CEDR (0xA4) /* Inval Compl Evnt Data Reg, 32 bit */ +#define IMMU_REG_INVAL_CEAR (0xA8) /* Inval Compl Event Addr Reg, 32 bit */ +#define IMMU_REG_INVAL_CEUAR (0xAC) /* Inval Comp Evnt Up Addr reg, 32bit */ +#define IMMU_REG_IRTAR (0xB8) /* INTR Remap Tbl Addr Reg, 64 bit */ + +/* ioapic memory region */ +#define IOAPIC_REGION_START (0xfee00000) +#define IOAPIC_REGION_END (0xfeefffff) + +/* fault register */ +#define IMMU_FAULT_STS_PPF (2) +#define IMMU_FAULT_STS_PFO (1) +#define IMMU_FAULT_STS_ITE (1 << 6) +#define IMMU_FAULT_STS_ICE (1 << 5) +#define IMMU_FAULT_STS_IQE (1 << 4) +#define IMMU_FAULT_GET_INDEX(x) ((((uint64_t)x) >> 8) & 0xff) +#define IMMU_FRR_GET_F(x) (((uint64_t)x) >> 63) +#define IMMU_FRR_GET_FR(x) ((((uint64_t)x) >> 32) & 0xff) +#define IMMU_FRR_GET_FT(x) ((((uint64_t)x) >> 62) & 0x1) +#define IMMU_FRR_GET_SID(x) ((x) & 0xffff) + +/* (ex)capability register */ +#define IMMU_CAP_GET_NFR(x) (((((uint64_t)x) >> 40) & 0xff) + 1) +#define IMMU_CAP_GET_DWD(x) ((((uint64_t)x) >> 54) & 1) +#define IMMU_CAP_GET_DRD(x) ((((uint64_t)x) >> 55) & 1) +#define IMMU_CAP_GET_PSI(x) ((((uint64_t)x) >> 39) & 1) +#define IMMU_CAP_GET_SPS(x) ((((uint64_t)x) >> 34) & 0xf) +#define IMMU_CAP_GET_ISOCH(x) ((((uint64_t)x) >> 23) & 1) +#define IMMU_CAP_GET_ZLR(x) ((((uint64_t)x) >> 22) & 1) +#define IMMU_CAP_GET_MAMV(x) ((((uint64_t)x) >> 48) & 0x3f) +#define IMMU_CAP_GET_CM(x) ((((uint64_t)x) >> 7) & 1) +#define IMMU_CAP_GET_PHMR(x) ((((uint64_t)x) >> 6) & 1) +#define IMMU_CAP_GET_PLMR(x) ((((uint64_t)x) >> 5) & 1) +#define IMMU_CAP_GET_RWBF(x) ((((uint64_t)x) >> 4) & 1) +#define IMMU_CAP_GET_AFL(x) ((((uint64_t)x) >> 3) & 1) +#define IMMU_CAP_GET_FRO(x) (((((uint64_t)x) >> 24) & 0x3ff) * 16) +#define IMMU_CAP_MGAW(x) (((((uint64_t)x) >> 16) & 0x3f) + 1) +#define IMMU_CAP_SAGAW(x) ((((uint64_t)x) >> 8) & 0x1f) +#define IMMU_CAP_ND(x) (1 << (((x) & 0x7) *2 + 4)) -1 +#define IMMU_ECAP_GET_IRO(x) (((((uint64_t)x) >> 8) & 0x3ff) << 4) +#define IMMU_ECAP_GET_MHMV(x) (((uint64_t)x >> 20) & 0xf) +#define IMMU_ECAP_GET_SC(x) ((x) & 0x80) +#define IMMU_ECAP_GET_PT(x) ((x) & 0x40) +#define IMMU_ECAP_GET_CH(x) ((x) & 0x20) +#define IMMU_ECAP_GET_EIM(x) ((x) & 0x10) +#define IMMU_ECAP_GET_IR(x) ((x) & 0x8) +#define IMMU_ECAP_GET_DI(x) ((x) & 0x4) +#define IMMU_ECAP_GET_QI(x) ((x) & 0x2) +#define IMMU_ECAP_GET_C(x) ((x) & 0x1) + +#define IMMU_CAP_SET_RWBF(x) ((x) |= (1 << 4)) + + +/* iotlb invalidation */ +#define TLB_INV_GLOBAL (((uint64_t)1) << 60) +#define TLB_INV_DOMAIN (((uint64_t)2) << 60) +#define TLB_INV_PAGE (((uint64_t)3) << 60) +#define TLB_INV_GET_IAIG(x) ((((uint64_t)x) >> 57) & 7) +#define TLB_INV_DRAIN_READ (((uint64_t)1) << 49) +#define TLB_INV_DRAIN_WRITE (((uint64_t)1) << 48) +#define TLB_INV_DID(x) (((uint64_t)((x) & 0xffff)) << 32) +#define TLB_INV_IVT (((uint64_t)1) << 63) +#define TLB_IVA_HINT(x) (((x) & 0x1) << 6) +#define TLB_IVA_LEAF 1 +#define TLB_IVA_WHOLE 0 + +/* dont use value 0 for enums - to catch unit 8 */ +typedef enum iotlb_inv { + IOTLB_PSI = 1, + IOTLB_DSI, + IOTLB_GLOBAL +} 
immu_iotlb_inv_t; + +typedef enum context_inv { + CONTEXT_FSI = 1, + CONTEXT_DSI, + CONTEXT_GLOBAL +} immu_context_inv_t; + +/* context invalidation */ +#define CCMD_INV_ICC (((uint64_t)1) << 63) +#define CCMD_INV_GLOBAL (((uint64_t)1) << 61) +#define CCMD_INV_DOMAIN (((uint64_t)2) << 61) +#define CCMD_INV_DEVICE (((uint64_t)3) << 61) +#define CCMD_INV_DID(x) ((uint64_t)((x) & 0xffff)) +#define CCMD_INV_SID(x) (((uint64_t)((x) & 0xffff)) << 16) +#define CCMD_INV_FM(x) (((uint64_t)((x) & 0x3)) << 32) + +/* global command register */ +#define IMMU_GCMD_TE (((uint32_t)1) << 31) +#define IMMU_GCMD_SRTP (((uint32_t)1) << 30) +#define IMMU_GCMD_SFL (((uint32_t)1) << 29) +#define IMMU_GCMD_EAFL (((uint32_t)1) << 28) +#define IMMU_GCMD_WBF (((uint32_t)1) << 27) +#define IMMU_GCMD_QIE (((uint32_t)1) << 26) +#define IMMU_GCMD_IRE (((uint32_t)1) << 25) +#define IMMU_GCMD_SIRTP (((uint32_t)1) << 24) +#define IMMU_GCMD_CFI (((uint32_t)1) << 23) + +/* global status register */ +#define IMMU_GSTS_TES (((uint32_t)1) << 31) +#define IMMU_GSTS_RTPS (((uint32_t)1) << 30) +#define IMMU_GSTS_FLS (((uint32_t)1) << 29) +#define IMMU_GSTS_AFLS (((uint32_t)1) << 28) +#define IMMU_GSTS_WBFS (((uint32_t)1) << 27) +#define IMMU_GSTS_QIES (((uint32_t)1) << 26) +#define IMMU_GSTS_IRES (((uint32_t)1) << 25) +#define IMMU_GSTS_IRTPS (((uint32_t)1) << 24) +#define IMMU_GSTS_CFIS (((uint32_t)1) << 23) + +/* psi address mask */ +#define ADDR_AM_MAX(m) (((uint_t)1) << (m)) +#define ADDR_AM_OFFSET(n, m) ((n) & (ADDR_AM_MAX(m) - 1)) + +/* dmar fault event */ +#define IMMU_INTR_IPL (8) +#define IMMU_REG_FEVNT_CON_IM_SHIFT (31) + +#define IMMU_ALLOC_RESOURCE_DELAY (drv_usectohz(5000)) + +/* max value of Size field of Interrupt Remapping Table Address Register */ +#define INTRMAP_MAX_IRTA_SIZE 0xf + +/* interrupt remapping table entry size */ +#define INTRMAP_RTE_SIZE 0x10 + +/* ioapic redirection table entry related shift of remappable interrupt */ +#define INTRMAP_IOAPIC_IDX_SHIFT 17 +#define INTRMAP_IOAPIC_FORMAT_SHIFT 16 +#define INTRMAP_IOAPIC_TM_SHIFT 15 +#define INTRMAP_IOAPIC_POL_SHIFT 13 +#define INTRMAP_IOAPIC_IDX15_SHIFT 11 + +/* msi intr entry related shift of remappable interrupt */ +#define INTRMAP_MSI_IDX_SHIFT 5 +#define INTRMAP_MSI_FORMAT_SHIFT 4 +#define INTRMAP_MSI_SHV_SHIFT 3 +#define INTRMAP_MSI_IDX15_SHIFT 2 + +#define INTRMAP_IDX_FULL (uint_t)-1 + +#define RDT_DLM(rdt) BITX((rdt), 10, 8) +#define RDT_DM(rdt) BT_TEST(&(rdt), 11) +#define RDT_POL(rdt) BT_TEST(&(rdt), 13) +#define RDT_TM(rdt) BT_TEST(&(rdt), 15) + +#define INTRMAP_DISABLE (void *)-1 + +/* + * invalidation granularity + */ +typedef enum { + TLB_INV_G_GLOBAL = 1, + TLB_INV_G_DOMAIN, + TLB_INV_G_PAGE +} tlb_inv_g_t; + +typedef enum { + CTT_INV_G_GLOBAL = 1, + CTT_INV_G_DOMAIN, + CTT_INV_G_DEVICE +} ctt_inv_g_t; + +typedef enum { + IEC_INV_GLOBAL = 0, + IEC_INV_INDEX +} iec_inv_g_t; + + +struct inv_queue_state; +struct intrmap_tbl_state; + +/* A software page table structure */ +typedef struct pgtable { + krwlock_t swpg_rwlock; + caddr_t hwpg_vaddr; /* HW pgtable VA */ + paddr_t hwpg_paddr; /* HW pgtable PA */ + ddi_dma_handle_t hwpg_dmahdl; + ddi_acc_handle_t hwpg_memhdl; + struct pgtable **swpg_next_array; + list_node_t swpg_domain_node; /* domain list of pgtables */ +} pgtable_t; + +/* interrupt remapping table state info */ +typedef struct intrmap { + kmutex_t intrmap_lock; + ddi_dma_handle_t intrmap_dma_hdl; + ddi_acc_handle_t intrmap_acc_hdl; + caddr_t intrmap_vaddr; + paddr_t intrmap_paddr; + uint_t intrmap_size; + bitset_t intrmap_map; + 
uint_t intrmap_free; +} intrmap_t; + +typedef struct hw_rce { + uint64_t lo; + uint64_t hi; +} hw_rce_t; + + +#define ROOT_GET_P(hrent) ((hrent)->lo & 0x1) +#define ROOT_SET_P(hrent) ((hrent)->lo |= 0x1) + +#define ROOT_GET_CONT(hrent) ((hrent)->lo & ~(0xFFF)) +#define ROOT_SET_CONT(hrent, paddr) ((hrent)->lo |= (paddr & (~0xFFF))) + +#define TTYPE_XLATE_ONLY (0x0) +#define TTYPE_XLATE_IOTLB (0x1) +#define TTYPE_PASSTHRU (0x2) +#define TTYPE_RESERVED (0x3) + +#define CONT_GET_DID(hcent) ((((uint64_t)(hcent)->hi) >> 8) & 0xFFFF) +#define CONT_SET_DID(hcent, did) ((hcent)->hi |= ((0xFFFF & (did)) << 8)) + +#define CONT_GET_AVAIL(hcent) ((((uint64_t)((hcent)->hi)) >> 0x3) & 0xF) +#define CONT_SET_AVAIL(hcent, av) ((hcent)->hi |= ((0xF & (av)) << 0x3)) + +#define CONT_GET_LO_AW(hcent) (30 + 9 *((hcent)->hi & 0x7)) +#define CONT_GET_AW(hcent) \ + ((CONT_GET_LO_AW(hcent) == 66) ? 64 : CONT_GET_LO_AW(hcent)) +#define CONT_SET_AW(hcent, aw) \ + ((hcent)->hi |= (((((aw) + 2) - 30) / 9) & 0x7)) + +#define CONT_GET_ASR(hcent) ((hcent)->lo & ~(0xFFF)) +#define CONT_SET_ASR(hcent, paddr) ((hcent)->lo |= (paddr & (~0xFFF))) + +#define CONT_GET_TTYPE(hcent) ((((uint64_t)(hcent)->lo) >> 0x2) & 0x3) +#define CONT_SET_TTYPE(hcent, ttype) ((hcent)->lo |= (((ttype) & 0x3) << 0x2)) + +#define CONT_GET_P(hcent) ((hcent)->lo & 0x1) +#define CONT_SET_P(hcent) ((hcent)->lo |= 0x1) + + +/* we use the bit 63 (available for system SW) as a present bit */ +#define PDTE_SW4(hw_pdte) ((hw_pdte) & ((uint64_t)1<<63)) +#define PDTE_CLEAR_SW4(hw_pdte) ((hw_pdte) &= ~((uint64_t)1<<63)) + +#define PDTE_P(hw_pdte) ((hw_pdte) & ((uint64_t)1<<63)) +#define PDTE_CLEAR_P(hw_pdte) ((hw_pdte) &= ~((uint64_t)1<<63)) +#define PDTE_SET_P(hw_pdte) ((hw_pdte) |= ((uint64_t)1<<63)) + +#define PDTE_TM(hw_pdte) ((hw_pdte) & ((uint64_t)1<<62)) +#define PDTE_CLEAR_TM(hw_pdte) ((hw_pdte) &= ~((uint64_t)1<<62)) + +#define PDTE_SW3(hw_pdte) \ + (((hw_pdte) & ~(((uint64_t)0x3<<62)|(((uint64_t)1<<52)-1))) >> 52) +#define PDTE_SW3_OVERFLOW(hw_pdte) \ + (PDTE_SW3(hw_pdte) == 0x3FF) +#define PDTE_CLEAR_SW3(hw_pdte) \ + ((hw_pdte) &= (((uint64_t)0x3<<62)|(((uint64_t)1<<52)-1))) +#define PDTE_SET_SW3(hw_pdte, ref) \ + ((hw_pdte) |= ((((uint64_t)(ref)) & 0x3FF) << 52)) + +#define PDTE_PADDR(hw_pdte) ((hw_pdte) & ~(((uint64_t)0xFFF<<52)|((1<<12)-1))) +#define PDTE_CLEAR_PADDR(hw_pdte) \ + ((hw_pdte) &= (((uint64_t)0xFFF<<52)|((1<<12)-1))) +#define PDTE_SET_PADDR(hw_pdte, paddr) ((hw_pdte) |= PDTE_PADDR(paddr)) + +#define PDTE_SNP(hw_pdte) ((hw_pdte) & (1<<11)) +#define PDTE_CLEAR_SNP(hw_pdte) ((hw_pdte) &= ~(1<<11)) +#define PDTE_SET_SNP(hw_pdte) ((hw_pdte) |= (1<<11)) + +#define PDTE_SW2(hw_pdte) ((hw_pdte) & (0x700)) +#define PDTE_CLEAR_SW2(hw_pdte) ((hw_pdte) &= ~(0x700)) + +#define PDTE_SP(hw_pdte) ((hw_pdte) & (0x80)) +#define PDTE_CLEAR_SP(hw_pdte) ((hw_pdte) &= ~(0x80)) + +#define PDTE_SW1(hw_pdte) ((hw_pdte) & (0x7C)) +#define PDTE_CLEAR_SW1(hw_pdte) ((hw_pdte) &= ~(0x7C)) + +#define PDTE_WRITE(hw_pdte) ((hw_pdte) & (0x2)) +#define PDTE_CLEAR_WRITE(hw_pdte) ((hw_pdte) &= ~(0x2)) +#define PDTE_SET_WRITE(hw_pdte) ((hw_pdte) |= (0x2)) + +#define PDTE_READ(hw_pdte) ((hw_pdte) & (0x1)) +#define PDTE_CLEAR_READ(hw_pdte) ((hw_pdte) &= ~(0x1)) +#define PDTE_SET_READ(hw_pdte) ((hw_pdte) |= (0x1)) + +typedef struct immu { + kmutex_t immu_lock; + char *immu_name; + + /* lock grabbed by interrupt handler */ + kmutex_t immu_intr_lock; + + /* ACPI/DMAR table related */ + void *immu_dmar_unit; + dev_info_t *immu_dip; + struct domain *immu_unity_domain; + + /* 
IOMMU register related */ + kmutex_t immu_regs_lock; + boolean_t immu_regs_setup; + boolean_t immu_regs_running; + boolean_t immu_regs_quiesced; + ddi_acc_handle_t immu_regs_handle; + caddr_t immu_regs_addr; + uint64_t immu_regs_cap; + uint64_t immu_regs_excap; + uint32_t immu_regs_cmdval; + uint32_t immu_regs_intr_msi_addr; + uint32_t immu_regs_intr_msi_data; + uint32_t immu_regs_intr_uaddr; + + /* DVMA related */ + kmutex_t immu_dvma_lock; + boolean_t immu_dvma_setup; + boolean_t immu_dvma_running; + int immu_dvma_gaw; + int immu_dvma_agaw; + int immu_dvma_nlevels; + boolean_t immu_dvma_coherent; + + /* DVMA context related */ + krwlock_t immu_ctx_rwlock; + pgtable_t *immu_ctx_root; + + /* DVMA domain related */ + int immu_max_domains; + vmem_t *immu_did_arena; + char immu_did_arena_name[IMMU_MAXNAMELEN]; + list_t immu_domain_list; + + /* DVMA special devices */ + boolean_t immu_dvma_gfx_only; + list_t immu_dvma_lpc_list; + list_t immu_dvma_gfx_list; + + /* interrupt remapping related */ + kmutex_t immu_intrmap_lock; + boolean_t immu_intrmap_setup; + boolean_t immu_intrmap_running; + intrmap_t *immu_intrmap; + uint64_t immu_intrmap_irta_reg; + + /* queued invalidation related */ + kmutex_t immu_qinv_lock; + boolean_t immu_qinv_setup; + boolean_t immu_qinv_running; + boolean_t immu_qinv_enabled; + void *immu_qinv; + uint64_t immu_qinv_reg_value; + + /* list_node for system-wide list of DMAR units */ + list_node_t immu_node; +} immu_t; + +/* properties that control DVMA */ +#define DDI_DVMA_MAPTYPE_PROP "ddi-dvma-mapping" + +/* property values */ +#define DDI_DVMA_MAPTYPE_UNITY "unity" + +typedef enum immu_maptype { + IMMU_MAPTYPE_BAD = 0, /* 0 is always bad */ + IMMU_MAPTYPE_UNITY = 1, + IMMU_MAPTYPE_XLATE +} immu_maptype_t; + +/* + * domain_t + * + */ +typedef struct domain { + /* the basics */ + uint_t dom_did; + immu_t *dom_immu; + + /* mapping related */ + immu_maptype_t dom_maptype; + vmem_t *dom_dvma_arena; + char dom_dvma_arena_name[IMMU_MAXNAMELEN]; + + /* pgtables */ + pgtable_t *dom_pgtable_root; + krwlock_t dom_pgtable_rwlock; + + /* list of pgtables for this domain */ + list_t dom_pglist; + + /* list node for list of domains (unity or xlate) */ + list_node_t dom_maptype_node; + /* list node for list of domains off immu */ + list_node_t dom_immu_node; +} domain_t; + +typedef enum immu_pcib { + IMMU_PCIB_BAD = 0, + IMMU_PCIB_NOBDF, + IMMU_PCIB_PCIE_PCIE, + IMMU_PCIB_PCIE_PCI, + IMMU_PCIB_PCI_PCI, + IMMU_PCIB_ENDPOINT +} immu_pcib_t; + +/* + * immu_devi_t + * Intel IOMMU in devinfo node + */ +typedef struct immu_devi { + /* pci seg, bus, dev, func */ + int imd_seg; + int imd_bus; + int imd_devfunc; + + /* ppb information */ + immu_pcib_t imd_pcib_type; + int imd_sec; + int imd_sub; + + /* identifier for special devices */ + boolean_t imd_display; + boolean_t imd_lpc; + + /* dmar unit to which this dip belongs */ + immu_t *imd_immu; + + /* domain ptr */ + domain_t *imd_domain; + dev_info_t *imd_ddip; + + /* my devinfo */ + dev_info_t *imd_dip; + + /* + * if we are a "special" devinfo + * the node for the special linked list + * off the DMAR unit structure + */ + list_node_t imd_spc_node; +} immu_devi_t; + +#define IMMU_DEVI(dip) ((immu_devi_t *)(DEVI(dip)->devi_iommu)) +#define IMMU_DEVI_SET(dip, imd) (DEVI(dip)->devi_iommu = (void *)imd) + +/* + * struct dmar_arg + */ +typedef struct immu_arg { + int ima_seg; + int ima_bus; + int ima_devfunc; + dev_info_t *ima_rdip; + dev_info_t *ima_ddip; +} immu_arg_t; + +/* + * Globals used by IOMMU code + */ +/* shared between IOMMU files */ 
+extern dev_info_t *root_devinfo; +extern kmutex_t immu_lock; +extern list_t immu_list; +extern boolean_t immu_setup; +extern boolean_t immu_running; +extern kmutex_t ioapic_drhd_lock; +extern list_t ioapic_drhd_list; + +/* switches */ + +/* Various features */ +extern boolean_t immu_enable; +extern boolean_t immu_dvma_enable; +extern boolean_t immu_gfxdvma_enable; +extern boolean_t immu_intrmap_enable; +extern boolean_t immu_qinv_enable; +extern boolean_t immu_mmio_safe; + +/* various quirks that need working around */ +extern boolean_t immu_quirk_usbpage0; +extern boolean_t immu_quirk_usbfullpa; +extern boolean_t immu_quirk_usbrmrr; +extern boolean_t immu_quirk_mobile4; + +/* debug messages */ +extern boolean_t immu_dmar_print; + +/* ################### Interfaces exported outside IOMMU code ############## */ +void immu_init(void); +void immu_startup(void); +void immu_shutdown(void); +void immu_destroy(void); +int immu_map_sgl(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, + int prealloc_count, dev_info_t *rdip); +int immu_unmap_sgl(ddi_dma_impl_t *hp, dev_info_t *rdip); +void immu_device_tree_changed(void); +void immu_physmem_update(uint64_t addr, uint64_t size); +int immu_quiesce(void); +int immu_unquiesce(void); +/* ######################################################################### */ + +/* ################# Interfaces used within IOMMU code #################### */ + +/* functions in rootnex.c */ +int rootnex_dvcookies_alloc(ddi_dma_impl_t *hp, + struct ddi_dma_req *dmareq, dev_info_t *rdip, void *arg); +void rootnex_dvcookies_free(dvcookie_t *dvcookies, void *arg); + +/* immu_dmar.c interfaces */ +int immu_dmar_setup(void); +int immu_dmar_parse(void); +void immu_dmar_startup(void); +void immu_dmar_shutdown(void); +void immu_dmar_destroy(void); +boolean_t immu_dmar_blacklisted(char **strings_array, uint_t nstrings); +immu_t *immu_dmar_get_immu(dev_info_t *rdip); +char *immu_dmar_unit_name(void *dmar_unit); +dev_info_t *immu_dmar_unit_dip(void *dmar_unit); +void immu_dmar_set_immu(void *dmar_unit, immu_t *immu); +void *immu_dmar_walk_units(int seg, void *dmar_unit); +boolean_t immu_dmar_intrmap_supported(void); +uint16_t immu_dmar_ioapic_sid(int ioapicid); +immu_t *immu_dmar_ioapic_immu(int ioapicid); +void immu_dmar_rmrr_map(void); + +/* immu.c interfaces */ +int immu_walk_ancestor(dev_info_t *rdip, dev_info_t *ddip, + int (*func)(dev_info_t *, void *arg), void *arg, + int *level, immu_flags_t immu_flags); + +/* immu_regs.c interfaces */ +void immu_regs_setup(list_t *immu_list); +void immu_regs_startup(immu_t *immu); +int immu_regs_resume(immu_t *immu); +void immu_regs_suspend(immu_t *immu); +void immu_regs_shutdown(immu_t *immu); +void immu_regs_destroy(list_t *immu_list); + +void immu_regs_intr(immu_t *immu, uint32_t msi_addr, uint32_t msi_data, + uint32_t uaddr); + +boolean_t immu_regs_passthru_supported(immu_t *immu); +boolean_t immu_regs_is_TM_reserved(immu_t *immu); +boolean_t immu_regs_is_SNP_reserved(immu_t *immu); + +void immu_regs_wbf_flush(immu_t *immu); +void immu_regs_cpu_flush(immu_t *immu, caddr_t addr, uint_t size); +void immu_regs_iotlb_flush(immu_t *immu, uint_t domainid, uint64_t dvma, + uint64_t count, uint_t hint, immu_iotlb_inv_t type); +void immu_regs_context_flush(immu_t *immu, uint8_t function_mask, + uint16_t source_id, uint_t did, immu_context_inv_t type); +void immu_regs_set_root_table(immu_t *immu); +void immu_regs_qinv_enable(immu_t *immu, uint64_t qinv_reg_value); +void immu_regs_intr_enable(immu_t *immu, uint32_t msi_addr, uint32_t msi_data, + 
uint32_t uaddr); +void immu_regs_intrmap_enable(immu_t *immu, uint64_t irta_reg); +uint64_t immu_regs_get64(immu_t *immu, uint_t reg); +void immu_regs_put64(immu_t *immu, uint_t reg, uint64_t val); +uint32_t immu_regs_get32(immu_t *immu, uint_t reg); +void immu_regs_put32(immu_t *immu, uint_t reg, uint32_t val); + +/* immu_dvma.c interfaces */ +void immu_dvma_setup(list_t *immu_list); +void immu_dvma_startup(immu_t *immu); +void immu_dvma_shutdown(immu_t *immu); +void immu_dvma_destroy(list_t *immu_list); + +void immu_dvma_physmem_update(uint64_t addr, uint64_t size); +int immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *, + uint_t prealloc_count, dev_info_t *rdip, immu_flags_t immu_flags); +int immu_dvma_unmap(ddi_dma_impl_t *hp, dev_info_t *rdip); +int immu_dvma_alloc(dvcookie_t *first_dvcookie, void *arg); +void immu_dvma_free(dvcookie_t *first_dvcookie, void *arg); +int immu_devi_set(dev_info_t *dip, immu_flags_t immu_flags); +immu_devi_t *immu_devi_get(dev_info_t *dip); +immu_t *immu_dvma_get_immu(dev_info_t *dip, immu_flags_t immu_flags); + + +/* immu_intrmap.c interfaces */ +void immu_intrmap_setup(list_t *immu_list); +void immu_intrmap_startup(immu_t *immu); +void immu_intrmap_shutdown(immu_t *immu); +void immu_intrmap_destroy(list_t *immu_list); + +/* registers interrupt handler for IOMMU unit */ +void immu_intr_register(immu_t *immu); +int immu_intr_handler(immu_t *immu); + + +/* immu_qinv.c interfaces */ +void immu_qinv_setup(list_t *immu_list); +void immu_qinv_startup(immu_t *immu); +void immu_qinv_shutdown(immu_t *immu); +void immu_qinv_destroy(list_t *immu_list); + +void immu_qinv_context_fsi(immu_t *immu, uint8_t function_mask, + uint16_t source_id, uint_t domain_id); +void immu_qinv_context_dsi(immu_t *immu, uint_t domain_id); +void immu_qinv_context_gbl(immu_t *immu); +void immu_qinv_iotlb_psi(immu_t *immu, uint_t domain_id, + uint64_t dvma, uint_t count, uint_t hint); +void immu_qinv_iotlb_dsi(immu_t *immu, uint_t domain_id); +void immu_qinv_iotlb_gbl(immu_t *immu); +void immu_qinv_intr_global(immu_t *immu); +void immu_qinv_intr_one_cache(immu_t *immu, uint_t idx); +void immu_qinv_intr_caches(immu_t *immu, uint_t idx, uint_t cnt); +void immu_qinv_report_fault(immu_t *immu); + + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_INTEL_IOMMU_H */
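The new header packs a lot of mechanism into macros; a few self-contained user-land sketches of what it encodes follow. Each redefines the relevant macros locally rather than including the header, and all register values below are made up.

First, the page-table stride. Each level resolves IMMU_PGTABLE_LEVEL_STRIDE = 9 bits of DVMA above the 12-bit page offset, so the index at level n (1 being the leaf) is the 9-bit field starting at bit 12 + 9*(n-1):

#include <stdint.h>
#include <stdio.h>

#define IMMU_PAGESHIFT              12
#define IMMU_PGTABLE_LEVEL_STRIDE   9
#define IMMU_PGTABLE_LEVEL_MASK     ((1 << IMMU_PGTABLE_LEVEL_STRIDE) - 1)

static unsigned int
pgtable_index(uint64_t dvma, int level)
{
    int shift = IMMU_PAGESHIFT + (level - 1) * IMMU_PGTABLE_LEVEL_STRIDE;

    return ((dvma >> shift) & IMMU_PGTABLE_LEVEL_MASK);
}

int
main(void)
{
    uint64_t dvma = 0x7f12345000ULL;

    /* a 3-level walk covers a 39-bit address space (AGAW 39) */
    for (int lvl = 3; lvl >= 1; lvl--)
        printf("level %d index %u\n", lvl, pgtable_index(dvma, lvl));
    return (0);
}

Second, capability-register parsing. The accessors are plain shift-and-mask extractions; note that IMMU_CAP_ND as written in the header leaves its trailing "-1" outside the outer parentheses, so it expands as intended only in a bare expression. Fully parenthesized equivalents, run on a made-up capability value:

#include <stdint.h>
#include <stdio.h>

#define CAP_MGAW(x)     (((((uint64_t)(x)) >> 16) & 0x3f) + 1)
#define CAP_SAGAW(x)    ((((uint64_t)(x)) >> 8) & 0x1f)
#define CAP_ND_MAX(x)   ((1 << ((((x) & 0x7) * 2) + 4)) - 1)

int
main(void)
{
    /* made-up value: MGAW field 0x26 (39-bit), SAGAW 0x2, ND 0x2 */
    uint64_t cap = ((uint64_t)0x26 << 16) | (0x2 << 8) | 0x2;

    printf("mgaw=%llu bits sagaw=0x%llx max domain id=%d\n",
        (unsigned long long)CAP_MGAW(cap),
        (unsigned long long)CAP_SAGAW(cap), CAP_ND_MAX(cap));
    return (0);
}

Third, the handshake behind the IMMU_GCMD_*/IMMU_GSTS_* bit pairs: software writes a one-shot command bit to the global command register, then polls the matching status bit until hardware acknowledges. Simulated here with a variable standing in for the MMIO registers; real code bounds the wait:

#include <stdint.h>
#include <stdio.h>

#define GCMD_TE     (((uint32_t)1) << 31)
#define GSTS_TES    (((uint32_t)1) << 31)

static uint32_t fake_gsts;      /* stand-in for the MMIO status register */

static void
gcmd_put32(uint32_t v)
{
    /* fake hardware acknowledges synchronously */
    fake_gsts = (v & GCMD_TE) ? GSTS_TES : 0;
}

static void
set_translation(int on)
{
    uint32_t want = on ? GSTS_TES : 0;

    gcmd_put32(on ? GCMD_TE : 0);
    while ((fake_gsts & GSTS_TES) != want)
        ;       /* real code bounds this wait and fails on timeout */
    printf("translation %sabled\n", on ? "en" : "dis");
}

int
main(void)
{
    set_translation(1);
    set_translation(0);
    return (0);
}

Finally, assembling a leaf PDTE with the macros above: page frame address in bits 12-51, read/write permission in bits 0-1, and bit 63, which the header's comment notes is available to system software, serving as the driver's own present marker:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t hw_pdte_t;

#define PDTE_SET_READ(p)    ((p) |= 0x1)
#define PDTE_SET_WRITE(p)   ((p) |= 0x2)
#define PDTE_SET_P(p)       ((p) |= ((uint64_t)1 << 63))
#define PDTE_PADDR(p)   ((p) & ~(((uint64_t)0xFFF << 52) | ((1 << 12) - 1)))
#define PDTE_SET_PADDR(p, pa)   ((p) |= PDTE_PADDR(pa))

int
main(void)
{
    hw_pdte_t pte = 0;
    uint64_t paddr = 0x8379c000ULL;     /* a page-aligned frame */

    PDTE_SET_PADDR(pte, paddr);
    PDTE_SET_READ(pte);
    PDTE_SET_WRITE(pte);
    PDTE_SET_P(pte);                    /* software present bit, bit 63 */
    printf("pte=0x%016llx frame=0x%llx\n", (unsigned long long)pte,
        (unsigned long long)PDTE_PADDR(pte));
    return (0);
}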
--- a/usr/src/uts/i86pc/sys/intel_iommu.h Sat Jan 30 15:04:39 2010 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,733 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Portions Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright (c) 2008, Intel Corporation. - * All rights reserved. - */ - -#ifndef _SYS_INTEL_IOMMU_H -#define _SYS_INTEL_IOMMU_H - -/* - * Intel IOMMU implementation specific state - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/types.h> -#include <sys/bitset.h> -#include <sys/dmar_acpi.h> -#include <sys/iommu_rscs.h> -#include <sys/cpu.h> -#include <sys/kstat.h> - -/* extern functions */ -extern int intel_iommu_attach_dmar_nodes(void); -extern int intel_iommu_map_sgl(ddi_dma_handle_t handle, - struct ddi_dma_req *dmareq, uint_t prealloc); -extern void intel_iommu_unmap_sgl(ddi_dma_handle_t handle); -extern void return_instr(void); - -/* define the return value for iommu_map_sgl */ -#define IOMMU_SGL_SUCCESS 0 -#define IOMMU_SGL_DISABLE 1 -#define IOMMU_SGL_NORESOURCES 2 - -/* register offset */ -#define IOMMU_REG_VERSION (0x00) /* Version Rigister, 32 bit */ -#define IOMMU_REG_CAP (0x08) /* Capability Register, 64 bit */ -#define IOMMU_REG_EXCAP (0x10) /* Extended Capability Reg, 64 bit */ -#define IOMMU_REG_GLOBAL_CMD (0x18) /* Global Command Register, 32 bit */ -#define IOMMU_REG_GLOBAL_STS (0x1C) /* Global Status Register, 32 bit */ -#define IOMMU_REG_ROOTENTRY (0x20) /* Root-Entry Table Addr Reg, 64 bit */ -#define IOMMU_REG_CONTEXT_CMD (0x28) /* Context Comand Register, 64 bit */ -#define IOMMU_REG_FAULT_STS (0x34) /* Fault Status Register, 32 bit */ -#define IOMMU_REG_FEVNT_CON (0x38) /* Fault Event Control Reg, 32 bit */ -#define IOMMU_REG_FEVNT_DATA (0x3C) /* Fault Event Data Register, 32 bit */ -#define IOMMU_REG_FEVNT_ADDR (0x40) /* Fault Event Address Reg, 32 bit */ -#define IOMMU_REG_FEVNT_UADDR (0x44) /* Fault Event Upper Addr Reg, 32 bit */ -#define IOMMU_REG_AFAULT_LOG (0x58) /* Advanced Fault Log Reg, 64 bit */ -#define IOMMU_REG_PMER (0x64) /* Protected Memory Enble Reg, 32 bit */ -#define IOMMU_REG_PLMBR (0x68) /* Protected Low Mem Base Reg, 32 bit */ -#define IOMMU_REG_PLMLR (0x6C) /* Protected Low Mem Lim Reg, 32 bit */ -#define IOMMU_REG_PHMBR (0X70) /* Protectd High Mem Base Reg, 64 bit */ -#define IOMMU_REG_PHMLR (0x78) /* Protected High Mem Lim Reg, 64 bit */ -#define IOMMU_REG_INVAL_QH (0x80) /* Invalidation Queue Head, 64 bit */ -#define IOMMU_REG_INVAL_QT (0x88) /* Invalidation Queue Tail, 64 bit */ -#define IOMMU_REG_INVAL_QAR (0x90) /* Invalidtion Queue Addr Reg, 64 bit */ -#define IOMMU_REG_INVAL_CSR (0x9C) /* Inval Compl Status Reg, 32 bit */ -#define 
IOMMU_REG_INVAL_CECR (0xA0) /* Inval Compl Evnt Ctrl Reg, 32 bit */ -#define IOMMU_REG_INVAL_CEDR (0xA4) /* Inval Compl Evnt Data Reg, 32 bit */ -#define IOMMU_REG_INVAL_CEAR (0xA8) /* Inval Compl Event Addr Reg, 32 bit */ -#define IOMMU_REG_INVAL_CEUAR (0xAC) /* Inval Comp Evnt Up Addr reg, 32bit */ -#define IOMMU_REG_IRTAR (0xB8) /* INTR Remap Tbl Addr Reg, 64 bit */ - -/* ioapic memory region */ -#define IOAPIC_REGION_START (0xfee00000) -#define IOAPIC_REGION_END (0xfeefffff) - -/* iommu page */ -#define IOMMU_LEVEL_STRIDE (9) -#define IOMMU_LEVEL_SIZE ((uint64_t)1 << IOMMU_LEVEL_STRIDE) -#define IOMMU_LEVEL_OFFSET (IOMMU_LEVEL_SIZE - 1) -#define IOMMU_PAGE_SHIFT (12) -#define IOMMU_PAGE_SIZE (uint64_t)((uint64_t)1 << IOMMU_PAGE_SHIFT) -#define IOMMU_PAGE_MASK ~(IOMMU_PAGE_SIZE - 1) -#define IOMMU_PAGE_OFFSET (IOMMU_PAGE_SIZE - 1) -#define IOMMU_PAGE_ROUND(x) (((x) + IOMMU_PAGE_OFFSET) & IOMMU_PAGE_MASK) -#define IOMMU_PTOB(x) (((uint64_t)(x)) << IOMMU_PAGE_SHIFT) -#define IOMMU_BTOP(x) ((x) >> IOMMU_PAGE_SHIFT) -#define IOMMU_BTOPR(x) IOMMU_BTOP((x) + IOMMU_PAGE_OFFSET) -#define IOMMU_LEVEL_TO_AGAW(x) ((x) * 9 + 12) -#define IOMMU_IOVA_MAX_4G (((uint64_t)1 << 32) - 1) -#define IOMMU_SIZE_4G ((uint64_t)1 << 32) -#define IOMMU_SIZE_2M ((uint64_t)1 << 21) -#define IOMMU_2M_MASK ~(IOMMU_SIZE_2M - 1) -#define IOMMU_PTE_MAX (IOMMU_PAGE_SIZE >> 3) - -/* iommu page entry property */ -#define IOMMU_PAGE_PROP_READ (1) -#define IOMMU_PAGE_PROP_WRITE (2) -#define IOMMU_PAGE_PROP_RW (IOMMU_PAGE_PROP_READ | IOMMU_PAGE_PROP_WRITE) -#define IOMMU_PAGE_PROP_NOSYNC (4) - -/* root context entry */ -#define ROOT_ENTRY_GET_P(x) (((x)->lo) & 0x1) -#define ROOT_ENTRY_SET_P(x) ((x)->lo) |= 0x1 -#define ROOT_ENTRY_GET_CTP(x) (((x)->lo) & IOMMU_PAGE_MASK) -#define ROOT_ENTRY_SET_CTP(x, p) ((x)->lo) |= ((p) & IOMMU_PAGE_MASK) -#define CONT_ENTRY_GET_P(x) (((x)->lo) & 0x1) -#define CONT_ENTRY_SET_P(x) ((x)->lo) |= 0x1 -#define CONT_ENTRY_SET_ASR(x, p) ((x)->lo) |= ((p) & IOMMU_PAGE_MASK) -#define CONT_ENTRY_GET_ASR(x) (((x)->lo) & IOMMU_PAGE_MASK) -#define CONT_ENTRY_SET_AW(x, v) ((x)->hi) |= ((v) & 7) -#define CONT_ENTRY_SET_DID(x, v) ((x)->hi) |= (((v) & ((1 << 16) - 1)) << 8) - -/* fault register */ -#define IOMMU_FAULT_STS_PPF (2) -#define IOMMU_FAULT_STS_PFO (1) -#define IOMMU_FAULT_STS_ITE (1 << 6) -#define IOMMU_FAULT_STS_ICE (1 << 5) -#define IOMMU_FAULT_STS_IQE (1 << 4) -#define IOMMU_FAULT_GET_INDEX(x) (((x) >> 8) & 0xff) -#define IOMMU_FRR_GET_F(x) ((x) >> 63) -#define IOMMU_FRR_GET_FR(x) (((x) >> 32) & 0xff) -#define IOMMU_FRR_GET_FT(x) (((x) >> 62) & 0x1) -#define IOMMU_FRR_GET_SID(x) ((x) & 0xffff) - -/* (ex)capability register */ -#define IOMMU_CAP_GET_NFR(x) ((((x) >> 40) & 0xff) + 1) -#define IOMMU_CAP_GET_DWD(x) (((x) >> 54) & 1) -#define IOMMU_CAP_GET_DRD(x) (((x) >> 55) & 1) -#define IOMMU_CAP_GET_PSI(x) (((x) >> 39) & 1) -#define IOMMU_CAP_GET_SPS(x) (((x) >> 34) & 0xf) -#define IOMMU_CAP_GET_ISOCH(x) (((x) >> 23) & 1) -#define IOMMU_CAP_GET_ZLR(x) (((x) >> 22) & 1) -#define IOMMU_CAP_GET_MAMV(x) (((x) >> 48) & 0x3f) -#define IOMMU_CAP_GET_CM(x) (((x) >> 7) & 1) -#define IOMMU_CAP_GET_PHMR(x) (((x) >> 6) & 1) -#define IOMMU_CAP_GET_PLMR(x) (((x) >> 5) & 1) -#define IOMMU_CAP_GET_RWBF(x) (((x) >> 4) & 1) -#define IOMMU_CAP_GET_AFL(x) (((x) >> 3) & 1) -#define IOMMU_CAP_GET_FRO(x) ((((x) >> 24) & 0x3ff) * 16) -#define IOMMU_CAP_MGAW(x) (((((uint64_t)x) >> 16) & 0x3f) + 1) -#define IOMMU_CAP_SAGAW(x) (((x) >> 8) & 0x1f) -#define IOMMU_CAP_ND(x) (1 << (((x) & 0x7) *2 + 4)) -1 -#define 
IOMMU_ECAP_GET_IRO(x) ((((x) >> 8) & 0x3ff) << 4) -#define IOMMU_ECAP_GET_MHMV(x) ((x >> 20) & 0xf) -#define IOMMU_ECAP_GET_SC(x) ((x) & 0x80) -#define IOMMU_ECAP_GET_PT(x) ((x) & 0x40) -#define IOMMU_ECAP_GET_CH(x) ((x) & 0x20) -#define IOMMU_ECAP_GET_EIM(x) ((x) & 0x10) -#define IOMMU_ECAP_GET_IR(x) ((x) & 0x8) -#define IOMMU_ECAP_GET_DI(x) ((x) & 0x4) -#define IOMMU_ECAP_GET_QI(x) ((x) & 0x2) -#define IOMMU_ECAP_GET_C(x) ((x) & 0x1) - - -/* iotlb invalidation */ -#define TLB_INV_GLOBAL (((uint64_t)1) << 60) -#define TLB_INV_DOMAIN (((uint64_t)2) << 60) -#define TLB_INV_PAGE (((uint64_t)3) << 60) -#define TLB_INV_GET_IAIG(x) (((x) >> 57) & 7) -#define TLB_INV_DRAIN_READ (((uint64_t)1) << 49) -#define TLB_INV_DRAIN_WRITE (((uint64_t)1) << 48) -#define TLB_INV_DID(x) (((uint64_t)((x) & 0xffff)) << 32) -#define TLB_INV_IVT (((uint64_t)1) << 63) -#define TLB_IVA_HINT(x) (((x) & 0x1) << 6) -#define TLB_IVA_LEAF 1 -#define TLB_IVA_WHOLE 0 - -/* context invalidation */ -#define CCMD_INV_ICC (((uint64_t)1) << 63) -#define CCMD_INV_GLOBAL (((uint64_t)1) << 61) -#define CCMD_INV_DOMAIN (((uint64_t)2) << 61) -#define CCMD_INV_DEVICE (((uint64_t)3) << 61) -#define CCMD_INV_DID(x) ((uint64_t)((x) & 0xffff)) -#define CCMD_INV_SID(x) (((uint64_t)((x) & 0xffff)) << 16) -#define CCMD_INV_FM(x) (((uint64_t)((x) & 0x3)) << 32) - -/* global command register */ -#define IOMMU_GCMD_TE (((uint32_t)1) << 31) -#define IOMMU_GCMD_SRTP (((uint32_t)1) << 30) -#define IOMMU_GCMD_SFL (((uint32_t)1) << 29) -#define IOMMU_GCMD_EAFL (((uint32_t)1) << 28) -#define IOMMU_GCMD_WBF (((uint32_t)1) << 27) -#define IOMMU_GCMD_QIE (((uint32_t)1) << 26) -#define IOMMU_GCMD_IRE (((uint32_t)1) << 25) -#define IOMMU_GCMD_SIRTP (((uint32_t)1) << 24) -#define IOMMU_GCMD_CFI (((uint32_t)1) << 23) - -/* global status register */ -#define IOMMU_GSTS_TES (((uint32_t)1) << 31) -#define IOMMU_GSTS_RTPS (((uint32_t)1) << 30) -#define IOMMU_GSTS_FLS (((uint32_t)1) << 29) -#define IOMMU_GSTS_AFLS (((uint32_t)1) << 28) -#define IOMMU_GSTS_WBFS (((uint32_t)1) << 27) -#define IOMMU_GSTS_QIES (((uint32_t)1) << 26) -#define IOMMU_GSTS_IRES (((uint32_t)1) << 25) -#define IOMMU_GSTS_IRTPS (((uint32_t)1) << 24) -#define IOMMU_GSTS_CFIS (((uint32_t)1) << 23) - -/* psi address mask */ -#define ADDR_AM_MAX(m) (((uint_t)1) << (m)) -#define ADDR_AM_OFFSET(n, m) ((n) & (ADDR_AM_MAX(m) - 1)) - -/* dmar fault event */ -#define IOMMU_INTR_IPL (8) -#define IOMMU_REG_FEVNT_CON_IM_SHIFT (31) - -/* iommu enable state */ -#define DMAR_ENABLE 0x1 -#define QINV_ENABLE 0x2 -#define INTRR_ENABLE 0x4 - -/* invalidation queue table entry size */ -#define QINV_ENTRY_SIZE 0x10 - -/* max value of Queue Size field of Invalidation Queue Address Register */ -#define QINV_MAX_QUEUE_SIZE 0x7 - -/* status data size of invalidation wait descriptor */ -#define QINV_SYNC_DATA_SIZE 0x4 - -/* status data value of invalidation wait descriptor */ -#define QINV_SYNC_DATA_FENCE 1 -#define QINV_SYNC_DATA_UNFENCE 2 - -/* invalidation queue head and tail */ -#define QINV_IQA_HEAD(QH) BITX((QH), 18, 4) -#define QINV_IQA_TAIL_SHIFT 4 - -/* max value of Size field of Interrupt Remapping Table Address Register */ -#define INTRR_MAX_IRTA_SIZE 0xf - -/* interrupt remapping table entry size */ -#define INTRR_RTE_SIZE 0x10 - -/* ioapic redirection table entry related shift of remappable interrupt */ -#define INTRR_IOAPIC_IIDX_SHIFT 17 -#define INTRR_IOAPIC_FORMAT_SHIFT 16 -#define INTRR_IOAPIC_TM_SHIFT 15 -#define INTRR_IOAPIC_POL_SHIFT 13 -#define INTRR_IOAPIC_IIDX15_SHIFT 11 - -/* msi intr entry 
related shift of remappable interrupt */ -#define INTRR_MSI_IIDX_SHIFT 5 -#define INTRR_MSI_FORMAT_SHIFT 4 -#define INTRR_MSI_SHV_SHIFT 3 -#define INTRR_MSI_IIDX15_SHIFT 2 - -#define INTRR_IIDX_FULL (uint_t)-1 - -#define RDT_DLM(rdt) BITX((rdt), 10, 8) -#define RDT_DM(rdt) BT_TEST(&(rdt), 11) -#define RDT_POL(rdt) BT_TEST(&(rdt), 13) -#define RDT_TM(rdt) BT_TEST(&(rdt), 15) - -#define INTRR_DISABLE (void *)-1 - -/* page entry structure */ -typedef uint64_t *iopte_t; - -/* root/context entry structure */ -typedef struct iorce { - uint64_t lo; - uint64_t hi; -} *iorce_t; - -/* kernel maintained page table entry */ -typedef struct iovpte { - /* - * pointer to the cpu accessable - * iommu page table - */ - caddr_t vp; - /* - * pointer to the real iommu - * page table - */ - caddr_t pp; -} *iovpte_t; - -/* - * struct iommu_kstat - * kstat tructure for iommu - */ -typedef struct iommu_kstat { - - /* hardware dependent */ - kstat_named_t is_dmar_enabled; - kstat_named_t is_qinv_enabled; - kstat_named_t is_intrr_enabled; - kstat_named_t is_iotlb_psi; - kstat_named_t is_iotlb_domain; - kstat_named_t is_iotlb_global; - kstat_named_t is_write_buffer; - kstat_named_t is_context_cache; - kstat_named_t is_wait_complete_us; - kstat_named_t is_domain_alloc; - - /* hardware independent */ - kstat_named_t is_page_used; -} iommu_kstat_t; - -/* - * struct iommu_stat - * statistics for iommu - */ -typedef struct iommu_stat { - uint64_t st_iotlb_psi; - uint64_t st_iotlb_domain; - uint64_t st_iotlb_global; - uint64_t st_write_buffer; - uint64_t st_context_cache; - uint64_t st_wait_complete_us; - uint64_t st_domain_alloc; -} iommu_stat_t; - -struct intel_iommu_state; -struct iommu_dvma_cookie; -struct dmar_domain_state; - -/* - * invalidation granularity - */ -typedef enum { - TLB_INV_G_GLOBAL = 1, - TLB_INV_G_DOMAIN, - TLB_INV_G_PAGE -} tlb_inv_g_t; - -typedef enum { - CTT_INV_G_GLOBAL = 1, - CTT_INV_G_DOMAIN, - CTT_INV_G_DEVICE -} ctt_inv_g_t; - -typedef enum { - IEC_INV_GLOBAL = 0, - IEC_INV_INDEX -} iec_inv_g_t; - -/* - * struct dmar_ops - * dmar hardware operation functions - */ -struct dmar_ops { - /* enable */ - void (*do_enable)(struct intel_iommu_state *iommu); - - /* page fault */ - int (*do_fault)(struct intel_iommu_state *iommu); - - /* cache related */ - void (*do_flwb)(struct intel_iommu_state *iommu); - void (*do_iotlb_psi)(struct intel_iommu_state *iommu, uint_t domain_id, - uint64_t dvma, uint_t count, uint_t hint); - void (*do_iotlb_dsi)(struct intel_iommu_state *iommu, uint_t domain_id); - void (*do_iotlb_gbl)(struct intel_iommu_state *iommu); - void (*do_context_fsi)(struct intel_iommu_state *iommu, - uint8_t function_mask, - uint16_t source_id, uint_t domain_id); - void (*do_context_dsi)(struct intel_iommu_state *iommu, - uint_t domain_id); - void (*do_context_gbl)(struct intel_iommu_state *iommu); - void (*do_plant_wait)(struct intel_iommu_state *iommu, - struct iommu_dvma_cookie *dcookies, uint_t count, - uint_t array_size); - void (*do_reap_wait)(struct intel_iommu_state *iommu); - - /* root entry */ - void (*do_set_root_table)(struct intel_iommu_state *iommu); - - /* cpu cache line flush */ - void (*do_clflush)(caddr_t addr, uint_t size); -}; - -/* - * struct iotlb_cache_node - * the pending data for iotlb flush - */ -typedef struct iotlb_pend_node { - /* node to hook into the list */ - list_node_t node; - /* ptr to dvma cookie array */ - struct iommu_dvma_cookie *icn_dcookies; - /* valid cookie count */ - uint_t icn_count; - /* array size */ - uint_t icn_array_size; -} iotlb_pend_node_t; 
- -/* - * struct iotlb_cache_head - * the pending head for the iotlb flush - */ -typedef struct iotlb_pend_head { - /* the pending iotlb list */ - kmutex_t ich_pend_lock; - list_t ich_pend_list; - uint_t ich_pend_count; - - /* the pending node cache list */ - kmutex_t ich_mem_lock; - list_t ich_mem_list; -} iotlb_pend_head_t; - -struct inv_queue_state; -struct intr_remap_tbl_state; -struct iommu_pghdl; - -#define IOMMU_PGHDL_HASH_SIZE (256) - -/* - * struct intel_iommu_state - * This structure describes the state information - * of each iommu unit in the platform. It is cre- - * ated in the dmarnex driver's attach(), and will - * be used in every DMA DDI and the iommu transla- - * tion functions - * - * node - the list node to hook it in iommu_states - * iu_drhd - the related drhd - * iu_reg_handle - register access handler - * iu_reg_lock - lock to protect register operation - * iu_reg_address - virtual address of the register base address - * iu_capability - copy of the capability register - * iu_excapability - copy of the extention register - * iu_root_entry_paddr - root entry page table - * iu_root_context_lock - root context entry lock - * iu_gaw - guest address width - * iu_agaw - adjusted guest address width - * iu_level - the page table level - * iu_global_cmd_reg - global command register save place - * iu_max_domain - the maximum domain numbers - * iu_domain_id_hdl - domain id allocator handler - * iu_enabled - the soft state of the iommu - * iu_coherency - hardware access is coherent - * iu_kstat - kstat pointer - * iu_statistics - iommu statistics - * iu_dmar_ops - iommu operation functions - * iu_pend_head - pending iotlb list - * iu_inv_queue - invalidation queue state - * iu_intr_remap_tbl - interrupt remapping table state - * iu_pghdl_hash - hash of pages allocated for IOMMU internal work. - */ -typedef struct intel_iommu_state { - list_node_t node; - drhd_info_t *iu_drhd; - ddi_acc_handle_t iu_reg_handle; - kmutex_t iu_reg_lock; - caddr_t iu_reg_address; - uint64_t iu_capability; - uint64_t iu_excapability; - paddr_t iu_root_entry_paddr; - kmutex_t iu_root_context_lock; - int iu_gaw; - int iu_agaw; - int iu_level; - uint32_t iu_global_cmd_reg; - int iu_max_domain; - iommu_rscs_t iu_domain_id_hdl; - uchar_t iu_enabled; - boolean_t iu_coherency; - kstat_t *iu_kstat; - iommu_stat_t iu_statistics; - struct dmar_ops *iu_dmar_ops; - iotlb_pend_head_t iu_pend_head; - struct inv_queue_state *iu_inv_queue; - struct intr_remap_tbl_state *iu_intr_remap_tbl; - struct iommu_pghdl *iu_pghdl_hash[IOMMU_PGHDL_HASH_SIZE]; -} intel_iommu_state_t; - -/* - * struct dvma_cache_node - * dvma cache node - */ -typedef struct dvma_cache_node { - list_node_t node; - - /* parameters */ - size_t dcn_align; - uint64_t dcn_dvma; -} dvma_cache_node_t; - -/* - * struct dvma_cache_head - * dvma cache head - */ -typedef struct dvma_cache_head { - /* the list of the free dvma */ - kmutex_t dch_free_lock; - list_t dch_free_list; - uint_t dch_free_count; - - /* the cache for the node memory */ - kmutex_t dch_mem_lock; - list_t dch_mem_list; -} dvma_cache_head_t; - -#define DVMA_CACHE_HEAD_CNT 64 - -/* - * struct dmar_domain_state - * This structure describes the state information - * of an iommu domain. It is created and initiated - * when the driver call ddi_dma_bind_handle(). 
And - * will be used in each iommu translation fucntions - * - * dm_domain_id - the domain id - * dm_iommu - iommu pointer this domain belongs to - * dm_dvma_map - dvma map - * dm_dvma_cache - dvma cahce lists - * dm_page_table_paddr - page table address for this domain - * dm_pgtable_lock - lock to protect changes to page table. - * dm_pt_tree - the kernel maintained page tables - * dm_identity - does this domain identity mapped - */ -typedef struct dmar_domain_state { - list_node_t node; - uint_t dm_domain_id; - intel_iommu_state_t *dm_iommu; - vmem_t *dm_dvma_map; - dvma_cache_head_t dm_dvma_cache[DVMA_CACHE_HEAD_CNT]; - paddr_t dm_page_table_paddr; - kmutex_t dm_pgtable_lock; - struct iovpte dm_pt_tree; - boolean_t dm_identity; -} dmar_domain_state_t; - -/* - * struct pci_dev_info - * pci device info structure - */ -typedef struct pci_dev_info { - list_node_t node; - int pdi_seg; - int pdi_bus; - int pdi_devfn; - dev_info_t *pdi_dip; -} pci_dev_info_t; - -#define IOMMU_PPB_NONE 0 -#define IOMMU_PPB_PCIE_PCIE 1 -#define IOMMU_PPB_PCIE_PCI 2 -#define IOMMU_PPB_PCI_PCI 3 - -#define MAX_COOKIE_CACHE_SIZE 20 -/* - * struct iommu_dvma_cookie - * this cookie record the dvma allocated for - * an individual device - */ -typedef struct iommu_dvma_cookie { - uint64_t dc_addr; - uint64_t dc_size; - struct dmar_domain_state *dc_domain; - size_t dc_align; - struct iommu_dvma_cookie *dc_next; -} iommu_dvma_cookie_t; - -/* - * struct dvma_cookie_head - * the cookie cache head - */ -typedef struct dvma_cookie_head { - kmutex_t dch_lock; - iommu_dvma_cookie_t *dch_next; - uint_t dch_count; -} dvma_cookie_head_t; - -/* physical contigous pages for invalidation queue */ -typedef struct inv_queue_mem { - kmutex_t lock; - ddi_dma_handle_t dma_hdl; - ddi_acc_handle_t acc_hdl; - caddr_t vaddr; - paddr_t paddr; - uint_t size; - uint16_t head; - uint16_t tail; -} inv_queue_mem_t; - -/* - * invalidation queue state - * This structure describes the state information of the - * invalidation queue table and related status memeory for - * invalidation wait descriptor - * - * iq_table - invalidation queue table - * iq_sync - sync status memory for invalidation wait descriptor - * iotlb_pend_node - pending tlb node - */ -typedef struct inv_queue_state { - inv_queue_mem_t iq_table; - inv_queue_mem_t iq_sync; - iotlb_pend_node_t **iotlb_pend_node; -} inv_queue_state_t; - -/* invalidation queue entry structure */ -typedef struct inv_dsc { - uint64_t lo; - uint64_t hi; -} inv_dsc_t; - -/* helper macro for making queue invalidation descriptor */ -#define INV_DSC_TYPE(dsc) ((dsc)->lo & 0xF) -#define CC_INV_DSC_HIGH (0) -#define CC_INV_DSC_LOW(fm, sid, did, g) (((uint64_t)(fm) << 48) | \ - ((uint64_t)(sid) << 32) | \ - ((uint64_t)(did) << 16) | \ - ((uint64_t)(g) << 4) | \ - 1) - -#define IOTLB_INV_DSC_HIGH(addr, ih, am) (((uint64_t)(addr)) | \ - ((uint64_t)(ih) << 6) | \ - ((uint64_t)(am))) - -#define IOTLB_INV_DSC_LOW(did, dr, dw, g) (((uint64_t)(did) << 16) | \ - ((uint64_t)(dr) << 7) | \ - ((uint64_t)(dw) << 6) | \ - ((uint64_t)(g) << 4) | \ - 2) - -#define DEV_IOTLB_INV_DSC_HIGH(addr, s) (((uint64_t)(addr)) | (s)) - -#define DEV_IOTLB_INV_DSC_LOW(sid, max_invs_pd) ( \ - ((uint64_t)(sid) << 32) | \ - ((uint64_t)(max_invs_pd) << 16) | \ - 3) - -#define IEC_INV_DSC_HIGH (0) -#define IEC_INV_DSC_LOW(iidx, im, g) (((uint64_t)(iidx) << 32) | \ - ((uint64_t)(im) << 27) | \ - ((uint64_t)(g) << 4) | \ - 4) - -#define INV_WAIT_DSC_HIGH(saddr) ((uint64_t)(saddr)) - -#define INV_WAIT_DSC_LOW(sdata, fn, sw, iflag) 
(((uint64_t)(sdata) << 32) | \ - ((uint64_t)(fn) << 6) | \ - ((uint64_t)(sw) << 5) | \ - ((uint64_t)(iflag) << 4) | \ - 5) - -/* save source id and iommu structure for ioapic */ -typedef struct ioapic_iommu_info { - uint16_t sid; - intel_iommu_state_t *iommu; -} ioapic_iommu_info_t; - -typedef struct intr_remap_private { - intel_iommu_state_t *ir_iommu; - uint16_t ir_iidx; - uint32_t ir_sid_svt_sq; -} intr_remap_private_t; - -#define INTRR_PRIVATE(airq) ((intr_remap_private_t *)airq->airq_intrr_private) -#define AIRQ_PRIVATE(airq) (airq->airq_intrr_private) - -/* interrupt remapping table state info */ -typedef struct intr_remap_tbl_state { - kmutex_t lock; - ddi_dma_handle_t dma_hdl; - ddi_acc_handle_t acc_hdl; - caddr_t vaddr; - paddr_t paddr; - uint_t size; - bitset_t map; - uint_t free; -} intr_remap_tbl_state_t; - -/* interrupt remapping table entry */ -typedef struct intr_rte { - uint64_t lo; - uint64_t hi; -} intr_rte_t; - -#define IRTE_HIGH(sid_svt_sq) (sid_svt_sq) -#define IRTE_LOW(dst, vector, dlm, tm, rh, dm, fpd, p) \ - (((uint64_t)(dst) << 32) | \ - ((uint64_t)(vector) << 16) | \ - ((uint64_t)(dlm) << 5) | \ - ((uint64_t)(tm) << 4) | \ - ((uint64_t)(rh) << 3) | \ - ((uint64_t)(dm) << 2) | \ - ((uint64_t)(fpd) << 1) | \ - (p)) - -typedef enum { - SVT_NO_VERIFY = 0, /* no verification */ - SVT_ALL_VERIFY, /* using sid and sq to verify */ - SVT_BUS_VERIFY, /* verify #startbus and #endbus */ - SVT_RSVD -} intrr_svt_t; - -typedef enum { - SQ_VERIFY_ALL = 0, /* verify all 16 bits */ - SQ_VERIFY_IGR_1, /* ignore bit 3 */ - SQ_VERIFY_IGR_2, /* ignore bit 2-3 */ - SQ_VERIFY_IGR_3 /* ignore bit 1-3 */ -} intrr_sq_t; - -/* - * struct vmem_walk_arg - * the arg of vmem vmem walker - */ -typedef struct vmem_walk_arg { - rmrr_info_t *vwa_rmrr; - dmar_domain_state_t *vwa_domain; - dev_info_t *vwa_dip; -} vmem_walk_arg_t; - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_INTEL_IOMMU_H */
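The descriptor macros in this removed header built the 128-bit entries that queued invalidation feeds to the hardware; per INV_DSC_TYPE, the low 4 bits of the first quadword select the descriptor type (2 = IOTLB invalidation). Composing a domain-selective IOTLB invalidation the way the old code would have:

#include <stdint.h>
#include <stdio.h>

typedef struct inv_dsc {
    uint64_t lo;
    uint64_t hi;
} inv_dsc_t;

#define IOTLB_INV_DSC_LOW(did, dr, dw, g)   (((uint64_t)(did) << 16) | \
                                            ((uint64_t)(dr) << 7) | \
                                            ((uint64_t)(dw) << 6) | \
                                            ((uint64_t)(g) << 4) | 2)
#define IOTLB_INV_DSC_HIGH(addr, ih, am)    (((uint64_t)(addr)) | \
                                            ((uint64_t)(ih) << 6) | \
                                            ((uint64_t)(am)))

int
main(void)
{
    /* granularity 2 = domain-selective; drain pending reads and writes */
    inv_dsc_t dsc = {
        IOTLB_INV_DSC_LOW(42, 1, 1, 2),
        IOTLB_INV_DSC_HIGH(0, 0, 0),
    };

    printf("iotlb inv: lo=0x%016llx hi=0x%016llx\n",
        (unsigned long long)dsc.lo, (unsigned long long)dsc.hi);
    return (0);
}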
--- a/usr/src/uts/i86pc/sys/iommu_rscs.h Sat Jan 30 15:04:39 2010 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,81 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_IOMMU_H -#define _SYS_IOMMU_H - -/* - * XXX - */ - -#include <sys/types.h> -#include <sys/conf.h> -#include <sys/modctl.h> -#include <sys/sunddi.h> - -#ifdef __cplusplus -extern "C" { -#endif - - -/* - * iommu_page_alloc() - * allocate a 4K page and map it into KVA - * iommu_page_free() - * unmap and free page from iommu_page_alloc() - * iommu_page_map() - * map page into kva - * iommu_page_unmap() - * unmap page out of kva - */ - -typedef struct iommu_pghdl { - ddi_dma_handle_t dma_hdl; - ddi_acc_handle_t mem_hdl; - paddr_t paddr; - caddr_t vaddr; - struct iommu_pghdl *prev; - struct iommu_pghdl *next; -} iommu_pghdl_t; - -struct intel_iommu_state; - -iommu_pghdl_t *iommu_page_alloc(struct intel_iommu_state *iommu, int kmflag); -void iommu_page_free(struct intel_iommu_state *iommu, paddr_t paddr); -caddr_t iommu_get_vaddr(struct intel_iommu_state *iommu, paddr_t paddr); - -typedef struct iommu_rscs_s *iommu_rscs_t; - -void iommu_rscs_init(uint_t min_val, uint_t max_val, iommu_rscs_t *handle); -void iommu_rscs_fini(iommu_rscs_t *handle); -int iommu_rscs_alloc(iommu_rscs_t handle, uint_t *rs); -void iommu_rscs_free(iommu_rscs_t handle, uint_t rs); - - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_IOMMU_H */
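The removed iommu_rscs interface is a small numeric resource allocator. A usage sketch built only from the prototypes above; the [1, 254] range, the DDI_SUCCESS-style return check, and the domain-id use case are illustrative assumptions, since the header does not document them:

/*
 * Sketch only: allocate an id from a [1, 254] pool using the removed
 * iommu_rscs interface, then release it and tear the pool down.
 */
static int
example_alloc_domain_id(void)
{
	iommu_rscs_t pool;
	uint_t id;

	iommu_rscs_init(1, 254, &pool);
	if (iommu_rscs_alloc(pool, &id) != DDI_SUCCESS) {
		iommu_rscs_fini(&pool);
		return (-1);
	}
	/* ... use "id" as a DMAR domain id ... */
	iommu_rscs_free(pool, id);
	iommu_rscs_fini(&pool);
	return (0);
}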
--- a/usr/src/uts/i86pc/sys/rootnex.h Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/i86pc/sys/rootnex.h Sat Jan 30 18:23:16 2010 -0800 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -205,6 +205,18 @@ #endif } rootnex_window_t; +typedef struct dvcookie { + uint64_t dvck_dvma; + uint64_t dvck_npages; + uint64_t dvck_sidx; + uint64_t dvck_eidx; +} dvcookie_t; + +typedef struct dcookie { + paddr_t dck_paddr; + uint64_t dck_npages; +} dcookie_t; + /* per dma handle private state */ typedef struct rootnex_dma_s { /* @@ -310,11 +322,16 @@ uchar_t *dp_prealloc_buffer; /* - * intel iommu related state - * dvma_cookies saves the dvma allocated for this handler, it has the - * size of si_max_pages, set when bind handler and freed when unbind + * Intel IOMMU (immu) related state. + * dp_dvcookies saves the dvma cookies allocated for this handle; + * dp_dvmax is the max index used in dp_dvcookies. */ - void *dp_dvma_cookies; + dvcookie_t *dp_dvcookies; + uint64_t dp_dvmax; + dcookie_t *dp_dcookies; + uint64_t dp_dmax; + uint64_t dp_max_cookies; + uint64_t dp_max_dcookies; /* * sleep flags set on bind and unset on unbind @@ -349,7 +366,6 @@ * r_dip - rootnex dip * r_reserved_msg_printed - ctlops reserve message threshold * r_counters - profile/performance counters - * r_intel_iommu_enabled - intel iommu enabled */ typedef struct rootnex_state_s { uint_t r_prealloc_cookies; @@ -361,11 +377,9 @@ ddi_iblock_cookie_t r_err_ibc; boolean_t r_reserved_msg_printed; uint64_t r_counters[ROOTNEX_CNT_LAST]; - boolean_t r_intel_iommu_enabled; iommulib_nexhandle_t r_iommulib_handle; } rootnex_state_t; - #ifdef __cplusplus } #endif
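The dvcookie_t/dcookie_t pair replaces the opaque dp_dvma_cookies pointer: dvcookies describe DVMA runs, dcookies the physical ranges backing them, and dvck_sidx/dvck_eidx tie each run to its backing dcookies. A hypothetical sketch of how a bind might append to the two arrays; none of this logic is from the changeset itself, only the struct fields are:

/*
 * Sketch only: record one DVMA run backed by a single physical
 * range. dvck_sidx/dvck_eidx index into dp_dcookies, linking the
 * virtual run to its physical cookies.
 */
static void
example_record_cookie(rootnex_dma_t *dma, uint64_t dvma,
    paddr_t paddr, uint64_t npages)
{
	dvcookie_t *dvc = &dma->dp_dvcookies[dma->dp_dvmax];
	dcookie_t *dc = &dma->dp_dcookies[dma->dp_dmax];

	dc->dck_paddr = paddr;
	dc->dck_npages = npages;

	dvc->dvck_dvma = dvma;
	dvc->dvck_npages = npages;
	dvc->dvck_sidx = dma->dp_dmax;	/* first backing dcookie */
	dvc->dvck_eidx = dma->dp_dmax;	/* last backing dcookie */

	dma->dp_dvmax++;
	dma->dp_dmax++;
}

Presumably the split lets unbind walk only the dvcookies to free DVMA space while the physical cookies stay densely packed, with dp_max_cookies/dp_max_dcookies bounding the two arrays.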
--- a/usr/src/uts/intel/ia32/ml/modstubs.s Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/intel/ia32/ml/modstubs.s Sat Jan 30 18:23:16 2010 -0800 @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -1376,6 +1376,17 @@ #endif /* + * Stubs for rootnex nexus driver. + */ +#ifndef ROOTNEX_MODULE + MODULE(rootnex,drv); + STUB(rootnex, immu_init, 0); + STUB(rootnex, immu_startup, 0); + STUB(rootnex, immu_physmem_update, 0); + END_MODULE(rootnex); +#endif + +/* * Stubs for kernel socket, for iscsi */ #ifndef KSOCKET_MODULE
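These STUB entries give early kernel code weak entry points into the rootnex module: the first call through a stub loads the module and jumps to the real function, and the trailing 0 selects the default return used if the module cannot be loaded. Conceptually, in C, a stub behaves roughly as below; this is a hypothetical rendering of the assembly stub machinery, and the immu_init() signature is an assumption, not taken from this changeset:

/*
 * Conceptual sketch of a modstubs entry: callers link against this
 * symbol; the real assembly stub holds/loads "drv/rootnex" on first
 * use and transfers control to the module's own immu_init().
 */
int
immu_init(void)	/* hypothetical signature for illustration */
{
	if (modload("drv", "rootnex") == -1)
		return (0);	/* default return from STUB(..., 0) */
	/* ... re-resolve and tail-call the module's immu_init() ... */
	return (0);
}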
--- a/usr/src/uts/intel/io/hotplug/pcicfg/pcicfg.c Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/intel/io/hotplug/pcicfg/pcicfg.c Sat Jan 30 18:23:16 2010 -0800 @@ -3746,6 +3746,8 @@ */ (void) i_ndi_config_node(new_child, DS_LINKED, 0); + DEVI_SET_PCI(new_child); + if ((header_type & PCI_HEADER_TYPE_M) == PCI_HEADER_PPB) { DEBUG3("--Bridge found bus [0x%x] device[0x%x] func [0x%x]\n",
--- a/usr/src/uts/intel/io/pci/pci_boot.c Sat Jan 30 15:04:39 2010 -0800 +++ b/usr/src/uts/intel/io/pci/pci_boot.c Sat Jan 30 18:23:16 2010 -0800 @@ -43,7 +43,6 @@ #include <sys/hotplug/pci/pciehpc_acpi.h> #include <sys/acpi/acpi.h> #include <sys/acpica.h> -#include <sys/intel_iommu.h> #include <sys/iommulib.h> #include <sys/devcache.h> #include <sys/pci_cfgacc_x86.h> @@ -2100,6 +2099,7 @@ } } + DEVI_SET_PCI(dip); reprogram = add_reg_props(dip, bus, dev, func, config_op, pciide); (void) ndi_devi_bind_driver(dip, 0);
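Both this hunk and the pcicfg.c hunk above tag devinfo nodes as PCI at the point of creation (boot-time enumeration here, hotplug in pcicfg.c), so the new immu code can cheaply classify nodes later. A hypothetical consumer-side check follows; DEVI_IS_PCI() is an illustrative predicate name paired with the DEVI_SET_PCI() calls, not a macro from this changeset:

/*
 * Sketch only: skip non-PCI devinfo nodes when setting up IOMMU
 * domains, relying on the flag set by DEVI_SET_PCI() at node
 * creation time.
 */
static int
example_setup_dvma(dev_info_t *dip)
{
	if (!DEVI_IS_PCI(dip)) {
		/* legacy/non-PCI node: leave it on the default path */
		return (DDI_SUCCESS);
	}
	/* ... locate the node's DMAR unit and attach a domain ... */
	return (DDI_SUCCESS);
}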