Mercurial > illumos > illumos-gate
changeset 3272:9a3818e527a3
backout 6345809/6404017/6418713/6466248: x86 build failed; no more mr fscking nice guy
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deleted_files/usr/src/uts/common/io/pcie_fault.c Mon Dec 18 11:06:59 2006 -0800 @@ -0,0 +1,1893 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/sysmacros.h> +#include <sys/types.h> +#include <sys/kmem.h> +#include <sys/modctl.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/sunndi.h> +#include <sys/fm/protocol.h> +#include <sys/fm/util.h> +#include <sys/promif.h> +#include <sys/disp.h> +#include <sys/pcie.h> +#include <sys/pci_cap.h> +#include <sys/pcie_impl.h> + +/* size of error queue */ +uint_t pf_dq_size = 32; + +#define PF_AER_FATAL_ERR (PCIE_AER_UCE_DLP | PCIE_AER_UCE_SD |\ + PCIE_AER_UCE_FCP | PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP) +#define PF_AER_NON_FATAL_ERR (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_TO | \ + PCIE_AER_UCE_CA | PCIE_AER_UCE_ECRC | PCIE_AER_UCE_UR) + +#define PF_SAER_FATAL_ERR (PCIE_AER_SUCE_USC_MSG_DATA_ERR | \ + PCIE_AER_SUCE_UC_ATTR_ERR | PCIE_AER_SUCE_UC_ADDR_ERR | \ + PCIE_AER_SUCE_SERR_ASSERT) +#define PF_SAER_NON_FATAL_ERR (PCIE_AER_SUCE_TA_ON_SC | \ + PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA | \ + PCIE_AER_SUCE_RCVD_MA | PCIE_AER_SUCE_USC_ERR | \ + PCIE_AER_SUCE_UC_DATA_ERR | PCIE_AER_SUCE_TIMER_EXPIRED | \ + PCIE_AER_SUCE_PERR_ASSERT | PCIE_AER_SUCE_INTERNAL_ERR) + +#define PF_PCIE_BDG_ERR (PCIE_DEVSTS_FE_DETECTED | PCIE_DEVSTS_NFE_DETECTED | \ + PCIE_DEVSTS_CE_DETECTED) + +#define PF_PCI_BDG_ERR (PCI_STAT_S_SYSERR | PCI_STAT_S_TARG_AB | \ + PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB | PCI_STAT_S_PERROR) + + +#define PF_DATA_NOT_FOUND -1 + +#define HAS_AER_LOGS(pf_data_p, bit) \ + (pf_data_p->aer_off && (bit & (pf_data_p->aer_control & \ + PCIE_AER_CTL_FST_ERR_PTR_MASK))) + +#define HAS_SAER_LOGS(pf_data_p, bit) \ + (pf_data_p->aer_off && (bit & (pf_data_p->s_aer_control & \ + PCIE_AER_SCTL_FST_ERR_PTR_MASK))) + +#define GET_SAER_CMD(pf_data_p) \ + (pf_data_p->s_aer_h1 >> PCIE_AER_SUCE_HDR_CMD_LWR_SHIFT) & \ + PCIE_AER_SUCE_HDR_CMD_LWR_MASK; + +#define CE_ADVISORY(pf_data_p) \ + (pf_data_p->aer_ce_status & PCIE_AER_CE_AD_NFE) + +#define IS_RC(pf_data_p) \ + (pf_data_p->dev_type == PCIE_PCIECAP_DEV_TYPE_ROOT) + +/* PCIe Fault Fabric Error analysis table */ +typedef struct pf_fab_err_tbl { + uint32_t bit; /* Error bit */ + int (*handler)(); /* Error handling fuction */ +} pf_fab_err_tbl_t; + +/* DMA/PIO/CFG Handle Comparason Function Declaration */ +typedef int (*pf_hdl_compare_t)(struct i_ddi_fmhdl *, ddi_fm_error_t *, + uint32_t, pcie_req_id_t); + +/* PCIe Fault Support Functions. */ +static int pf_find_in_q(pcie_req_id_t bdf, pf_data_t *dq_p, int dq_tail); +static boolean_t pf_in_bus_range(pcie_ppd_t *ppd_p, pcie_req_id_t bdf); +static boolean_t pf_in_addr_range(pcie_ppd_t *ppd_p, uint32_t addr); +static int pf_pcie_dispatch(dev_info_t *pdip, pf_impl_t *impl); +static int pf_pci_dispatch(dev_info_t *pdip, pf_impl_t *impl); +static int pf_default_hdl(dev_info_t *dip, dev_info_t *pdip, + pcie_ppd_t *ppd_p, pf_impl_t *impl); + +/* PCIe Fabric Handle Lookup Support Functions. */ +static int pf_hdl_child_lookup(dev_info_t *rpdip, dev_info_t *dip, + ddi_fm_error_t *derr, uint32_t addr, pcie_req_id_t bdf, + pf_hdl_compare_t cf); +static int pf_cfg_hdl_check(struct i_ddi_fmhdl *fmhdl, + ddi_fm_error_t *derr, uint32_t notused, pcie_req_id_t bdf); +static int pf_pio_hdl_check(struct i_ddi_fmhdl *fmhdl, + ddi_fm_error_t *derr, uint32_t addr, pcie_req_id_t bdf); +static int pf_dma_hdl_check(struct i_ddi_fmhdl *fmhdl, + ddi_fm_error_t *derr, uint32_t addr, pcie_req_id_t bdf); + + +/* PCIe/PCI Fault Handling Support Functions. */ +static int pf_pci_decode(dev_info_t *rpdip, pf_data_t *pf_data_p, uint16_t *cmd, + pcie_req_id_t *bdf, uint32_t *addr, uint32_t *trans_type); +static int pf_analyse_error(dev_info_t *rpdip, ddi_fm_error_t *derr, + pf_data_t *q, int last_index); +static void pf_send_ereport(dev_info_t *rpdip, ddi_fm_error_t *derr, + pf_data_t *dq_p, int dq_tail); +static void pf_adjust_for_no_aer(pf_data_t *pf_data_p); +static void pf_adjust_for_no_saer(pf_data_t *pf_data_p); +static pf_data_t *pf_get_parent_pcie_bridge(pf_data_t *dq_p, + pf_data_t *pf_data_p); +static boolean_t pf_matched_in_rc(pf_data_t *dq_p, pf_data_t *pf_data_p, + uint32_t abort_type); +static int pf_analyse_error_tbl(dev_info_t *rpdip, ddi_fm_error_t *derr, + pf_data_t *dq_p, pf_data_t *pf_data_p, const pf_fab_err_tbl_t *tbl, + uint32_t err_reg); +static int pf_analyse_ca_ur(dev_info_t *rpdip, ddi_fm_error_t *derr, + uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); +static int pf_analyse_ma_ta(dev_info_t *rpdip, ddi_fm_error_t *derr, + uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); +static int pf_analyse_pci(dev_info_t *rpdip, ddi_fm_error_t *derr, + uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); +static int pf_analyse_perr_assert(dev_info_t *rpdip, ddi_fm_error_t *derr, + uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); +static int pf_analyse_ptlp(dev_info_t *rpdip, ddi_fm_error_t *derr, + uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); +static int pf_analyse_sc(dev_info_t *rpdip, ddi_fm_error_t *derr, + uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); +static int pf_analyse_to(dev_info_t *rpdip, ddi_fm_error_t *derr, + uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); +static int pf_analyse_uc(dev_info_t *rpdip, ddi_fm_error_t *derr, + uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); +static int pf_analyse_uc_data(dev_info_t *rpdip, ddi_fm_error_t *derr, + uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); +static int pf_matched_device(dev_info_t *rpdip, ddi_fm_error_t *derr, + uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); +static int pf_no_panic(dev_info_t *rpdip, ddi_fm_error_t *derr, + uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); +static int pf_panic(dev_info_t *rpdip, ddi_fm_error_t *derr, + uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); +static void pf_check_ce(pf_data_t *dq_p, int dq_tail); +static void pf_set_parent_erpt(pf_data_t *dq_p, int index, int erpt_val); + +int +pf_held(dev_info_t *dip) +{ + pcie_ppd_t *ppd_p = pcie_get_ppd(dip); + return (mutex_owned(&ppd_p->ppd_fm_lock)); +} + +boolean_t +pf_enter(dev_info_t *dip) +{ + pcie_ppd_t *ppd_p = pcie_get_ppd(dip); + if (!(ppd_p->ppd_fm_flags & PF_FM_READY)) + return (B_FALSE); + if (!pf_held(dip)) + mutex_enter(&ppd_p->ppd_fm_lock); + return (B_TRUE); +} + +void +pf_exit(dev_info_t *dip) +{ + pcie_ppd_t *ppd_p = pcie_get_ppd(dip); + mutex_exit(&ppd_p->ppd_fm_lock); +} + +/* + * Default pci/pci-x/pci-e error handler callbacks for + * SPARC PCI-E platforms + */ + +/* Called during postattach to initalize FM lock */ +void +pf_init(dev_info_t *dip, ddi_iblock_cookie_t ibc) +{ + pcie_ppd_t *ppd_p = pcie_get_ppd(dip); + struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl; + int cap = DDI_FM_EREPORT_CAPABLE; + + mutex_init(&ppd_p->ppd_fm_lock, NULL, MUTEX_DRIVER, (void *)ibc); + + if (fmhdl) { + fmhdl->fh_cap |= cap; + } else { + ppd_p->ppd_fm_flags |= PF_IS_NH; + ddi_fm_init(dip, &cap, &ibc); + } + ppd_p->ppd_fm_flags |= PF_FM_READY; +} + +/* undo OPL FMA lock, called at predetach */ +void +pf_fini(dev_info_t *dip) +{ + pcie_ppd_t *ppd_p = pcie_get_ppd(dip); + + /* undo non-hardened drivers */ + if (ppd_p->ppd_fm_flags & PF_IS_NH) { + ppd_p->ppd_fm_flags &= ~PF_IS_NH; + ddi_fm_fini(dip); + } + + /* no other code should set the flag to false */ + ppd_p->ppd_fm_flags &= ~PF_FM_READY; + while (pf_held(dip)); + mutex_destroy(&ppd_p->ppd_fm_lock); +} + +/* Returns whether the "bdf" is in the bus range of a switch/bridge */ +static boolean_t +pf_in_bus_range(pcie_ppd_t *ppd_p, pcie_req_id_t bdf) +{ + pci_bus_range_t *br_p = &ppd_p->ppd_bus_range; + uint16_t hdr_type = ppd_p->ppd_hdr_type; + uint8_t bus_no = (bdf & PCIE_REQ_ID_BUS_MASK) >> + PCIE_REQ_ID_BUS_SHIFT; + + /* check if given bdf falls within bridge's bus range */ + if ((hdr_type == PCI_HEADER_ONE) && + ((bus_no >= br_p->lo) && (bus_no <= br_p->hi))) + return (B_TRUE); + else + return (B_FALSE); +} + +/* + * Returns whether the "addr" is in the addr range of a switch/bridge, or if the + * "addr" is in the assigned addr of a device. + */ +static boolean_t +pf_in_addr_range(pcie_ppd_t *ppd_p, uint32_t addr) +{ + uint_t i, low, hi; + ppb_ranges_t *ranges_p = ppd_p->ppd_addr_ranges; + pci_regspec_t *assign_p = ppd_p->ppd_assigned_addr; + + /* check if given address belongs to this device */ + for (i = 0; i < ppd_p->ppd_assigned_entries; i++, assign_p++) { + low = assign_p->pci_phys_low; + hi = low + assign_p->pci_size_low; + if ((addr < hi) && (addr >= low)) + return (B_TRUE); + } + + /* check if given address belongs to a child below this device */ + if (ppd_p->ppd_hdr_type == PCI_HEADER_ONE) { + for (i = 0; i < ppd_p->ppd_addr_entries; i++, ranges_p++) { + if (ranges_p->child_high & PCI_ADDR_MEM32) { + low = ranges_p->child_low; + hi = low + ranges_p->size_low; + if ((addr < hi) && (addr >= low)) + return (B_TRUE); + break; + } + } + } + + return (B_FALSE); +} + +int +pf_pci_dispatch(dev_info_t *pdip, pf_impl_t *impl) +{ + dev_info_t *dip; + pcie_ppd_t *ppd_p; + int sts = 0, ret = 0; + + /* for bridge, check all downstream */ + dip = ddi_get_child(pdip); + for (; dip; dip = ddi_get_next_sibling(dip)) { + /* make sure dip is attached, ie. fm_ready */ + if (!(ppd_p = pcie_get_ppd(dip)) || + !pf_enter(dip)) + continue; + + sts = pf_default_hdl(dip, pdip, ppd_p, impl); + ret |= (sts & PF_FAILURE) ? DDI_FAILURE : DDI_SUCCESS; + + if (sts & PF_DO_NOT_SCAN) + continue; + + if (ppd_p->ppd_hdr_type == PCI_HEADER_ONE) + ret |= pf_pci_dispatch(dip, impl); + } + return (ret); +} + +int +pf_pcie_dispatch(dev_info_t *pdip, pf_impl_t *impl) +{ + dev_info_t *dip; + pcie_req_id_t rid = impl->pf_fbdf; + pcie_ppd_t *ppd_p; + int sts, ret = DDI_SUCCESS; + + dip = ddi_get_child(pdip); + for (; dip; dip = ddi_get_next_sibling(dip)) { + /* Make sure dip is attached and fm_ready */ + if (!(ppd_p = pcie_get_ppd(dip)) || + !pf_enter(dip)) + continue; + + if ((ppd_p->ppd_bdf == rid) || + pf_in_bus_range(ppd_p, rid) || + pf_in_addr_range(ppd_p, impl->pf_faddr)) { + sts = pf_default_hdl(dip, pdip, ppd_p, impl); + + ret |= (sts & PF_FAILURE) ? DDI_FAILURE : DDI_SUCCESS; + + if (sts & PF_DO_NOT_SCAN) + continue; + } else { + pf_exit(dip); + continue; + } + + /* match or in bridge bus-range */ + switch (ppd_p->ppd_dev_type) { + case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI: + ret |= pf_pci_dispatch(dip, impl); + return (ret); + case PCIE_PCIECAP_DEV_TYPE_UP: + case PCIE_PCIECAP_DEV_TYPE_DOWN: + if (ppd_p->ppd_bdf != rid) + ret |= pf_pcie_dispatch(dip, impl); + /* FALLTHROUGH */ + case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV: + return (ret); + case PCIE_PCIECAP_DEV_TYPE_ROOT: + default: + ASSERT(B_FALSE); + } + } + return (ret); +} + +/* + * Called by the RC to scan the fabric. + * + * After all the necessary fabric devices are scanned, the error queue will be + * analyzed for error severity and ereports will be sent. + */ +int +pf_scan_fabric(dev_info_t *rpdip, ddi_fm_error_t *derr, + pf_data_t *dq_p, int *dq_tail_p) +{ + pf_impl_t impl; + pf_data_t *rc_pf_data_p; + int i, sts, ret = DDI_SUCCESS; + int last_rc_index = *dq_tail_p; + + impl.pf_rpdip = rpdip; + impl.pf_derr = derr; + impl.pf_dq_p = dq_p; + impl.pf_dq_tail_p = dq_tail_p; + + i = 0; + + /* + * Scan the fabric using the fault_bdf and fault_addr in error q. + * fault_bdf will be valid in the following cases: + * - Fabric message + * - Poisoned TLP + * - Signaled UR/CA + * - Received UR/CA + * - PIO load failures + */ + for (rc_pf_data_p = dq_p; IS_RC(rc_pf_data_p) && i <= last_rc_index; + rc_pf_data_p++, i++) { + impl.pf_fbdf = rc_pf_data_p->fault_bdf; + impl.pf_faddr = rc_pf_data_p->fault_addr; + + if ((impl.pf_fbdf && pf_find_in_q(impl.pf_fbdf, dq_p, + *dq_tail_p) == PF_DATA_NOT_FOUND) || + (!impl.pf_fbdf && impl.pf_faddr)) + ret |= pf_pcie_dispatch(rpdip, &impl); + } + + /* If this is due to safe access, don't analyse the errors and return */ + if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) { + ret = DDI_SUCCESS; + sts = PF_NO_PANIC; + } else { + sts = pf_analyse_error(rpdip, derr, dq_p, *dq_tail_p); + pf_check_ce(dq_p, *dq_tail_p); + } + + pf_send_ereport(rpdip, derr, dq_p, *dq_tail_p); + *dq_tail_p = -1; + + /* + * If ret is not SUCCESS that means we were not able to add 1 or more + * devices to the fault q. Since that device could have have been the + * one which had a error, be conservative and panic here. + */ + if (ret != DDI_SUCCESS) + return (PF_PANIC | sts); + else + return (sts); +} + +/* + * For each device in the fault queue ensure that no ereport is sent if that + * device was scanned as a result of a CE in one of its children. + */ +void +pf_check_ce(pf_data_t *dq_p, int dq_tail) { + int i = dq_tail; + pf_data_t *pf_data_p; + + for (pf_data_p = &dq_p[dq_tail]; i >= 0; pf_data_p = &dq_p[--i]) { + if (pf_data_p->send_erpt == PF_SEND_ERPT_UNKNOWN) { + /* + * Always send ereport for the last device in a + * particular scan path. + */ + pf_data_p->send_erpt = PF_SEND_ERPT_YES; + + if (pf_data_p->severity_flags == (PF_CE | + PF_NO_ERROR)) { + /* + * Since this device had a CE don't send ereport + * for parents. + */ + pf_set_parent_erpt(dq_p, + pf_data_p->parent_index, PF_SEND_ERPT_NO); + } else { + /* Send ereports for all parents */ + pf_set_parent_erpt(dq_p, + pf_data_p->parent_index, PF_SEND_ERPT_YES); + } + } + } + +} + +void +pf_set_parent_erpt(pf_data_t *dq_p, int index, int erpt_val) { + int i; + pf_data_t *pf_data_p; + + for (i = index; i != PF_DATA_NOT_FOUND; i = pf_data_p->parent_index) { + pf_data_p = &dq_p[i]; + + if (pf_data_p->send_erpt != PF_SEND_ERPT_YES) + pf_data_p->send_erpt = erpt_val; + + } +} + +/* + * Returns the index of the bdf if found in the PCIe Fault Data Queue + * Returns PF_DATA_NOT_FOUND of the index if the bdf is not found. + * This function should not be called by RC. + */ +static int +pf_find_in_q(pcie_req_id_t bdf, pf_data_t *dq_p, int dq_tail) +{ + int i; + + /* Check if this is the first item in queue */ + if (dq_tail == -1) + return (PF_DATA_NOT_FOUND); + + for (i = dq_tail; i >= 0; i--) { + if (dq_p[i].bdf == bdf) + return (i); + } + + return (PF_DATA_NOT_FOUND); +} + +int +pf_get_dq_size() +{ + return (pf_dq_size); +} + +/* + * Add PFD to queue. + * Return true if successfully added. + * Return false if out of space or already in queue. + * Pass in pbdf = -1 if pfd is from RC. + */ +int +pf_en_dq(pf_data_t *pf_data_p, pf_data_t *dq_p, int *dq_tail_p, + pcie_req_id_t pbdf) +{ + int parent_index = PF_DATA_NOT_FOUND; + + if (*dq_tail_p >= (int)pf_dq_size) + return (DDI_FAILURE); + + /* Look for parent BDF if pfd is not from RC */ + if (pbdf != (uint16_t)0xFFFF) + parent_index = pf_find_in_q(pbdf, dq_p, *dq_tail_p); + + *dq_tail_p += 1; + dq_p[*dq_tail_p] = *pf_data_p; + dq_p[*dq_tail_p].parent_index = parent_index; + return (DDI_SUCCESS); +} + +/* Load PCIe Fault Data for PCI/PCIe devices into PCIe Fault Data Queue */ +static int +pf_default_hdl(dev_info_t *dip, dev_info_t *pdip, + pcie_ppd_t *ppd_p, pf_impl_t *impl) +{ + ddi_acc_handle_t h = ppd_p->ppd_cfg_hdl; + pf_data_t pf_data = {0}; + pcie_req_id_t pbdf; + uint16_t pcie_off, aer_off, pcix_off; + uint8_t hdr_type, dev_type; + int cb_sts, sts = PF_SUCCESS; + + pbdf = PCI_GET_BDF(pdip); + pf_data.bdf = PCI_GET_BDF(dip); + + /* Make sure this device hasn't already been snapshotted and cleared */ + if (pf_find_in_q(pf_data.bdf, impl->pf_dq_p, *impl->pf_dq_tail_p) != + PF_DATA_NOT_FOUND) + return (PF_SUCCESS); + + pf_data.dip = dip; + pf_data.bdg_secbus = ppd_p->ppd_bdg_secbus << 8; + pf_data.vendor_id = ppd_p->ppd_dev_ven_id & 0xFFFF; + pf_data.device_id = ppd_p->ppd_dev_ven_id >> 16; + pf_data.send_erpt = PF_SEND_ERPT_UNKNOWN; + + /* + * Read vendor/device ID and check with cached data, if it doesn't match + * could very well be a device that isn't responding anymore. Just + * stop. Save the basic info in the error q for post mortem debugging + * purposes. + */ + if (pci_config_get32(h, PCI_CONF_VENID) != ppd_p->ppd_dev_ven_id) { + (void) pf_en_dq(&pf_data, impl->pf_dq_p, impl->pf_dq_tail_p, + pbdf); + return (DDI_FAILURE); + } + + hdr_type = ppd_p->ppd_hdr_type; + dev_type = ppd_p->ppd_dev_type; + + pf_data.hdr_type = hdr_type; + pf_data.command = pci_config_get16(h, PCI_CONF_COMM); + pf_data.status = pci_config_get16(h, PCI_CONF_STAT); + pf_data.rev_id = pci_config_get8(h, PCI_CONF_REVID); + pcie_off = ppd_p->ppd_pcie_off; + aer_off = ppd_p->ppd_aer_off; + + if (hdr_type == PCI_HEADER_ONE) { + pf_data.s_status = pci_config_get16(h, PCI_BCNF_SEC_STATUS); + } + + pf_data.dev_type = dev_type; + if (dev_type == PCIE_PCIECAP_DEV_TYPE_PCI_DEV) { + if (pci_lcap_locate(h, PCI_CAP_ID_PCIX, &pcix_off) + != DDI_FAILURE) { + pf_data.pcix_s_status = pci_config_get16(h, + pcix_off + PCI_PCIX_SEC_STATUS); + pf_data.pcix_bdg_status = pci_config_get32(h, + pcix_off + PCI_PCIX_BDG_STATUS); + } + goto clear; + } + + if (!pcie_off) + goto clear; + + pf_data.dev_status = PCI_CAP_GET16(h, NULL, pcie_off, PCIE_DEVSTS); + pf_data.pcie_off = pcie_off; + + /* + * If a bridge does not have any error no need to scan any further down. + * For PCIe devices, check the PCIe device status and PCI secondary + * status. + * - Some non-compliant PCIe devices do not utilize PCIe + * error registers. If so rely on legacy PCI error registers. + * For PCI devices, check the PCI secondary status. + */ + if (hdr_type == PCI_HEADER_ONE) { + if ((dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) && + !(pf_data.dev_status & PF_PCIE_BDG_ERR) && + !(pf_data.s_status & PF_PCI_BDG_ERR)) + sts |= PF_DO_NOT_SCAN; + + if ((dev_type == PCIE_PCIECAP_DEV_TYPE_PCI_DEV) && + !(pf_data.s_status & PF_PCI_BDG_ERR)) + sts |= PF_DO_NOT_SCAN; + } + + if (!aer_off) + goto clear; + + pf_data.aer_off = aer_off; + pf_data.aer_ce_status = PCI_XCAP_GET32(h, NULL, aer_off, + PCIE_AER_CE_STS); + pf_data.aer_ue_status = PCI_XCAP_GET32(h, NULL, aer_off, + PCIE_AER_UCE_STS); + pf_data.aer_severity = PCI_XCAP_GET32(h, NULL, aer_off, + PCIE_AER_UCE_SERV); + pf_data.aer_control = PCI_XCAP_GET32(h, NULL, aer_off, PCIE_AER_CTL); + pf_data.aer_h0 = PCI_XCAP_GET32(h, NULL, aer_off, + PCIE_AER_HDR_LOG + 0x0); + pf_data.aer_h1 = PCI_XCAP_GET32(h, NULL, aer_off, + PCIE_AER_HDR_LOG + 0x4); + pf_data.aer_h2 = PCI_XCAP_GET32(h, NULL, aer_off, + PCIE_AER_HDR_LOG + 0x8); + pf_data.aer_h3 = PCI_XCAP_GET32(h, NULL, aer_off, + PCIE_AER_HDR_LOG + 0xc); + + if (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) { + pf_data.s_aer_ue_status = PCI_XCAP_GET32(h, NULL, aer_off, + PCIE_AER_SUCE_STS); + pf_data.s_aer_severity = PCI_XCAP_GET32(h, NULL, aer_off, + PCIE_AER_SUCE_SERV); + pf_data.s_aer_control = PCI_XCAP_GET32(h, NULL, aer_off, + PCIE_AER_SCTL); + pf_data.s_aer_h0 = PCI_XCAP_GET32(h, NULL, aer_off, + PCIE_AER_SHDR_LOG + 0x0); + pf_data.s_aer_h1 = PCI_XCAP_GET32(h, NULL, aer_off, + PCIE_AER_SHDR_LOG + 0x4); + pf_data.s_aer_h2 = PCI_XCAP_GET32(h, NULL, aer_off, + PCIE_AER_SHDR_LOG + 0x8); + pf_data.s_aer_h3 = PCI_XCAP_GET32(h, NULL, aer_off, + PCIE_AER_SHDR_LOG + 0xc); + } + +clear: + /* Clear the Legacy PCI Errors */ + pci_config_put16(h, PCI_CONF_STAT, pf_data.status); + + if (hdr_type == PCI_HEADER_ONE) + pci_config_put16(h, PCI_BCNF_SEC_STATUS, pf_data.s_status); + + if (!pcie_off) + goto queue; + + /* Clear the Advanced PCIe Errors */ + if (aer_off) { + PCI_XCAP_PUT32(h, NULL, aer_off, PCIE_AER_CE_STS, + pf_data.aer_ce_status); + PCI_XCAP_PUT32(h, NULL, aer_off, PCIE_AER_UCE_STS, + pf_data.aer_ue_status); + + if (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) + PCI_XCAP_PUT32(h, NULL, aer_off, + PCIE_AER_SUCE_STS, pf_data.s_aer_ue_status); + } + + /* Clear the PCIe Errors */ + PCI_CAP_PUT16(h, PCI_CAP_ID_PCI_E, pcie_off, PCIE_DEVSTS, + pf_data.dev_status); + +queue: + /* + * If the driver is FMA hardened and callback capable, call it's + * callback function + */ + if (DDI_FM_ERRCB_CAP(ddi_fm_capable(dip))) { + cb_sts = ndi_fm_handler_dispatch(pdip, dip, impl->pf_derr); + if (cb_sts == DDI_FM_FATAL || cb_sts == DDI_FM_UNKNOWN) + sts |= PF_FAILURE; + else + sts |= PF_SUCCESS; + } + + /* Add the snapshot to the error q */ + if (pf_en_dq(&pf_data, impl->pf_dq_p, impl->pf_dq_tail_p, pbdf) == + DDI_FAILURE) + sts |= PF_FAILURE; + + return (sts); +} + +/* + * Function used by PCI error handlers to check if captured address is stored + * in the DMA or ACC handle caches. + * return: PF_HDL_NOTFOUND if a handle is not found + * PF_HDL_FOUND if a handle is found + */ +int +pf_hdl_lookup(dev_info_t *dip, uint64_t ena, uint32_t flag, uint32_t addr, + pcie_req_id_t bdf) +{ + ddi_fm_error_t derr; + int found = 0; + + /* If we don't know the addr or rid just return with UNKNOWN */ + if (addr == NULL && bdf == NULL) + return (PF_HDL_NOTFOUND); + + if (!(flag & (PF_DMA_ADDR | PF_PIO_ADDR | PF_CFG_ADDR))) { + return (PF_HDL_NOTFOUND); + } + + bzero(&derr, sizeof (ddi_fm_error_t)); + derr.fme_version = DDI_FME_VERSION; + derr.fme_flag = DDI_FM_ERR_UNEXPECTED; + derr.fme_ena = ena; + + /* If we know the addr or bdf mark the handle as failed */ + if (flag & PF_DMA_ADDR) { + if (pf_hdl_child_lookup(dip, dip, &derr, addr, bdf, + pf_dma_hdl_check) != PF_HDL_NOTFOUND) + found++; + } + if (flag & PF_PIO_ADDR) { + if (pf_hdl_child_lookup(dip, dip, &derr, addr, bdf, + pf_pio_hdl_check) != PF_HDL_NOTFOUND) + found++; + } + if (flag & PF_CFG_ADDR) { + if (pf_hdl_child_lookup(dip, dip, &derr, addr, bdf, + pf_cfg_hdl_check) != PF_HDL_NOTFOUND) + found++; + } + + return (found ? PF_HDL_FOUND : PF_HDL_NOTFOUND); +} + +/* + * Recursively search the tree for the handler that matches the given address. + * If the BDF is known, only check the handlers that are associated with the + * given BDF, otherwise search the entire tree. + */ +static int +pf_hdl_child_lookup(dev_info_t *rpdip, dev_info_t *dip, + ddi_fm_error_t *derr, uint32_t addr, pcie_req_id_t bdf, + pf_hdl_compare_t cf) +{ + int status = PF_HDL_NOTFOUND; + struct i_ddi_fmhdl *fmhdl; + struct i_ddi_fmtgt *tgt; + pcie_req_id_t child_bdf; + + child_bdf = PCI_GET_BDF(dip); + + i_ddi_fm_handler_enter(dip); + fmhdl = DEVI(dip)->devi_fmhdl; + ASSERT(fmhdl); + + /* Check if dip and BDF match, if not recurse to it's children. */ + if (bdf == NULL || child_bdf == bdf) { + /* If we found the handler stop the search */ + if ((status = cf(fmhdl, derr, addr, bdf)) != PF_HDL_NOTFOUND) + goto done; + } + + /* If we can't find the handler check it's children */ + for (tgt = fmhdl->fh_tgts; tgt != NULL; tgt = tgt->ft_next) { + if ((status = pf_hdl_child_lookup(rpdip, tgt->ft_dip, derr, + addr, bdf, cf)) != PF_HDL_NOTFOUND) + goto done; + } + +done: + i_ddi_fm_handler_exit(dip); + + return (status); +} + +/* + * Find and Mark CFG Handles as failed associated with the given BDF. We should + * always know the BDF for CFG accesses, since it is encoded in the address of + * the TLP. Since there can be multiple cfg handles, mark them all as failed. + */ +/* ARGSUSED */ +static int +pf_cfg_hdl_check(struct i_ddi_fmhdl *fmhdl, ddi_fm_error_t *derr, + uint32_t notused, pcie_req_id_t bdf) +{ + ndi_fmc_t *fcp; + ndi_fmcentry_t *fep; + ddi_acc_handle_t ap; + ddi_acc_hdl_t *hp; + int status = PF_HDL_NOTFOUND; + + ASSERT(bdf); + + /* Return NOTFOUND if this driver doesn't support ACC flagerr */ + if (!DDI_FM_ACC_ERR_CAP(fmhdl->fh_cap) || + ((fcp = fmhdl->fh_acc_cache) == NULL)) + return (PF_HDL_NOTFOUND); + + mutex_enter(&fcp->fc_lock); + for (fep = fcp->fc_active->fce_next; fep; fep = fep->fce_next) { + ap = fep->fce_resource; + hp = impl_acc_hdl_get(ap); + + /* CFG space is always reg 0 */ + if (hp->ah_rnumber == 0) { + i_ddi_fm_acc_err_set(ap, derr->fme_ena, DDI_FM_NONFATAL, + DDI_FM_ERR_UNEXPECTED); + ddi_fm_acc_err_get(ap, derr, DDI_FME_VERSION); + derr->fme_acc_handle = ap; + status = PF_HDL_FOUND; + } + } + mutex_exit(&fcp->fc_lock); + + return (status); +} + +/* + * Find and Mark all ACC Handles associated with a give address and BDF as + * failed. If the BDF != NULL, then check to see if the device has a ACC Handle + * associated with ADDR. If the handle is not found, mark all the handles as + * failed. If the BDF == NULL, mark the handle as failed if it is associated + * with ADDR. + */ +static int +pf_pio_hdl_check(struct i_ddi_fmhdl *fmhdl, ddi_fm_error_t *derr, + uint32_t addr, pcie_req_id_t bdf) +{ + ndi_fmc_t *fcp; + ndi_fmcentry_t *fep; + ddi_acc_handle_t ap; + ddi_acc_hdl_t *hp; + uint32_t base_addr; + uint_t size; + int status = PF_HDL_NOTFOUND; + + if (!DDI_FM_ACC_ERR_CAP(fmhdl->fh_cap) || + ((fcp = fmhdl->fh_acc_cache) == NULL)) + return (PF_HDL_NOTFOUND); + + mutex_enter(&fcp->fc_lock); + for (fep = fcp->fc_active->fce_next; fep; fep = fep->fce_next) { + ap = fep->fce_resource; + hp = impl_acc_hdl_get(ap); + + /* CFG space is always reg 0, don't mark config handlers. */ + if (hp->ah_rnumber == 0) + continue; + + /* + * Normalize the base addr to the addr and strip off the + * HB info. All PIOs are 32 bit access only. + */ + base_addr = (uint32_t)(hp->ah_pfn << MMU_PAGESHIFT) + + hp->ah_offset; + size = hp->ah_len; + + if (((addr >= base_addr) && (addr < (base_addr + size))) || + ((addr == NULL) && (bdf != NULL))) { + + status = PF_HDL_FOUND; + + i_ddi_fm_acc_err_set(ap, derr->fme_ena, DDI_FM_NONFATAL, + DDI_FM_ERR_UNEXPECTED); + ddi_fm_acc_err_get(ap, derr, DDI_FME_VERSION); + derr->fme_acc_handle = ap; + } + } + mutex_exit(&fcp->fc_lock); + + /* + * If no handles found and we know this is the right device mark + * all the handles as failed. + */ + if (addr && bdf != NULL && status == PF_HDL_NOTFOUND) + status = pf_pio_hdl_check(fmhdl, derr, NULL, bdf); + + return (status); +} + +/* + * Find and Mark all DNA Handles associated with a give address and BDF as + * failed. If the BDF != NULL, then check to see if the device has a DMA Handle + * associated with ADDR. If the handle is not found, mark all the handles as + * failed. If the BDF == NULL, mark the handle as failed if it is associated + * with ADDR. + */ +static int +pf_dma_hdl_check(struct i_ddi_fmhdl *fmhdl, ddi_fm_error_t *derr, + uint32_t addr, pcie_req_id_t bdf) +{ + ndi_fmc_t *fcp; + ndi_fmcentry_t *fep; + ddi_dma_impl_t *pcie_dp; + ddi_dma_handle_t dp; + int status = PF_HDL_NOTFOUND; + uint32_t base_addr; + uint_t size; + + if (!DDI_FM_DMA_ERR_CAP(fmhdl->fh_cap) || + ((fcp = fmhdl->fh_dma_cache) == NULL)) + return (PF_HDL_NOTFOUND); + + mutex_enter(&fcp->fc_lock); + for (fep = fcp->fc_active->fce_next; fep; fep = fep->fce_next) { + pcie_dp = (ddi_dma_impl_t *)fep->fce_resource; + dp = (ddi_dma_handle_t)fep->fce_resource; + base_addr = (uint32_t)pcie_dp->dmai_mapping; + size = pcie_dp->dmai_size; + + /* + * Mark the handle as failed if the ADDR is mapped, or if we + * know the BDF and ADDR == 0. + */ + if (((addr >= base_addr) && (addr < (base_addr + size))) || + ((addr == NULL) && (bdf != NULL))) { + + status = PF_HDL_FOUND; + + i_ddi_fm_dma_err_set(dp, derr->fme_ena, DDI_FM_NONFATAL, + DDI_FM_ERR_UNEXPECTED); + ddi_fm_dma_err_get(dp, derr, DDI_FME_VERSION); + derr->fme_dma_handle = dp; + } + } + mutex_exit(&fcp->fc_lock); + + /* + * If no handles found and we know this is the right device mark + * all the handles as failed. + */ + if (addr && bdf != NULL && status == PF_HDL_NOTFOUND) + status = pf_dma_hdl_check(fmhdl, derr, NULL, bdf); + + return (status); +} + +/* + * If a PCIe device does not support AER, assume all AER statuses have been set, + * unless other registers do not indicate a certain error occuring. + */ +static void +pf_adjust_for_no_aer(pf_data_t *pf_data_p) +{ + uint32_t aer_ue = 0; + + if (pf_data_p->aer_off) + return; + + if (pf_data_p->dev_status & PCIE_DEVSTS_FE_DETECTED) { + aer_ue = PF_AER_FATAL_ERR; + } else if (pf_data_p->dev_status & PCIE_DEVSTS_NFE_DETECTED) { + aer_ue = PF_AER_NON_FATAL_ERR; + /* Check if the device received a PTLP */ + if (!(pf_data_p->status & PCI_STAT_PERROR)) + aer_ue &= ~PCIE_AER_UCE_PTLP; + + /* Check if the device signaled a CA */ + if (!(pf_data_p->status & PCI_STAT_S_TARG_AB)) + aer_ue &= ~PCIE_AER_UCE_CA; + + /* Check if the device sent a UR */ + if ((!pf_data_p->dev_status & PCIE_DEVSTS_UR_DETECTED)) + aer_ue &= ~PCIE_AER_UCE_UR; + + /* + * Ignore ECRCs as it is optional and will manefest itself as + * another error like PTLP and MFP + */ + aer_ue &= ~PCIE_AER_UCE_ECRC; + } + + if (pf_data_p->dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE_DEV) { + aer_ue &= ~PCIE_AER_UCE_TRAINING; + aer_ue &= ~PCIE_AER_UCE_SD; + } + pf_data_p->aer_ue_status = aer_ue; +} + +static void +pf_adjust_for_no_saer(pf_data_t *pf_data_p) +{ + uint32_t s_aer_ue = 0; + + if (pf_data_p->aer_off) + return; + + if (pf_data_p->dev_status & PCIE_DEVSTS_FE_DETECTED) { + s_aer_ue = PF_SAER_FATAL_ERR; + } else if (pf_data_p->dev_status & PCIE_DEVSTS_NFE_DETECTED) { + s_aer_ue = PF_SAER_NON_FATAL_ERR; + /* Check if the device received a UC_DATA */ + if (!(pf_data_p->s_status & PCI_STAT_PERROR)) + s_aer_ue &= ~PCIE_AER_SUCE_UC_DATA_ERR; + + /* Check if the device received a RCVD_MA/MA_ON_SC */ + if (!(pf_data_p->s_status & (PCI_STAT_R_MAST_AB))) { + s_aer_ue &= ~PCIE_AER_SUCE_RCVD_MA; + s_aer_ue &= ~PCIE_AER_SUCE_MA_ON_SC; + } + + /* Check if the device received a RCVD_TA/TA_ON_SC */ + if (!(pf_data_p->s_status & (PCI_STAT_R_TARG_AB))) { + s_aer_ue &= ~PCIE_AER_SUCE_RCVD_TA; + s_aer_ue &= ~PCIE_AER_SUCE_TA_ON_SC; + } + } + pf_data_p->s_aer_ue_status = s_aer_ue; +} + +/* Find the PCIe-PCI bridge of a PCI device */ +static pf_data_t * +pf_get_parent_pcie_bridge(pf_data_t *dq_p, pf_data_t *pf_data_p) +{ + pf_data_t *bdg_pf_data_p; + + ASSERT(pf_data_p->dev_type == PCIE_PCIECAP_DEV_TYPE_PCI_DEV); + + if (pf_data_p->parent_index == PF_DATA_NOT_FOUND) + return (NULL); + + for (bdg_pf_data_p = &dq_p[pf_data_p->parent_index]; + bdg_pf_data_p->dev_type != PCIE_PCIECAP_DEV_TYPE_PCIE2PCI; + bdg_pf_data_p = &dq_p[bdg_pf_data_p->parent_index]) { + if (!bdg_pf_data_p || (bdg_pf_data_p->parent_index == + PF_DATA_NOT_FOUND)) + return (NULL); + } + + return (bdg_pf_data_p); +} + +/* + * See if a leaf error was bubbled up to the RC and handled. + * Check if the RC logged an error with the appropriate status type/abort type. + * Ex: Parity Error, Received Master/Target Abort + * Check if either the fault address found in the rc matches the device's + * assigned address range (PIO's only) or the fault BDF in the rc matches the + * device's BDF or Secondary Bus. + */ +static boolean_t +pf_matched_in_rc(pf_data_t *dq_p, pf_data_t *pf_data_p, uint32_t abort_type) +{ + pf_data_t *rc_pf_data_p; + pcie_ppd_t *ppd_p; + + ppd_p = pcie_get_ppd(pf_data_p->dip); + for (rc_pf_data_p = dq_p; IS_RC(rc_pf_data_p); rc_pf_data_p++) { + /* If device and rc abort type does not match continue */ + if (!(rc_pf_data_p->s_status & abort_type)) + continue; + + /* The Fault BDF = Device's BDF */ + if (rc_pf_data_p->fault_bdf == pf_data_p->bdf) + return (B_TRUE); + + /* The Fault Addr is in device's address range */ + if (pf_in_addr_range(ppd_p, rc_pf_data_p->fault_addr)) + return (B_TRUE); + + /* The Fault BDF is from PCIe-PCI Bridge's secondary bus */ + if ((pf_data_p->dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) && + ((rc_pf_data_p->fault_bdf & PCIE_REQ_ID_BUS_MASK) == + pf_data_p->bdg_secbus)) + return (B_TRUE); + } + + return (B_FALSE); +} + +/* + * Decodes the TLP and returns the BDF of the handler, address and transaction + * type if known. + * + * Types of TLP logs seen in RC, and what to extract: + * + * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR + * Memory(PIO) - address, PF_PIO_ADDR + * CFG - Should not occur and result in UR + * Completion(DMA) - Requester BDF, PF_DMA_ADDR + * Completion(PIO) - Requester BDF, PF_PIO_ADDR + * + * Types of TLP logs seen in SW/Leaf, and what to extract: + * + * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR + * Memory(PIO) - address, PF_PIO_ADDR + * CFG - Destined BDF, address, PF_CFG_ADDR + * Completion(DMA) - Requester BDF, PF_DMA_ADDR + * Completion(PIO) - Requester BDF, PF_PIO_ADDR + * + * If the TLP can be decoded the *bdf, *addr, and *trans_type will be populated + * with the TLP information. The caller may pass in NULL for any of the + * mentioned variables, if they are not interested in them. + */ +int +pf_tlp_decode(dev_info_t *rpdip, pf_data_t *pf_data_p, pcie_req_id_t *bdf, + uint32_t *addr, uint32_t *trans_type) +{ + pcie_tlp_hdr_t *tlp_hdr = (pcie_tlp_hdr_t *)&pf_data_p->aer_h0; + pcie_req_id_t rp_bdf, rid_bdf, tlp_bdf; + uint32_t tlp_addr, tlp_trans_type; + + if (pcie_get_bdf_from_dip(rpdip, &rp_bdf) != DDI_SUCCESS) + rp_bdf = (pcie_req_id_t)-1; + + switch (tlp_hdr->type) { + case PCIE_TLP_TYPE_IO: + case PCIE_TLP_TYPE_MEM: + case PCIE_TLP_TYPE_MEMLK: + tlp_addr = pf_data_p->aer_h3; + /* If the RID_BDF == RP_BDF, PIO, otherwise DMA */ + rid_bdf = (pcie_req_id_t)(pf_data_p->aer_h1 >> 16); + if (rid_bdf == rp_bdf) { + tlp_trans_type = PF_PIO_ADDR; + tlp_bdf = NULL; + } else { + tlp_trans_type = PF_DMA_ADDR; + tlp_bdf = rid_bdf; + } + break; + case PCIE_TLP_TYPE_CFG0: + case PCIE_TLP_TYPE_CFG1: + tlp_addr = 0; + tlp_bdf = (pcie_req_id_t)(pf_data_p->aer_h2 >> 16); + tlp_trans_type = PF_CFG_ADDR; + break; + case PCIE_TLP_TYPE_CPL: + case PCIE_TLP_TYPE_CPLLK: + tlp_addr = NULL; + /* + * If the completer bdf == RP_BDF, DMA, otherwise PIO or a CFG + * completion. + */ + tlp_bdf = (pcie_req_id_t)(pf_data_p->aer_h1 >> 16); + if (tlp_bdf == rp_bdf) + tlp_trans_type = PF_DMA_ADDR; + else + tlp_trans_type = PF_PIO_ADDR | PF_CFG_ADDR; + break; + default: + return (DDI_FAILURE); + } + + if (addr) + *addr = tlp_addr; + if (trans_type) + *trans_type = tlp_trans_type; + if (bdf) + *bdf = tlp_bdf; + + return (DDI_SUCCESS); +} + +/* + * pf_pci_decode function decodes the secondary aer transaction logs in + * PCIe-PCI bridges. + * + * The log is 128 bits long and arranged in this manner. + * [0:35] Transaction Attribute (s_aer_h0-saer_h1) + * [36:39] Transaction lower command (saer_h1) + * [40:43] Transaction upper command (saer_h1) + * [44:63] Reserved + * [64:127] Address (saer_h2-saer_h3) + */ +static int +pf_pci_decode(dev_info_t *rpdip, pf_data_t *pf_data_p, uint16_t *cmd, + pcie_req_id_t *bdf, uint32_t *addr, uint32_t *trans_type) { + pcix_attr_t *attr; + pcie_req_id_t rp_bdf; + + if (pcie_get_bdf_from_dip(rpdip, &rp_bdf) != DDI_SUCCESS) + rp_bdf = (pcie_req_id_t)-1; + + *cmd = GET_SAER_CMD(pf_data_p); + + switch (*cmd) { + case PCI_PCIX_CMD_MEMRD_DW: + case PCI_PCIX_CMD_MEMRD_BL: + case PCI_PCIX_CMD_MEMRDBL: + case PCI_PCIX_CMD_MEMWR: + case PCI_PCIX_CMD_MEMWR_BL: + case PCI_PCIX_CMD_MEMWRBL: + *addr = pf_data_p->s_aer_h2; + attr = (pcix_attr_t *)&pf_data_p->s_aer_h0; + + /* + * Could be DMA or PIO. Find out by look at requesting bdf. + * If the requester is the RC, then it's a PIO, otherwise, DMA + */ + *bdf = attr->rid; + if (*bdf == rp_bdf) { + *trans_type = PF_PIO_ADDR; + *bdf = 0; + } else { + *trans_type = PF_DMA_ADDR; + } + break; + case PCI_PCIX_CMD_CFRD: + case PCI_PCIX_CMD_CFWR: + /* + * CFG Access should always be down stream. Match the BDF in + * the address phase. + */ + *addr = 0; + attr = (pcix_attr_t *)&pf_data_p->s_aer_h2; + *bdf = attr->rid; + *trans_type = PF_CFG_ADDR; + break; + case PCI_PCIX_CMD_SPL: + /* + * Check for DMA read completions. The requesting BDF is in the + * Address phase. + */ + *addr = 0; + attr = (pcix_attr_t *)&pf_data_p->s_aer_h0; + *bdf = attr->rid; + *trans_type = PF_DMA_ADDR; + break; + default: + *addr = 0; + *bdf = 0; + *trans_type = 0; + return (DDI_FAILURE); + } + return (DDI_SUCCESS); +} + +/* + * For this function only the Primary AER Header Logs need to be valid in the + * pfd (PCIe Fault Data) arg. + */ +int +pf_tlp_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *pf_data_p) +{ + uint32_t addr; + int err = PF_HDL_NOTFOUND; + pcie_req_id_t hdl_bdf; + uint32_t trans_type; + + if (pf_tlp_decode(rpdip, pf_data_p, &hdl_bdf, &addr, &trans_type) == + DDI_SUCCESS) { + err = pf_hdl_lookup(rpdip, derr->fme_ena, trans_type, addr, + hdl_bdf); + } + + return (err); +} + +/* + * Last function called for PF Scan Fabric. + * Sends ereports for all devices that are not dev_type = RC. + * Will also unlock all the mutexes grabbed during fabric scan. + */ +/* ARGSUSED */ +static void +pf_send_ereport(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *dq_p, + int dq_tail) +{ + char buf[FM_MAX_CLASS]; + pf_data_t *pfd_p; + int i, total = dq_tail; + + i = 0; + for (pfd_p = dq_p; IS_RC(pfd_p) && i <= dq_tail; pfd_p++, i++) { + total--; + } + + i = dq_tail; + for (pfd_p = &dq_p[dq_tail]; i >= 0; pfd_p--, i--) { + if (IS_RC(pfd_p)) + continue; + + if (pfd_p->send_erpt == PF_SEND_ERPT_NO) + goto unlock; + + if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) + goto unlock; + + (void) snprintf(buf, FM_MAX_CLASS, "%s", "fire.fabric"); + ddi_fm_ereport_post(pfd_p->dip, buf, derr->fme_ena, + DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, + "req_id", DATA_TYPE_UINT16, pfd_p->bdf, + "device_id", DATA_TYPE_UINT16, pfd_p->device_id, + "vendor_id", DATA_TYPE_UINT16, pfd_p->vendor_id, + "rev_id", DATA_TYPE_UINT8, pfd_p->rev_id, + "dev_type", DATA_TYPE_UINT16, pfd_p->dev_type, + "cap_off", DATA_TYPE_UINT16, pfd_p->pcie_off, + "aer_off", DATA_TYPE_UINT16, pfd_p->aer_off, + "sts_reg", DATA_TYPE_UINT16, pfd_p->status, + "sts_sreg", DATA_TYPE_UINT16, pfd_p->s_status, + "pcix_sts_reg", DATA_TYPE_UINT16, pfd_p->pcix_s_status, + "pcix_bdg_sts_reg", DATA_TYPE_UINT32, + pfd_p->pcix_bdg_status, + "dev_sts_reg", DATA_TYPE_UINT16, pfd_p->dev_status, + "aer_ce", DATA_TYPE_UINT32, pfd_p->aer_ce_status, + "aer_ue", DATA_TYPE_UINT32, pfd_p->aer_ue_status, + "aer_sev", DATA_TYPE_UINT32, pfd_p->aer_severity, + "aer_ctr", DATA_TYPE_UINT32, pfd_p->aer_control, + "aer_h1", DATA_TYPE_UINT32, pfd_p->aer_h0, + "aer_h2", DATA_TYPE_UINT32, pfd_p->aer_h1, + "aer_h3", DATA_TYPE_UINT32, pfd_p->aer_h2, + "aer_h4", DATA_TYPE_UINT32, pfd_p->aer_h3, + "saer_ue", DATA_TYPE_UINT32, pfd_p->s_aer_ue_status, + "saer_sev", DATA_TYPE_UINT32, pfd_p->s_aer_severity, + "saer_ctr", DATA_TYPE_UINT32, pfd_p->s_aer_control, + "saer_h1", DATA_TYPE_UINT32, pfd_p->s_aer_h0, + "saer_h2", DATA_TYPE_UINT32, pfd_p->s_aer_h1, + "saer_h3", DATA_TYPE_UINT32, pfd_p->s_aer_h2, + "saer_h4", DATA_TYPE_UINT32, pfd_p->s_aer_h3, + "remainder", DATA_TYPE_UINT32, total--, + "severity", DATA_TYPE_UINT32, pfd_p->severity_flags, + NULL); + +unlock: + pf_exit(pfd_p->dip); + } +} + +/* + * Ignore: + * - TRAINING: as leaves do not have children + * - SD: as leaves do not have children + */ +const pf_fab_err_tbl_t pcie_pcie_tbl[] = { + PCIE_AER_UCE_DLP, pf_panic, + PCIE_AER_UCE_PTLP, pf_analyse_ptlp, + PCIE_AER_UCE_FCP, pf_panic, + PCIE_AER_UCE_TO, pf_analyse_to, + PCIE_AER_UCE_CA, pf_analyse_ca_ur, + PCIE_AER_UCE_UC, pf_analyse_uc, + PCIE_AER_UCE_RO, pf_panic, + PCIE_AER_UCE_MTLP, pf_panic, + PCIE_AER_UCE_ECRC, pf_panic, + PCIE_AER_UCE_UR, pf_analyse_ca_ur, + NULL, NULL +}; + +const pf_fab_err_tbl_t pcie_sw_tbl[] = { + PCIE_AER_UCE_TRAINING, pf_no_panic, + PCIE_AER_UCE_DLP, pf_panic, + PCIE_AER_UCE_SD, pf_no_panic, + PCIE_AER_UCE_PTLP, pf_analyse_ptlp, + PCIE_AER_UCE_FCP, pf_panic, + PCIE_AER_UCE_TO, pf_analyse_to, + PCIE_AER_UCE_CA, pf_analyse_ca_ur, + PCIE_AER_UCE_UC, pf_analyse_uc, + PCIE_AER_UCE_RO, pf_panic, + PCIE_AER_UCE_MTLP, pf_panic, + PCIE_AER_UCE_ECRC, pf_panic, + PCIE_AER_UCE_UR, pf_analyse_ca_ur, + NULL, NULL +}; + +const pf_fab_err_tbl_t pcie_pcie_bdg_tbl[] = { + PCIE_AER_SUCE_TA_ON_SC, pf_analyse_sc, + PCIE_AER_SUCE_MA_ON_SC, pf_analyse_sc, + PCIE_AER_SUCE_RCVD_TA, pf_analyse_ma_ta, + PCIE_AER_SUCE_RCVD_MA, pf_analyse_ma_ta, + PCIE_AER_SUCE_USC_ERR, pf_panic, + PCIE_AER_SUCE_USC_MSG_DATA_ERR, pf_analyse_ma_ta, + PCIE_AER_SUCE_UC_DATA_ERR, pf_analyse_uc_data, + PCIE_AER_SUCE_UC_ATTR_ERR, pf_panic, + PCIE_AER_SUCE_UC_ADDR_ERR, pf_panic, + PCIE_AER_SUCE_TIMER_EXPIRED, pf_panic, + PCIE_AER_SUCE_PERR_ASSERT, pf_analyse_perr_assert, + PCIE_AER_SUCE_SERR_ASSERT, pf_no_panic, + PCIE_AER_SUCE_INTERNAL_ERR, pf_panic, + NULL, NULL +}; + +const pf_fab_err_tbl_t pcie_pci_bdg_tbl[] = { + PCI_STAT_PERROR, pf_analyse_pci, + PCI_STAT_S_PERROR, pf_analyse_pci, + PCI_STAT_S_SYSERR, pf_panic, + PCI_STAT_R_MAST_AB, pf_analyse_pci, + PCI_STAT_R_TARG_AB, pf_analyse_pci, + PCI_STAT_S_TARG_AB, pf_analyse_pci, + NULL, NULL +}; + +const pf_fab_err_tbl_t pcie_pci_tbl[] = { + PCI_STAT_PERROR, pf_analyse_pci, + PCI_STAT_S_PERROR, pf_analyse_pci, + PCI_STAT_S_SYSERR, pf_panic, + PCI_STAT_R_MAST_AB, pf_analyse_pci, + PCI_STAT_R_TARG_AB, pf_analyse_pci, + PCI_STAT_S_TARG_AB, pf_analyse_pci, + NULL, NULL +}; + +/* + * Analyse all the PCIe Fault Data (pfd) gathered during dispatch in the pfd + * Queue. + */ +static int +pf_analyse_error(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *dq_p, + int dq_tail) +{ + int i = 0, pfd_err, err = 0; + pf_data_t *pf_data_p; + + for (pf_data_p = &dq_p[i]; i <= dq_tail; pf_data_p = &dq_p[++i]) { + pfd_err = 0; + switch (pf_data_p->dev_type) { + case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV: + if (PCIE_DEVSTS_CE_DETECTED & pf_data_p->dev_status) + pfd_err |= PF_CE; + + pf_adjust_for_no_aer(pf_data_p); + pfd_err |= pf_analyse_error_tbl(rpdip, derr, dq_p, + pf_data_p, pcie_pcie_tbl, pf_data_p->aer_ue_status); + break; + case PCIE_PCIECAP_DEV_TYPE_UP: + case PCIE_PCIECAP_DEV_TYPE_DOWN: + if (PCIE_DEVSTS_CE_DETECTED & pf_data_p->dev_status) + pfd_err |= PF_CE; + + pf_adjust_for_no_aer(pf_data_p); + pfd_err |= pf_analyse_error_tbl(rpdip, derr, dq_p, + pf_data_p, pcie_sw_tbl, pf_data_p->aer_ue_status); + break; + case PCIE_PCIECAP_DEV_TYPE_ROOT: + /* Do not analyse RC info as it has already been done */ + pfd_err |= PF_MATCHED_RC; + break; + case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI: + if (PCIE_DEVSTS_CE_DETECTED & pf_data_p->dev_status) + pfd_err |= PF_CE; + + if ((PCIE_DEVSTS_NFE_DETECTED | + PCIE_DEVSTS_FE_DETECTED) + & pf_data_p->dev_status) { + pf_adjust_for_no_aer(pf_data_p); + pf_adjust_for_no_saer(pf_data_p); + pfd_err |= pf_analyse_error_tbl(rpdip, derr, + dq_p, pf_data_p, pcie_pcie_tbl, + pf_data_p->aer_ue_status); + pfd_err |= pf_analyse_error_tbl(rpdip, derr, + dq_p, pf_data_p, pcie_pcie_bdg_tbl, + pf_data_p->s_aer_ue_status); + break; + } + /* + * Some non-compliant PCIe devices do not utilize PCIe + * error registers. So fallthrough and rely on legacy + * PCI error registers. + */ + /* FALLTHROUGH */ + case PCIE_PCIECAP_DEV_TYPE_PCI_DEV: + pfd_err |= pf_analyse_error_tbl(rpdip, derr, dq_p, + pf_data_p, pcie_pci_tbl, pf_data_p->status); + if (pf_data_p->hdr_type == PCI_HEADER_ONE) + pfd_err |= pf_analyse_error_tbl(rpdip, derr, + dq_p, pf_data_p, pcie_pci_bdg_tbl, + pf_data_p->s_status); + break; + } + + err |= pfd_err; + pf_data_p->severity_flags = pfd_err; + } + + return (err); +} + +static int +pf_analyse_error_tbl(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *dq_p, + pf_data_t *pf_data_p, const pf_fab_err_tbl_t *tbl, uint32_t err_reg) { + const pf_fab_err_tbl_t *row; + int err = 0; + + for (row = tbl; err_reg && (row->bit != NULL) && !(err & PF_PANIC); + row++) { + if (err_reg & row->bit) + err |= row->handler(rpdip, derr, row->bit, dq_p, + pf_data_p); + } + + if (!err) + err = PF_NO_ERROR; + + return (err); +} + +/* + * PCIe Completer Abort and Unsupport Request error analyser. If a PCIe device + * issues a CA/UR a corresponding Received CA/UR should have been seen in the + * PCIe root complex. Check to see if RC did indeed receive a CA/UR, if so then + * this error may be safely ignored. If not check the logs and see if an + * associated handler for this transaction can be found. + */ +/* ARGSUSED */ +static int +pf_analyse_ca_ur(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, + pf_data_t *dq_p, pf_data_t *pf_data_p) +{ + uint32_t abort_type; + + if (bit == PCIE_AER_UCE_UR) + abort_type = PCI_STAT_R_MAST_AB; + else + abort_type = PCI_STAT_R_TARG_AB; + + if (pf_matched_in_rc(dq_p, pf_data_p, abort_type)) + return (PF_MATCHED_RC); + + if (HAS_AER_LOGS(pf_data_p, bit)) { + if (pf_tlp_hdl_lookup(rpdip, derr, pf_data_p) == + PF_HDL_NOTFOUND) + return (PF_PANIC); + + return (PF_MATCHED_DEVICE); + } + + return (PF_PANIC); +} + +/* + * PCIe-PCI Bridge Received Master Abort and Target error analyser. If a PCIe + * Bridge receives a MA/TA a corresponding sent CA/UR should have been seen in + * the PCIe root complex. Check to see if RC did indeed receive a CA/UR, if so + * then this error may be safely ignored. If not check the logs and see if an + * associated handler for this transaction can be found. + */ +/* ARGSUSED */ +static int +pf_analyse_ma_ta(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, + pf_data_t *dq_p, pf_data_t *pf_data_p) +{ + uint16_t cmd; + uint32_t addr; + pcie_req_id_t bdf; + uint32_t abort_type, trans_type; + + if (bit == PCIE_AER_SUCE_RCVD_MA) + abort_type = PCI_STAT_R_MAST_AB; + else + abort_type = PCI_STAT_R_TARG_AB; + + if (pf_matched_in_rc(dq_p, pf_data_p, abort_type)) + return (PF_MATCHED_RC); + + if (!HAS_SAER_LOGS(pf_data_p, bit)) + return (PF_PANIC); + + if (pf_pci_decode(rpdip, pf_data_p, &cmd, &bdf, &addr, &trans_type) != + DDI_SUCCESS) + return (PF_PANIC); + + if (pf_hdl_lookup(rpdip, derr->fme_ena, trans_type, addr, bdf) == + PF_HDL_NOTFOUND) + return (PF_PANIC); + + return (PF_MATCHED_DEVICE); +} + +/* + * Generic PCI error analyser. This function is used for Parity Errors, + * Received Master Aborts, Received Target Aborts, and Signaled Target Aborts. + * In general PCI devices do not have error logs, it is very difficult to figure + * out what transaction caused the error. Instead find the nearest PCIe-PCI + * Bridge and check to see if it has logs and if it has an error associated with + * this PCI Device. + */ +/* ARGSUSED */ +static int +pf_analyse_pci(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, + pf_data_t *dq_p, pf_data_t *pf_data_p) +{ + pf_data_t *parent_pfd_p; + uint16_t cmd; + uint32_t addr; + pcie_req_id_t bdf; + uint32_t trans_type, aer_ue_status; + pcie_ppd_t *ppd_p; + + if (pf_data_p->status & PCI_STAT_S_SYSERR) + return (PF_PANIC); + + if (bit & (PCI_STAT_PERROR | PCI_STAT_S_PERROR)) { + aer_ue_status = PCIE_AER_SUCE_PERR_ASSERT; + } else { + aer_ue_status = (PCIE_AER_SUCE_TA_ON_SC | + PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA | + PCIE_AER_SUCE_RCVD_MA); + } + + parent_pfd_p = pf_get_parent_pcie_bridge(dq_p, pf_data_p); + if (parent_pfd_p == NULL) + return (PF_PANIC); + + if (!(parent_pfd_p->s_aer_ue_status & aer_ue_status) || + !HAS_SAER_LOGS(parent_pfd_p, aer_ue_status)) + return (PF_PANIC); + + if (pf_pci_decode(rpdip, parent_pfd_p, &cmd, &bdf, &addr, &trans_type) + != DDI_SUCCESS) + return (PF_PANIC); + + /* + * If the addr or bdf from the parent PCIe bridge logs belong to this + * PCI device, assume the PCIe bridge's error handling has already taken + * care of this PCI device's error. + */ + ppd_p = pcie_get_ppd(pf_data_p->dip); + if ((bdf == pf_data_p->bdf) || pf_in_addr_range(ppd_p, addr)) + return (PF_MATCHED_PARENT); + + /* + * If this device is a PCI-PCI bridge, check if the bdf in the parent + * PCIe bridge logs is in the range of this PCI-PCI Bridge's bus ranges. + * If they are, then assume the PCIe bridge's error handling has already + * taken care of this PCI-PCI bridge device's error. + */ + if ((pf_data_p->hdr_type == PCI_HEADER_ONE) && + pf_in_bus_range(ppd_p, bdf)) + return (PF_MATCHED_PARENT); + + return (PF_PANIC); +} + +/* + * PCIe Bridge transactions associated with PERR. + * o Bridge received a poisoned Non-Posted Write (CFG Writes) from PCIe + * o Bridge received a poisoned Posted Write from (MEM Writes) from PCIe + * o Bridge received a poisoned Completion on a Split Transction from PCIe + * o Bridge received a poisoned Completion on a Delayed Transction from PCIe + * + * Check for non-poisoned PCIe transactions that got forwarded to the secondary + * side and detects a PERR#. Except for delayed read completions, a poisoned + * TLP will be forwarded to the secondary bus and PERR# will be asserted. + */ +/* ARGSUSED */ +static int +pf_analyse_perr_assert(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, + pf_data_t *dq_p, pf_data_t *pf_data_p) +{ + uint16_t cmd; + uint32_t addr; + pcie_req_id_t bdf; + uint32_t trans_type; + int sts; + int err = PF_NO_ERROR; + + if (HAS_SAER_LOGS(pf_data_p, bit)) { + if (pf_pci_decode(rpdip, pf_data_p, &cmd, &bdf, &addr, + &trans_type) != DDI_SUCCESS) + return (PF_PANIC); + + switch (cmd) { + case PCI_PCIX_CMD_MEMWR: + case PCI_PCIX_CMD_MEMWR_BL: + case PCI_PCIX_CMD_MEMWRBL: + /* Posted Writes Transactions */ + if (trans_type == PF_PIO_ADDR) + sts = pf_hdl_lookup(rpdip, derr->fme_ena, + trans_type, addr, bdf); + break; + case PCI_PCIX_CMD_CFWR: + /* + * Check to see if it is a non-posted write. If so, a + * UR Completion would have been sent. + */ + if (pf_matched_in_rc(dq_p, pf_data_p, + PCI_STAT_R_MAST_AB)) { + sts = PF_HDL_FOUND; + err = PF_MATCHED_RC; + break; + } + sts = pf_hdl_lookup(rpdip, derr->fme_ena, + trans_type, addr, bdf); + break; + case PCI_PCIX_CMD_SPL: + sts = pf_hdl_lookup(rpdip, derr->fme_ena, + trans_type, addr, bdf); + break; + default: + /* Unexpected situation, panic */ + sts = PF_HDL_NOTFOUND; + } + + if (sts == PF_HDL_NOTFOUND) + err = PF_PANIC; + } else { + /* + * Check to see if it is a non-posted write. If so, a UR + * Completion would have been sent. + */ + if ((pf_data_p->dev_status & PCIE_DEVCTL_UR_REPORTING_EN) && + pf_matched_in_rc(dq_p, pf_data_p, PCI_STAT_R_MAST_AB)) + err = PF_MATCHED_RC; + + /* Check for posted writes. Transaction is lost. */ + if (pf_data_p->s_status & PCI_STAT_S_PERROR) { + err = PF_PANIC; + } + + /* + * All other scenarios are due to read completions. Check for + * PERR on the primary side. If found the primary side error + * handling will take care of this error. + */ + if (err == PF_NO_ERROR) { + if (pf_data_p->status & PCI_STAT_PERROR) + err = PF_MATCHED_PARENT; + else + err = PF_PANIC; + } + } + + return (err); +} + +/* + * PCIe Poisoned TLP error analyser. If a PCIe device receives a Poisoned TLP, + * check the logs and see if an associated handler for this transaction can be + * found. + */ +/* ARGSUSED */ +static int +pf_analyse_ptlp(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, + pf_data_t *dq_p, pf_data_t *pf_data_p) +{ + pf_data_t *parent_pfd_p; + + /* + * If AERs are supported find the logs in this device, otherwise look in + * it's parent's logs. + */ + if (HAS_AER_LOGS(pf_data_p, bit)) { + pcie_tlp_hdr_t *hdr = (pcie_tlp_hdr_t *)&pf_data_p->aer_h0; + + /* + * Double check that the log contains a poisoned TLP. + * Some devices like PLX switch do not log poison TLP headers. + */ + if (hdr->ep) { + if (pf_tlp_hdl_lookup(rpdip, derr, pf_data_p) == + PF_HDL_FOUND) + return (PF_MATCHED_DEVICE); + } + return (PF_PANIC); + } + + if (pf_data_p->parent_index != PF_DATA_NOT_FOUND) { + parent_pfd_p = &dq_p[pf_data_p->parent_index]; + if (HAS_AER_LOGS(parent_pfd_p, bit)) + return (PF_MATCHED_PARENT); + } + + return (PF_PANIC); +} + +/* + * PCIe-PCI Bridge Received Master and Target abort error analyser on Split + * Completions. If a PCIe Bridge receives a MA/TA check logs and see if an + * associated handler for this transaction can be found. + */ +/* ARGSUSED */ +static int +pf_analyse_sc(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, + pf_data_t *dq_p, pf_data_t *pf_data_p) +{ + uint16_t cmd; + uint32_t addr; + pcie_req_id_t bdf; + uint32_t trans_type; + int sts = PF_HDL_NOTFOUND; + + if (!HAS_SAER_LOGS(pf_data_p, bit)) + return (PF_PANIC); + + if (pf_pci_decode(rpdip, pf_data_p, &cmd, &bdf, &addr, &trans_type) != + DDI_SUCCESS) + return (PF_PANIC); + + if (cmd == PCI_PCIX_CMD_SPL) + sts = pf_hdl_lookup(rpdip, derr->fme_ena, trans_type, + addr, bdf); + + if (sts == PF_HDL_NOTFOUND) + return (PF_PANIC); + + return (PF_MATCHED_DEVICE); +} + +/* + * PCIe Timeout error analyser. This error can be forgiven if it is marked as + * CE Advisory. If it is marked as advisory, this means the HW can recover + * and/or retry the transaction automatically. + */ +/* ARGSUSED */ +static int +pf_analyse_to(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, + pf_data_t *dq_p, pf_data_t *pf_data_p) +{ + /* + * If the Advisory Non-Fatal is set, that means HW will automatically + * retry the failed transaction. + */ + if (HAS_AER_LOGS(pf_data_p, bit) && CE_ADVISORY(pf_data_p)) + return (PF_NO_PANIC); + + return (PF_PANIC); +} + +/* + * PCIe Unexpected Completion. This error can be forgiven if it is marked as + * CE Advisory. If it is marked as advisory, this means the HW can recover + * and/or retry the transaction automatically. + */ +/* ARGSUSED */ +static int +pf_analyse_uc(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, + pf_data_t *dq_p, pf_data_t *pf_data_p) +{ + /* + * Check to see if this TLP was misrouted by matching the device BDF + * with the TLP Log. If misrouting panic, otherwise don't panic. + */ + if (HAS_AER_LOGS(pf_data_p, bit) && + (pf_data_p->bdf == (pf_data_p->aer_h2 >> 16))) + return (PF_NO_PANIC); + + return (PF_PANIC); +} + +/* + * PCIe-PCI Bridge Uncorrectable Data error anlyser. All Uncorrectable Data + * errors should have resulted in a PCIe Poisoned TLP to the RC, except for + * Posted Writes. Check the logs for Posted Writes and if the RC did not see a + * Poisoned TLP. + * + * Non-Posted Writes will also generate a UR in the completion status, which the + * RC should also see. + */ +/* ARGSUSED */ +static int +pf_analyse_uc_data(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, + pf_data_t *dq_p, pf_data_t *pf_data_p) +{ + uint16_t cmd; + uint32_t addr; + pcie_req_id_t bdf; + uint32_t trans_type; + + if (!HAS_SAER_LOGS(pf_data_p, bit)) + return (PF_PANIC); + + if (pf_matched_in_rc(dq_p, pf_data_p, PCI_STAT_PERROR)) + return (PF_MATCHED_RC); + + if (pf_pci_decode(rpdip, pf_data_p, &cmd, &bdf, &addr, &trans_type) != + DDI_SUCCESS) + return (PF_PANIC); + + if (pf_hdl_lookup(rpdip, derr->fme_ena, trans_type, addr, bdf) == + PF_HDL_NOTFOUND) + return (PF_PANIC); + + return (PF_MATCHED_DEVICE); +} + +/* ARGSUSED */ +static int +pf_no_panic(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, + pf_data_t *dq_p, pf_data_t *pf_data_p) +{ + return (PF_NO_PANIC); +} + +/* ARGSUSED */ +static int +pf_matched_device(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, + pf_data_t *dq_p, pf_data_t *pf_data_p) +{ + return (PF_MATCHED_DEVICE); +} + +/* ARGSUSED */ +static int +pf_panic(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, + pf_data_t *dq_p, pf_data_t *pf_data_p) +{ + return (PF_PANIC); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deleted_files/usr/src/uts/sun4v/io/px/px_err_gen.c Mon Dec 18 11:06:59 2006 -0800 @@ -0,0 +1,475 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * The file has been code generated. Do NOT modify this file directly. Please + * use the sun4v PCIe FMA code generation tool. + * + * This file was generated for the following platforms: + * - Fire + * - N2PIU + */ + +/* ARGSUSED */ +static int +px_cb_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) +{ + int err = 0; + + /* STOP bit indicates a secondary error. Panic if it is set */ + if (epkt->rc_descr.STOP == 1) + return (PX_PANIC); + + switch (epkt->rc_descr.op) { + case OP_DMA: + switch (epkt->rc_descr.phase) { + case PH_ADDR: + switch (epkt->rc_descr.cond) { + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_WRITE: + err = PX_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + case PH_DATA: + switch (epkt->rc_descr.cond) { + case CND_INT: + switch (epkt->rc_descr.dir) { + case DIR_READ: + err = PX_PANIC; + break; + case DIR_RDWR: + err = PX_PANIC; + break; + case DIR_WRITE: + err = PX_PANIC; + break; + } /* DIR */ + break; + case CND_UE: + switch (epkt->rc_descr.dir) { + case DIR_READ: + err = PX_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + case PH_UNKNOWN: + switch (epkt->rc_descr.cond) { + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_READ: + err = PX_PANIC; + break; + } /* DIR */ + break; + case CND_UNKNOWN: + switch (epkt->rc_descr.dir) { + case DIR_READ: + err = PX_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + } /* PH */ + break; + case OP_PIO: + switch (epkt->rc_descr.phase) { + case PH_ADDR: + switch (epkt->rc_descr.cond) { + case CND_UNMAP: + switch (epkt->rc_descr.dir) { + case DIR_READ: + err = PX_PANIC; + break; + case DIR_WRITE: + err = PX_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + case PH_DATA: + switch (epkt->rc_descr.cond) { + case CND_INT: + switch (epkt->rc_descr.dir) { + case DIR_READ: + err = PX_PANIC; + break; + case DIR_RDWR: + err = PX_PANIC; + break; + case DIR_WRITE: + err = PX_PANIC; + break; + } /* DIR */ + break; + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_WRITE: + err = PX_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + case PH_UNKNOWN: + switch (epkt->rc_descr.cond) { + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_READ: + err = PX_PANIC; + break; + case DIR_WRITE: + err = PX_PANIC; + break; + } /* DIR */ + break; + case CND_TO: + switch (epkt->rc_descr.dir) { + case DIR_RDWR: + err = PX_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + } /* PH */ + break; + case OP_UNKNOWN: + switch (epkt->rc_descr.phase) { + case PH_ADDR: + switch (epkt->rc_descr.cond) { + case CND_UNMAP: + switch (epkt->rc_descr.dir) { + case DIR_RDWR: + err = PX_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + case PH_DATA: + switch (epkt->rc_descr.cond) { + case CND_UE: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + err = PX_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + case PH_UNKNOWN: + switch (epkt->rc_descr.cond) { + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + err = PX_PANIC; + break; + } /* DIR */ + } /* CND */ + } /* PH */ + } /* OP */ + + return (err); +} + + +/* ARGSUSED */ +static int +px_mmu_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) +{ + int err = 0; + + /* STOP bit indicates a secondary error. Panic if it is set */ + if (epkt->rc_descr.STOP == 1) + return (PX_PANIC); + + switch (epkt->rc_descr.op) { + case OP_BYPASS: + switch (epkt->rc_descr.phase) { + case PH_ADDR: + switch (epkt->rc_descr.cond) { + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_RDWR: + err = PX_NO_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + case PH_UNKNOWN: + switch (epkt->rc_descr.cond) { + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_UNKNOWN: + err = PX_NO_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + } /* PH */ + break; + case OP_TBW: + switch (epkt->rc_descr.phase) { + case PH_DATA: + switch (epkt->rc_descr.cond) { + case CND_INT: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + err = PX_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + case PH_UNKNOWN: + switch (epkt->rc_descr.cond) { + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + err = PX_PANIC; + break; + } /* DIR */ + break; + case CND_UNKNOWN: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + err = PX_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + } /* PH */ + break; + case OP_XLAT: + switch (epkt->rc_descr.phase) { + case PH_ADDR: + switch (epkt->rc_descr.cond) { + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_RDWR: + err = PX_NO_PANIC; + break; + } /* DIR */ + break; + case CND_IRR: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + err = PX_PANIC; + break; + } /* DIR */ + break; + case CND_PROT: + switch (epkt->rc_descr.dir) { + case DIR_RDWR: + err = PX_NO_PANIC; + break; + } /* DIR */ + break; + case CND_UNMAP: + switch (epkt->rc_descr.dir) { + case DIR_RDWR: + err = PX_NO_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + case PH_DATA: + switch (epkt->rc_descr.cond) { + case CND_INV: + switch (epkt->rc_descr.dir) { + case DIR_RDWR: + err = PX_NO_PANIC; + break; + case DIR_UNKNOWN: + err = PX_NO_PANIC; + break; + } /* DIR */ + break; + case CND_IRR: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + err = PX_PANIC; + break; + } /* DIR */ + break; + case CND_PROT: + switch (epkt->rc_descr.dir) { + case DIR_WRITE: + err = PX_NO_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + case PH_UNKNOWN: + switch (epkt->rc_descr.cond) { + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + err = PX_PANIC; + break; + } /* DIR */ + } /* CND */ + } /* PH */ + } /* OP */ + + if (epkt->rc_descr.D && (err & (PX_PANIC | PX_PROTECTED)) && + px_mmu_handle_lookup(dip, derr, epkt) == PF_HDL_FOUND) + err = PX_NO_PANIC; + + return (err); +} + + +/* ARGSUSED */ +static int +px_intr_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) +{ + int err = 0; + + /* STOP bit indicates a secondary error. Panic if it is set */ + if (epkt->rc_descr.STOP == 1) + return (PX_PANIC); + + switch (epkt->rc_descr.op) { + case OP_MSI32: + switch (epkt->rc_descr.phase) { + case PH_DATA: + switch (epkt->rc_descr.cond) { + case CND_INT: + switch (epkt->rc_descr.dir) { + case DIR_UNKNOWN: + err = PX_PANIC; + break; + } /* DIR */ + break; + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + err = PX_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + case PH_UNKNOWN: + switch (epkt->rc_descr.cond) { + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + err = PX_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + } /* PH */ + break; + case OP_MSI64: + switch (epkt->rc_descr.phase) { + case PH_DATA: + switch (epkt->rc_descr.cond) { + case CND_INT: + switch (epkt->rc_descr.dir) { + case DIR_UNKNOWN: + err = PX_PANIC; + break; + } /* DIR */ + break; + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + err = PX_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + case PH_UNKNOWN: + switch (epkt->rc_descr.cond) { + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + err = PX_PANIC; + break; + } /* DIR */ + break; + } /* CND */ + break; + } /* PH */ + break; + case OP_MSIQ: + switch (epkt->rc_descr.phase) { + case PH_UNKNOWN: + switch (epkt->rc_descr.cond) { + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + err = PX_PANIC; + break; + } /* DIR */ + break; + case CND_OV: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + err = px_intr_handle_errors(dip, derr, + epkt); + break; + } /* DIR */ + break; + } /* CND */ + break; + } /* PH */ + break; + case OP_PCIEMSG: + switch (epkt->rc_descr.phase) { + case PH_UNKNOWN: + switch (epkt->rc_descr.cond) { + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_INGRESS: + err = PX_PANIC; + break; + } /* DIR */ + } /* CND */ + } /* PH */ + } /* OP */ + + return (err); +}
--- a/usr/src/cmd/fm/eversholt/files/sparc/sun4/fire.esc Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/cmd/fm/eversholt/files/sparc/sun4/fire.esc Mon Dec 18 11:06:59 2006 -0800 @@ -2,8 +2,9 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -18,10 +19,10 @@ * * CDDL HEADER END */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ #pragma ident "%Z%%M% %I% %E% SMI" @@ -38,18 +39,10 @@ #define EBUS_FIT 1000 #define LINK_EVENTS_COUNT 10 #define LINK_EVENTS_TIME 1h -#define CE_EVENTS_COUNT 10 -#define CE_EVENTS_TIME 1h - + #define PROP_PLAT_FRU "PLAT-FRU" /* - * Macros for CE Fabric ereports - */ -#define PF_CE (1 << 1) -#define MATCH_CE (payloadprop("severity") == PF_CE) - -/* * Test for primary or secondary ereports */ #define IS_PRIMARY (payloadprop("primary")) @@ -202,8 +195,7 @@ * Whenever a leaf device sends an error message (fatal, non-fatal, or CE) to * root complex, the nexus driver publishes this ereport to log the ereport. */ -event ereport.io.fire.fabric@pciexbus/pciexdev/pciexfn {within(1s)}; -event ereport.io.fire.fabric@pcibus/pcidev/pcifn {within(1s)}; +event ereport.io.fire.fabric@hostbridge/pciexrc {within(5s)}; /* * A faulty Fire hostbridge may cause (* may cause PCI-E abort): @@ -730,56 +722,7 @@ ereport.io.fire.pec.lin@hostbridge/pciexrc, ereport.io.fire.pec.lup@hostbridge/pciexrc, error.io.fire.pec.secondary@hostbridge/pciexrc, - error.io.fire.pec.fabric_error@hostbridge/pciexrc + error.io.fire.pec.fabric_error@hostbridge/pciexrc, + ereport.io.fire.fabric@hostbridge/pciexrc ; -event error.io.fire.fabric@hostbridge/pciexrc; - -prop fault.io.fire.pciex.device@pciexbus/pciexdev/pciexfn (1) -> - ereport.io.fire.fabric@pciexbus/pciexdev/pciexfn { !MATCH_CE }; - -prop fault.io.fire.pci.device@pcibus/pcidev/pcifn (1) -> - ereport.io.fire.fabric@pcibus/pcidev/pcifn; - -/* - * Below rules are so we get a single suspect list in 1 fault with percentage - * of indiction being equal among all the suspect FRUs - */ -prop error.io.fire.fabric@hostbridge/pciexrc (0) -> - ereport.io.fire.fabric@pciexbus/pciexdev/pciexfn { - is_under(hostbridge/pciexrc, pciexbus/pciexdev/pciexfn) && !MATCH_CE - }; - -prop error.io.fire.fabric@hostbridge/pciexrc (0) -> - ereport.io.fire.fabric@pcibus/pcidev/pcifn { - is_under(hostbridge/pciexrc, pcibus/pcidev/pcifn) - }; - -prop fault.io.fire.pciex.device@pciexbus/pciexdev/pciexfn (1) -> - error.io.fire.fabric@hostbridge/pciexrc { - is_under(hostbridge/pciexrc, pciexbus/pciexdev/pciexfn) - }; - -prop fault.io.fire.pci.device@pcibus/pcidev/pcifn (1) -> - error.io.fire.fabric@hostbridge/pciexrc { - is_under(hostbridge/pciexrc, pcibus/pcidev/pcifn) - }; - -event upset.io.fire.fabric@pciexbus/pciexdev/pciexfn{within(1s)}; -event ereport.io.fire.pciex.ce@pciexbus/pciexdev/pciexfn{within(1s)}; - -/* SERD CEs */ -prop upset.io.fire.fabric@pciexbus[b]/pciexdev[d]/pciexfn[f] (0) -> - ereport.io.fire.fabric@pciexbus[b]/pciexdev[d]/pciexfn[f] - { MATCH_CE }; - -event upset.io.fire.fabric@pciexbus/pciexdev/pciexfn, - engine=serd.io.fire.fabric@pciexbus/pciexdev/pciexfn; - -engine serd.io.fire.fabric@pciexbus/pciexdev/pciexfn, - N=CE_EVENTS_COUNT, T=CE_EVENTS_TIME, method=persistent, - trip=ereport.io.fire.pciex.ce@pciexbus/pciexdev/pciexfn; - -prop fault.io.fire.pciex.device@pciexbus/pciexdev/pciexfn (0) -> - ereport.io.fire.pciex.ce@pciexbus/pciexdev/pciexfn; -
--- a/usr/src/uts/common/Makefile.files Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/common/Makefile.files Mon Dec 18 11:06:59 2006 -0800 @@ -1301,7 +1301,7 @@ INCLUDE_PATH += $(INC_PATH) $(CCYFLAG)$(UTSBASE)/common # -PCIE_OBJS += pcie.o pcie_fault.o +PCIE_OBJS += pcie.o # Chelsio N110 10G NIC driver module #
--- a/usr/src/uts/common/io/pcie.c Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/common/io/pcie.c Mon Dec 18 11:06:59 2006 -0800 @@ -32,28 +32,28 @@ #include <sys/ddi.h> #include <sys/sunddi.h> #include <sys/sunndi.h> -#include <sys/fm/protocol.h> -#include <sys/fm/util.h> -#include <sys/promif.h> -#include <sys/disp.h> +#include <sys/promif.h> /* prom_printf */ +#include <sys/disp.h> /* prom_printf */ #include <sys/pcie.h> #include <sys/pci_cap.h> #include <sys/pcie_impl.h> +#include <sys/pci_impl.h> +static int pcie_get_bdf_from_dip(dev_info_t *dip, uint32_t *bdf); dev_info_t *pcie_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip); uint32_t pcie_get_bdf_for_dma_xfer(dev_info_t *dip, dev_info_t *rdip); -#ifdef DEBUG +#ifdef DEBUG uint_t pcie_debug_flags = 0; #define PCIE_DBG pcie_dbg static void pcie_dbg(char *fmt, ...); -#else /* DEBUG */ +#else /* DEBUG */ #define PCIE_DBG 0 && -#endif /* DEBUG */ +#endif /* DEBUG */ /* Variable to control default PCI-Express config settings */ ushort_t pcie_command_default = PCI_COMM_SERR_ENABLE | @@ -126,159 +126,95 @@ int pcie_initchild(dev_info_t *cdip) { + ddi_acc_handle_t config_handle; uint8_t header_type; uint8_t bcr; uint16_t command_reg, status_reg; - pcie_ppd_t *ppd_p; - ddi_acc_handle_t eh; + uint16_t cap_ptr; + pci_parent_data_t *pd_p; - ppd_p = pcie_init_ppd(cdip); - if (ppd_p == NULL) + if (pci_config_setup(cdip, &config_handle) != DDI_SUCCESS) return (DDI_FAILURE); - eh = ppd_p->ppd_cfg_hdl; + /* Allocate memory for pci parent data */ + pd_p = kmem_zalloc(sizeof (pci_parent_data_t), KM_SLEEP); + + /* + * Retrieve and save BDF and PCIE2PCI bridge's secondary bus + * information in the parent private data structure. + */ + if (pcie_get_bdf_from_dip(cdip, &pd_p->pci_bdf) != DDI_SUCCESS) + goto fail; + + pd_p->pci_sec_bus = ddi_prop_get_int(DDI_DEV_T_ANY, cdip, 0, + "pcie2pci-sec-bus", 0); - /* setup the device's command register */ - header_type = ppd_p->ppd_hdr_type; - status_reg = pci_config_get16(eh, PCI_CONF_STAT); - pci_config_put16(eh, PCI_CONF_STAT, status_reg); - command_reg = pci_config_get16(eh, PCI_CONF_COMM); + /* + * Determine the configuration header type. + */ + header_type = pci_config_get8(config_handle, PCI_CONF_HEADER); + PCIE_DBG("%s: header_type=%x\n", ddi_driver_name(cdip), header_type); + + /* + * Setup the device's command register + */ + status_reg = pci_config_get16(config_handle, PCI_CONF_STAT); + pci_config_put16(config_handle, PCI_CONF_STAT, status_reg); + command_reg = pci_config_get16(config_handle, PCI_CONF_COMM); command_reg |= pcie_command_default; - pci_config_put16(eh, PCI_CONF_COMM, command_reg); + pci_config_put16(config_handle, PCI_CONF_COMM, command_reg); - PCIE_DBG("pcie_initchild: %s(dip 0x%p), header_type=%x, " - "command=%x\n", ddi_driver_name(cdip), (void *)cdip, - header_type, pci_config_get16(eh, PCI_CONF_COMM)); + PCIE_DBG("%s: command=%x\n", ddi_driver_name(cdip), + pci_config_get16(config_handle, PCI_CONF_COMM)); /* * If the device has a bus control register then program it * based on the settings in the command register. */ - if (header_type == PCI_HEADER_ONE) { - status_reg = pci_config_get16(eh, + if ((header_type & PCI_HEADER_TYPE_M) == PCI_HEADER_ONE) { + status_reg = pci_config_get16(config_handle, PCI_BCNF_SEC_STATUS); - pci_config_put16(eh, PCI_BCNF_SEC_STATUS, + pci_config_put16(config_handle, PCI_BCNF_SEC_STATUS, status_reg); - bcr = pci_config_get8(eh, PCI_BCNF_BCNTRL); + bcr = pci_config_get8(config_handle, PCI_BCNF_BCNTRL); if (pcie_command_default & PCI_COMM_PARITY_DETECT) bcr |= PCI_BCNF_BCNTRL_PARITY_ENABLE; if (pcie_command_default & PCI_COMM_SERR_ENABLE) bcr |= PCI_BCNF_BCNTRL_SERR_ENABLE; bcr |= PCI_BCNF_BCNTRL_MAST_AB_MODE; - pci_config_put8(eh, PCI_BCNF_BCNTRL, bcr); + pci_config_put8(config_handle, PCI_BCNF_BCNTRL, bcr); } - if (ppd_p->ppd_pcie_off) - pcie_enable_errors(cdip, eh); - - return (DDI_SUCCESS); -} - -/* Initialize PCIe Parent Private Data */ -pcie_ppd_t * -pcie_init_ppd(dev_info_t *cdip) -{ - pcie_ppd_t *ppd_p = 0; - ddi_acc_handle_t eh; - int range_size; - - /* allocate memory for pcie parent data */ - ppd_p = kmem_zalloc(sizeof (pcie_ppd_t), KM_SLEEP); - - /* Create an config access special to error handling */ - if (pci_config_setup(cdip, &eh) != DDI_SUCCESS) { - kmem_free(ppd_p, sizeof (pcie_ppd_t)); - return (NULL); - } - ppd_p->ppd_cfg_hdl = eh; + if ((PCI_CAP_LOCATE(config_handle, PCI_CAP_ID_PCI_E, &cap_ptr)) + != DDI_FAILURE) { + pcie_enable_errors(cdip, config_handle); - /* get device's bus/dev/function number */ - if (pcie_get_bdf_from_dip(cdip, &ppd_p->ppd_bdf) != DDI_SUCCESS) - goto fail; - - /* Save the Vendor Id Device Id */ - ppd_p->ppd_dev_ven_id = pci_config_get32(eh, PCI_CONF_VENID); - - /* Save the Header Type */ - ppd_p->ppd_hdr_type = pci_config_get8(eh, PCI_CONF_HEADER); - ppd_p->ppd_hdr_type &= PCI_HEADER_TYPE_M; - - /* Save the Range information if device is a switch/bridge */ - if (ppd_p->ppd_hdr_type == PCI_HEADER_ONE) { - /* get "bus_range" property */ - range_size = sizeof (pci_bus_range_t); - if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, - "bus-range", (caddr_t)&ppd_p->ppd_bus_range, &range_size) - != DDI_PROP_SUCCESS) - goto fail; - - /* get secondary bus number */ - ppd_p->ppd_bdg_secbus = pci_config_get8(eh, PCI_BCNF_SECBUS); - - /* Get "ranges" property */ - if (ddi_getlongprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, - "ranges", (caddr_t)&ppd_p->ppd_addr_ranges, - &ppd_p->ppd_addr_entries) != DDI_PROP_SUCCESS) - ppd_p->ppd_addr_entries = 0; - ppd_p->ppd_addr_entries /= sizeof (ppb_ranges_t); + pd_p->pci_phfun = (pci_config_get8(config_handle, + cap_ptr + PCIE_DEVCAP) & PCIE_DEVCAP_PHTM_FUNC_MASK) >> 3; } - /* save "assigned-addresses" property array, ignore failues */ - if (ddi_getlongprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, - "assigned-addresses", (caddr_t)&ppd_p->ppd_assigned_addr, - &ppd_p->ppd_assigned_entries) == DDI_PROP_SUCCESS) - ppd_p->ppd_assigned_entries /= sizeof (pci_regspec_t); - else - ppd_p->ppd_assigned_entries = 0; - - if ((PCI_CAP_LOCATE(eh, PCI_CAP_ID_PCI_E, &ppd_p->ppd_pcie_off)) - != DDI_FAILURE) { - ppd_p->ppd_dev_type = PCI_CAP_GET16(eh, NULL, - ppd_p->ppd_pcie_off, PCIE_PCIECAP) & - PCIE_PCIECAP_DEV_TYPE_MASK; - - ppd_p->ppd_pcie_phfun = (pci_config_get8(eh, - ppd_p->ppd_pcie_off + PCIE_DEVCAP) & - PCIE_DEVCAP_PHTM_FUNC_MASK) >> 3; - - if (PCI_CAP_LOCATE(eh, PCI_CAP_XCFG_SPC(PCIE_EXT_CAP_ID_AER), - &ppd_p->ppd_aer_off) != DDI_SUCCESS) - ppd_p->ppd_aer_off = NULL; - } else { - ppd_p->ppd_pcie_off = NULL; - ppd_p->ppd_pcie_phfun = NULL; - ppd_p->ppd_dev_type = PCIE_PCIECAP_DEV_TYPE_PCI_DEV; - } - - ppd_p->ppd_dip = cdip; - ppd_p->ppd_fm_flags = 0; - ddi_set_parent_data(cdip, (void *)ppd_p); - - PCIE_DBG("Add %s(dip 0x%p, bdf 0x%x, secbus 0x%x)\n", - ddi_driver_name(cdip), (void *)cdip, ppd_p->ppd_bdf, - ppd_p->ppd_bdg_secbus); - - return (ppd_p); + ddi_set_parent_data(cdip, (void *)pd_p); + pci_config_teardown(&config_handle); + return (DDI_SUCCESS); fail: - cmn_err(CE_WARN, "PCIE init err info failed BDF 0x%x\n", - ppd_p->ppd_bdf); - pci_config_teardown(&eh); - kmem_free(ppd_p, sizeof (pcie_ppd_t)); - return (NULL); + cmn_err(CE_WARN, "PCIE init child failed\n"); + kmem_free(pd_p, sizeof (pci_parent_data_t)); + pci_config_teardown(&config_handle); + return (DDI_FAILURE); } int pcie_postattach_child(dev_info_t *dip) { - ddi_acc_handle_t cfg_hdl; + ddi_acc_handle_t config_handle; int rval = DDI_FAILURE; - if (pci_config_setup(dip, &cfg_hdl) != DDI_SUCCESS) + if (pci_config_setup(dip, &config_handle) != DDI_SUCCESS) return (DDI_FAILURE); - rval = pcie_enable_ce(dip, cfg_hdl); + rval = pcie_enable_ce(dip, config_handle); - pci_config_teardown(&cfg_hdl); + pci_config_teardown(&config_handle); return (rval); } @@ -286,87 +222,83 @@ * PCI-Express child device de-initialization. * This function disables generic pci-express interrupts and error * handling. + * + * @param pdip parent dip (root nexus's dip) + * @param cdip child's dip (device's dip) + * @param arg pcie private data */ +/* ARGSUSED */ void pcie_uninitchild(dev_info_t *cdip) { - pcie_ppd_t *ppd_p; - ppd_p = pcie_get_ppd(cdip); - - pcie_disable_errors(cdip, ppd_p->ppd_cfg_hdl); - pcie_uninit_ppd(cdip); -} + ddi_acc_handle_t config_handle; + pci_parent_data_t *pd_p; -void -pcie_uninit_ppd(dev_info_t *cdip) -{ - pcie_ppd_t *ppd_p; + if (pd_p = ddi_get_parent_data(cdip)) { + ddi_set_parent_data(cdip, NULL); + kmem_free(pd_p, sizeof (pci_parent_data_t)); + } - ppd_p = pcie_get_ppd(cdip); - ASSERT(ppd_p); - pci_config_teardown(&ppd_p->ppd_cfg_hdl); - kmem_free(ppd_p->ppd_assigned_addr, - (sizeof (pci_regspec_t) * ppd_p->ppd_assigned_entries)); - kmem_free(ppd_p->ppd_addr_ranges, - (sizeof (ppb_ranges_t) * ppd_p->ppd_addr_entries)); + if (pci_config_setup(cdip, &config_handle) != DDI_SUCCESS) + return; - kmem_free(ppd_p, sizeof (pcie_ppd_t)); - ddi_set_parent_data(cdip, NULL); + pcie_disable_errors(cdip, config_handle); + + pci_config_teardown(&config_handle); } /* ARGSUSED */ void -pcie_clear_errors(dev_info_t *dip, ddi_acc_handle_t cfg_hdl) +pcie_clear_errors(dev_info_t *dip, ddi_acc_handle_t config_handle) { uint16_t cap_ptr, aer_ptr, dev_type, device_sts; int rval = DDI_FAILURE; /* 1. clear the Legacy PCI Errors */ - device_sts = pci_config_get16(cfg_hdl, PCI_CONF_STAT); - pci_config_put16(cfg_hdl, PCI_CONF_STAT, device_sts); + device_sts = pci_config_get16(config_handle, PCI_CONF_STAT); + pci_config_put16(config_handle, PCI_CONF_STAT, device_sts); - if ((PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_ID_PCI_E, &cap_ptr)) == - DDI_FAILURE) + if ((PCI_CAP_LOCATE(config_handle, PCI_CAP_ID_PCI_E, &cap_ptr)) + == DDI_FAILURE) return; - rval = PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_XCFG_SPC + rval = PCI_CAP_LOCATE(config_handle, PCI_CAP_XCFG_SPC (PCIE_EXT_CAP_ID_AER), &aer_ptr); - dev_type = PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, + dev_type = PCI_CAP_GET16(config_handle, NULL, cap_ptr, PCIE_PCIECAP) & PCIE_PCIECAP_DEV_TYPE_MASK; - /* 1.1 clear the Legacy PCI Secondary Bus Errors */ - if (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) { - device_sts = pci_config_get16(cfg_hdl, - PCI_BCNF_SEC_STATUS); - pci_config_put16(cfg_hdl, PCI_BCNF_SEC_STATUS, - device_sts); - } - /* * Clear any pending errors */ /* 2. clear the Advanced PCIe Errors */ if (rval != DDI_FAILURE) { - PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_CE_STS, + PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_CE_STS, -1); - PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_UCE_STS, + PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_UCE_STS, -1); - if (dev_type == PCIE_PCIECAP_DEV_TYPE_PCI2PCIE) { - PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, + if (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) { + PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_SUCE_STS, -1); } } /* 3. clear the PCIe Errors */ - if ((device_sts = PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, + if ((device_sts = PCI_CAP_GET16(config_handle, NULL, cap_ptr, PCIE_DEVSTS)) != PCI_CAP_EINVAL16) - PCI_CAP_PUT16(cfg_hdl, PCI_CAP_ID_PCI_E, cap_ptr, + PCI_CAP_PUT16(config_handle, PCI_CAP_ID_PCI_E, cap_ptr, PCIE_DEVSTS, device_sts); + + if (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) { + device_sts = pci_config_get16(config_handle, + PCI_BCNF_SEC_STATUS); + pci_config_put16(config_handle, PCI_BCNF_SEC_STATUS, + device_sts); + } } void -pcie_enable_errors(dev_info_t *dip, ddi_acc_handle_t cfg_hdl) +pcie_enable_errors(dev_info_t *dip, ddi_acc_handle_t config_handle) { uint16_t cap_ptr, aer_ptr, dev_type, device_ctl; uint32_t aer_reg; @@ -375,29 +307,29 @@ /* * Clear any pending errors */ - pcie_clear_errors(dip, cfg_hdl); + pcie_clear_errors(dip, config_handle); - if ((PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_ID_PCI_E, &cap_ptr)) + if ((PCI_CAP_LOCATE(config_handle, PCI_CAP_ID_PCI_E, &cap_ptr)) == DDI_FAILURE) return; - rval = PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_XCFG_SPC + rval = PCI_CAP_LOCATE(config_handle, PCI_CAP_XCFG_SPC (PCIE_EXT_CAP_ID_AER), &aer_ptr); - dev_type = PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, PCIE_PCIECAP) & - PCIE_PCIECAP_DEV_TYPE_MASK; + dev_type = PCI_CAP_GET16(config_handle, NULL, cap_ptr, + PCIE_PCIECAP) & PCIE_PCIECAP_DEV_TYPE_MASK; /* * Enable Baseline Error Handling but leave CE reporting off (poweron * default). */ - if ((device_ctl = PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, + if ((device_ctl = PCI_CAP_GET16(config_handle, NULL, cap_ptr, PCIE_DEVCTL)) != PCI_CAP_EINVAL16) { - PCI_CAP_PUT16(cfg_hdl, NULL, cap_ptr, PCIE_DEVCTL, + PCI_CAP_PUT16(config_handle, NULL, cap_ptr, PCIE_DEVCTL, pcie_devctl_default | (pcie_base_err_default & (~PCIE_DEVCTL_CE_REPORTING_EN))); PCIE_DBG("%s%d: devctl 0x%x -> 0x%x\n", ddi_node_name(dip), ddi_get_instance(dip), device_ctl, - PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, + PCI_CAP_GET16(config_handle, NULL, cap_ptr, PCIE_DEVCTL)); } @@ -409,22 +341,22 @@ } /* Enable Uncorrectable errors */ - if ((aer_reg = PCI_XCAP_GET32(cfg_hdl, NULL, aer_ptr, + if ((aer_reg = PCI_XCAP_GET32(config_handle, NULL, aer_ptr, PCIE_AER_UCE_MASK)) != PCI_CAP_EINVAL32) { - PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, + PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_UCE_MASK, pcie_aer_uce_mask); PCIE_DBG("%s: AER UCE=0x%x->0x%x\n", ddi_driver_name(dip), - aer_reg, PCI_XCAP_GET32(cfg_hdl, NULL, aer_ptr, + aer_reg, PCI_XCAP_GET32(config_handle, NULL, aer_ptr, PCIE_AER_UCE_MASK)); } /* Enable ECRC generation and checking */ - if ((aer_reg = PCI_XCAP_GET32(cfg_hdl, NULL, aer_ptr, + if ((aer_reg = PCI_XCAP_GET32(config_handle, NULL, aer_ptr, PCIE_AER_CTL)) != PCI_CAP_EINVAL32) { aer_reg |= (PCIE_AER_CTL_ECRC_GEN_ENA | PCIE_AER_CTL_ECRC_CHECK_ENA); - PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_CTL, + PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_CTL, aer_reg); } @@ -437,12 +369,12 @@ /* * Enable secondary bus errors */ - if ((aer_reg = PCI_XCAP_GET32(cfg_hdl, NULL, aer_ptr, + if ((aer_reg = PCI_XCAP_GET32(config_handle, NULL, aer_ptr, PCIE_AER_SUCE_MASK)) != PCI_CAP_EINVAL32) { - PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_SUCE_MASK, + PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_SUCE_MASK, pcie_aer_suce_mask); PCIE_DBG("%s: AER SUCE=0x%x->0x%x\n", ddi_driver_name(dip), - aer_reg, PCI_XCAP_GET32(cfg_hdl, + aer_reg, PCI_XCAP_GET32(config_handle, PCIE_EXT_CAP_ID_AER, aer_ptr, PCIE_AER_SUCE_MASK)); } } @@ -453,12 +385,12 @@ * a call to pcie_enable_errors. */ int -pcie_enable_ce(dev_info_t *dip, ddi_acc_handle_t cfg_hdl) +pcie_enable_ce(dev_info_t *dip, ddi_acc_handle_t config_handle) { uint16_t cap_ptr, aer_ptr, device_sts, device_ctl; uint32_t tmp_pcie_aer_ce_mask; - if ((PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_ID_PCI_E, &cap_ptr)) + if ((PCI_CAP_LOCATE(config_handle, PCI_CAP_ID_PCI_E, &cap_ptr)) == DDI_FAILURE) return (DDI_FAILURE); @@ -478,60 +410,60 @@ return (DDI_SUCCESS); } - if (PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_XCFG_SPC + if (PCI_CAP_LOCATE(config_handle, PCI_CAP_XCFG_SPC (PCIE_EXT_CAP_ID_AER), &aer_ptr) != DDI_FAILURE) { /* Enable AER CE */ - PCI_XCAP_PUT32(cfg_hdl, PCIE_EXT_CAP_ID_AER, + PCI_XCAP_PUT32(config_handle, PCIE_EXT_CAP_ID_AER, aer_ptr, PCIE_AER_CE_MASK, tmp_pcie_aer_ce_mask); PCIE_DBG("%s: AER CE set to 0x%x\n", - ddi_driver_name(dip), PCI_XCAP_GET32(cfg_hdl, NULL, + ddi_driver_name(dip), PCI_XCAP_GET32(config_handle, NULL, aer_ptr, PCIE_AER_CE_MASK)); /* Clear any pending AER CE errors */ - PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_CE_STS, + PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_CE_STS, -1); } /* clear any pending CE errors */ - if ((device_sts = PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, + if ((device_sts = PCI_CAP_GET16(config_handle, NULL, cap_ptr, PCIE_DEVSTS)) != PCI_CAP_EINVAL16) - PCI_CAP_PUT16(cfg_hdl, PCI_CAP_ID_PCI_E, cap_ptr, + PCI_CAP_PUT16(config_handle, PCI_CAP_ID_PCI_E, cap_ptr, PCIE_DEVSTS, device_sts & (~PCIE_DEVSTS_CE_DETECTED)); /* Enable CE reporting */ - device_ctl = PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, PCIE_DEVCTL); - PCI_CAP_PUT16(cfg_hdl, NULL, cap_ptr, PCIE_DEVCTL, + device_ctl = PCI_CAP_GET16(config_handle, NULL, cap_ptr, PCIE_DEVCTL); + PCI_CAP_PUT16(config_handle, NULL, cap_ptr, PCIE_DEVCTL, (device_ctl & (~PCIE_DEVCTL_ERR_MASK)) | pcie_base_err_default); PCIE_DBG("%s%d: devctl 0x%x -> 0x%x\n", ddi_node_name(dip), ddi_get_instance(dip), device_ctl, - PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, PCIE_DEVCTL)); + PCI_CAP_GET16(config_handle, NULL, cap_ptr, PCIE_DEVCTL)); return (DDI_SUCCESS); } /* ARGSUSED */ void -pcie_disable_errors(dev_info_t *dip, ddi_acc_handle_t cfg_hdl) +pcie_disable_errors(dev_info_t *dip, ddi_acc_handle_t config_handle) { uint16_t cap_ptr, aer_ptr, dev_type, device_ctl; uint32_t aer_reg; int rval = DDI_FAILURE; - if ((PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_ID_PCI_E, &cap_ptr)) + if ((PCI_CAP_LOCATE(config_handle, PCI_CAP_ID_PCI_E, &cap_ptr)) == DDI_FAILURE) return; - rval = PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_XCFG_SPC + rval = PCI_CAP_LOCATE(config_handle, PCI_CAP_XCFG_SPC (PCIE_EXT_CAP_ID_AER), &aer_ptr); - dev_type = PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, + dev_type = PCI_CAP_GET16(config_handle, NULL, cap_ptr, PCIE_PCIECAP) & PCIE_PCIECAP_DEV_TYPE_MASK; /* * Disable PCI-Express Baseline Error Handling */ - device_ctl = PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, PCIE_DEVCTL); + device_ctl = PCI_CAP_GET16(config_handle, NULL, cap_ptr, PCIE_DEVCTL); device_ctl &= ~PCIE_DEVCTL_ERR_MASK; - PCI_CAP_PUT16(cfg_hdl, NULL, cap_ptr, PCIE_DEVCTL, device_ctl); + PCI_CAP_PUT16(config_handle, NULL, cap_ptr, PCIE_DEVCTL, device_ctl); /* * Disable PCI-Express Advanced Error Handling if Exists @@ -541,20 +473,20 @@ } /* Disable Uncorrectable errors */ - PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_UCE_MASK, + PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_UCE_MASK, PCIE_AER_UCE_BITS); /* Disable Correctable errors */ - PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_CE_MASK, + PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_CE_MASK, PCIE_AER_CE_BITS); /* Disable ECRC generation and checking */ - if ((aer_reg = PCI_XCAP_GET32(cfg_hdl, NULL, aer_ptr, + if ((aer_reg = PCI_XCAP_GET32(config_handle, NULL, aer_ptr, PCIE_AER_CTL)) != PCI_CAP_EINVAL32) { aer_reg &= ~(PCIE_AER_CTL_ECRC_GEN_ENA | PCIE_AER_CTL_ECRC_CHECK_ENA); - PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_CTL, + PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_CTL, aer_reg); } /* @@ -566,15 +498,12 @@ /* * Disable secondary bus errors */ - PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_SUCE_MASK, + PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_SUCE_MASK, PCIE_AER_SUCE_BITS); } -/* - * Extract bdf from "reg" property. - */ -int -pcie_get_bdf_from_dip(dev_info_t *dip, pcie_req_id_t *bdf) +static int +pcie_get_bdf_from_dip(dev_info_t *dip, uint32_t *bdf) { pci_regspec_t *regspec; int reglen; @@ -631,33 +560,14 @@ PCI_GET_SEC_BUS(cdip) : PCI_GET_BDF(cdip)); } -/* - * Returns Parent Private Data for PCIe devices and PCI devices that are in PCIe - * systems - */ -pcie_ppd_t * -pcie_get_ppd(dev_info_t *dip) -{ - return ((pcie_ppd_t *)ddi_get_parent_data(dip)); -} - -/* - * Is the rdip a child of dip. Used for checking certain CTLOPS from bubbling - * up erronously. Ex. ISA ctlops to a PCI-PCI Bridge. - */ -boolean_t -pcie_is_child(dev_info_t *dip, dev_info_t *rdip) -{ - dev_info_t *cdip = ddi_get_child(dip); - for (; cdip; cdip = ddi_get_next_sibling(cdip)) - if (cdip == rdip) - break; - return (cdip != NULL); -} - #ifdef DEBUG /* - * For debugging purposes set pcie_dbg_print != 0 to see printf messages + * This is a temporary stop gap measure. + * PX runs at PIL 14, which is higher than the clock's PIL. + * As a results we cannot safely print while servicing interrupts using + * cmn_err or prom_printf. + * + * For debugging purposes set px_dbg_print != 0 to see printf messages * during interrupt. * * When a proper solution is in place this code will disappear.
--- a/usr/src/uts/common/io/pcie_fault.c Mon Dec 18 10:59:02 2006 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1893 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/sysmacros.h> -#include <sys/types.h> -#include <sys/kmem.h> -#include <sys/modctl.h> -#include <sys/ddi.h> -#include <sys/sunddi.h> -#include <sys/sunndi.h> -#include <sys/fm/protocol.h> -#include <sys/fm/util.h> -#include <sys/promif.h> -#include <sys/disp.h> -#include <sys/pcie.h> -#include <sys/pci_cap.h> -#include <sys/pcie_impl.h> - -/* size of error queue */ -uint_t pf_dq_size = 32; - -#define PF_AER_FATAL_ERR (PCIE_AER_UCE_DLP | PCIE_AER_UCE_SD |\ - PCIE_AER_UCE_FCP | PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP) -#define PF_AER_NON_FATAL_ERR (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_TO | \ - PCIE_AER_UCE_CA | PCIE_AER_UCE_ECRC | PCIE_AER_UCE_UR) - -#define PF_SAER_FATAL_ERR (PCIE_AER_SUCE_USC_MSG_DATA_ERR | \ - PCIE_AER_SUCE_UC_ATTR_ERR | PCIE_AER_SUCE_UC_ADDR_ERR | \ - PCIE_AER_SUCE_SERR_ASSERT) -#define PF_SAER_NON_FATAL_ERR (PCIE_AER_SUCE_TA_ON_SC | \ - PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA | \ - PCIE_AER_SUCE_RCVD_MA | PCIE_AER_SUCE_USC_ERR | \ - PCIE_AER_SUCE_UC_DATA_ERR | PCIE_AER_SUCE_TIMER_EXPIRED | \ - PCIE_AER_SUCE_PERR_ASSERT | PCIE_AER_SUCE_INTERNAL_ERR) - -#define PF_PCIE_BDG_ERR (PCIE_DEVSTS_FE_DETECTED | PCIE_DEVSTS_NFE_DETECTED | \ - PCIE_DEVSTS_CE_DETECTED) - -#define PF_PCI_BDG_ERR (PCI_STAT_S_SYSERR | PCI_STAT_S_TARG_AB | \ - PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB | PCI_STAT_S_PERROR) - - -#define PF_DATA_NOT_FOUND -1 - -#define HAS_AER_LOGS(pf_data_p, bit) \ - (pf_data_p->aer_off && (bit & (pf_data_p->aer_control & \ - PCIE_AER_CTL_FST_ERR_PTR_MASK))) - -#define HAS_SAER_LOGS(pf_data_p, bit) \ - (pf_data_p->aer_off && (bit & (pf_data_p->s_aer_control & \ - PCIE_AER_SCTL_FST_ERR_PTR_MASK))) - -#define GET_SAER_CMD(pf_data_p) \ - (pf_data_p->s_aer_h1 >> PCIE_AER_SUCE_HDR_CMD_LWR_SHIFT) & \ - PCIE_AER_SUCE_HDR_CMD_LWR_MASK; - -#define CE_ADVISORY(pf_data_p) \ - (pf_data_p->aer_ce_status & PCIE_AER_CE_AD_NFE) - -#define IS_RC(pf_data_p) \ - (pf_data_p->dev_type == PCIE_PCIECAP_DEV_TYPE_ROOT) - -/* PCIe Fault Fabric Error analysis table */ -typedef struct pf_fab_err_tbl { - uint32_t bit; /* Error bit */ - int (*handler)(); /* Error handling fuction */ -} pf_fab_err_tbl_t; - -/* DMA/PIO/CFG Handle Comparason Function Declaration */ -typedef int (*pf_hdl_compare_t)(struct i_ddi_fmhdl *, ddi_fm_error_t *, - uint32_t, pcie_req_id_t); - -/* PCIe Fault Support Functions. */ -static int pf_find_in_q(pcie_req_id_t bdf, pf_data_t *dq_p, int dq_tail); -static boolean_t pf_in_bus_range(pcie_ppd_t *ppd_p, pcie_req_id_t bdf); -static boolean_t pf_in_addr_range(pcie_ppd_t *ppd_p, uint32_t addr); -static int pf_pcie_dispatch(dev_info_t *pdip, pf_impl_t *impl); -static int pf_pci_dispatch(dev_info_t *pdip, pf_impl_t *impl); -static int pf_default_hdl(dev_info_t *dip, dev_info_t *pdip, - pcie_ppd_t *ppd_p, pf_impl_t *impl); - -/* PCIe Fabric Handle Lookup Support Functions. */ -static int pf_hdl_child_lookup(dev_info_t *rpdip, dev_info_t *dip, - ddi_fm_error_t *derr, uint32_t addr, pcie_req_id_t bdf, - pf_hdl_compare_t cf); -static int pf_cfg_hdl_check(struct i_ddi_fmhdl *fmhdl, - ddi_fm_error_t *derr, uint32_t notused, pcie_req_id_t bdf); -static int pf_pio_hdl_check(struct i_ddi_fmhdl *fmhdl, - ddi_fm_error_t *derr, uint32_t addr, pcie_req_id_t bdf); -static int pf_dma_hdl_check(struct i_ddi_fmhdl *fmhdl, - ddi_fm_error_t *derr, uint32_t addr, pcie_req_id_t bdf); - - -/* PCIe/PCI Fault Handling Support Functions. */ -static int pf_pci_decode(dev_info_t *rpdip, pf_data_t *pf_data_p, uint16_t *cmd, - pcie_req_id_t *bdf, uint32_t *addr, uint32_t *trans_type); -static int pf_analyse_error(dev_info_t *rpdip, ddi_fm_error_t *derr, - pf_data_t *q, int last_index); -static void pf_send_ereport(dev_info_t *rpdip, ddi_fm_error_t *derr, - pf_data_t *dq_p, int dq_tail); -static void pf_adjust_for_no_aer(pf_data_t *pf_data_p); -static void pf_adjust_for_no_saer(pf_data_t *pf_data_p); -static pf_data_t *pf_get_parent_pcie_bridge(pf_data_t *dq_p, - pf_data_t *pf_data_p); -static boolean_t pf_matched_in_rc(pf_data_t *dq_p, pf_data_t *pf_data_p, - uint32_t abort_type); -static int pf_analyse_error_tbl(dev_info_t *rpdip, ddi_fm_error_t *derr, - pf_data_t *dq_p, pf_data_t *pf_data_p, const pf_fab_err_tbl_t *tbl, - uint32_t err_reg); -static int pf_analyse_ca_ur(dev_info_t *rpdip, ddi_fm_error_t *derr, - uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); -static int pf_analyse_ma_ta(dev_info_t *rpdip, ddi_fm_error_t *derr, - uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); -static int pf_analyse_pci(dev_info_t *rpdip, ddi_fm_error_t *derr, - uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); -static int pf_analyse_perr_assert(dev_info_t *rpdip, ddi_fm_error_t *derr, - uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); -static int pf_analyse_ptlp(dev_info_t *rpdip, ddi_fm_error_t *derr, - uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); -static int pf_analyse_sc(dev_info_t *rpdip, ddi_fm_error_t *derr, - uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); -static int pf_analyse_to(dev_info_t *rpdip, ddi_fm_error_t *derr, - uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); -static int pf_analyse_uc(dev_info_t *rpdip, ddi_fm_error_t *derr, - uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); -static int pf_analyse_uc_data(dev_info_t *rpdip, ddi_fm_error_t *derr, - uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); -static int pf_matched_device(dev_info_t *rpdip, ddi_fm_error_t *derr, - uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); -static int pf_no_panic(dev_info_t *rpdip, ddi_fm_error_t *derr, - uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); -static int pf_panic(dev_info_t *rpdip, ddi_fm_error_t *derr, - uint32_t bit, pf_data_t *dq_p, pf_data_t *pf_data_p); -static void pf_check_ce(pf_data_t *dq_p, int dq_tail); -static void pf_set_parent_erpt(pf_data_t *dq_p, int index, int erpt_val); - -int -pf_held(dev_info_t *dip) -{ - pcie_ppd_t *ppd_p = pcie_get_ppd(dip); - return (mutex_owned(&ppd_p->ppd_fm_lock)); -} - -boolean_t -pf_enter(dev_info_t *dip) -{ - pcie_ppd_t *ppd_p = pcie_get_ppd(dip); - if (!(ppd_p->ppd_fm_flags & PF_FM_READY)) - return (B_FALSE); - if (!pf_held(dip)) - mutex_enter(&ppd_p->ppd_fm_lock); - return (B_TRUE); -} - -void -pf_exit(dev_info_t *dip) -{ - pcie_ppd_t *ppd_p = pcie_get_ppd(dip); - mutex_exit(&ppd_p->ppd_fm_lock); -} - -/* - * Default pci/pci-x/pci-e error handler callbacks for - * SPARC PCI-E platforms - */ - -/* Called during postattach to initalize FM lock */ -void -pf_init(dev_info_t *dip, ddi_iblock_cookie_t ibc) -{ - pcie_ppd_t *ppd_p = pcie_get_ppd(dip); - struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl; - int cap = DDI_FM_EREPORT_CAPABLE; - - mutex_init(&ppd_p->ppd_fm_lock, NULL, MUTEX_DRIVER, (void *)ibc); - - if (fmhdl) { - fmhdl->fh_cap |= cap; - } else { - ppd_p->ppd_fm_flags |= PF_IS_NH; - ddi_fm_init(dip, &cap, &ibc); - } - ppd_p->ppd_fm_flags |= PF_FM_READY; -} - -/* undo OPL FMA lock, called at predetach */ -void -pf_fini(dev_info_t *dip) -{ - pcie_ppd_t *ppd_p = pcie_get_ppd(dip); - - /* undo non-hardened drivers */ - if (ppd_p->ppd_fm_flags & PF_IS_NH) { - ppd_p->ppd_fm_flags &= ~PF_IS_NH; - ddi_fm_fini(dip); - } - - /* no other code should set the flag to false */ - ppd_p->ppd_fm_flags &= ~PF_FM_READY; - while (pf_held(dip)); - mutex_destroy(&ppd_p->ppd_fm_lock); -} - -/* Returns whether the "bdf" is in the bus range of a switch/bridge */ -static boolean_t -pf_in_bus_range(pcie_ppd_t *ppd_p, pcie_req_id_t bdf) -{ - pci_bus_range_t *br_p = &ppd_p->ppd_bus_range; - uint16_t hdr_type = ppd_p->ppd_hdr_type; - uint8_t bus_no = (bdf & PCIE_REQ_ID_BUS_MASK) >> - PCIE_REQ_ID_BUS_SHIFT; - - /* check if given bdf falls within bridge's bus range */ - if ((hdr_type == PCI_HEADER_ONE) && - ((bus_no >= br_p->lo) && (bus_no <= br_p->hi))) - return (B_TRUE); - else - return (B_FALSE); -} - -/* - * Returns whether the "addr" is in the addr range of a switch/bridge, or if the - * "addr" is in the assigned addr of a device. - */ -static boolean_t -pf_in_addr_range(pcie_ppd_t *ppd_p, uint32_t addr) -{ - uint_t i, low, hi; - ppb_ranges_t *ranges_p = ppd_p->ppd_addr_ranges; - pci_regspec_t *assign_p = ppd_p->ppd_assigned_addr; - - /* check if given address belongs to this device */ - for (i = 0; i < ppd_p->ppd_assigned_entries; i++, assign_p++) { - low = assign_p->pci_phys_low; - hi = low + assign_p->pci_size_low; - if ((addr < hi) && (addr >= low)) - return (B_TRUE); - } - - /* check if given address belongs to a child below this device */ - if (ppd_p->ppd_hdr_type == PCI_HEADER_ONE) { - for (i = 0; i < ppd_p->ppd_addr_entries; i++, ranges_p++) { - if (ranges_p->child_high & PCI_ADDR_MEM32) { - low = ranges_p->child_low; - hi = low + ranges_p->size_low; - if ((addr < hi) && (addr >= low)) - return (B_TRUE); - break; - } - } - } - - return (B_FALSE); -} - -int -pf_pci_dispatch(dev_info_t *pdip, pf_impl_t *impl) -{ - dev_info_t *dip; - pcie_ppd_t *ppd_p; - int sts = 0, ret = 0; - - /* for bridge, check all downstream */ - dip = ddi_get_child(pdip); - for (; dip; dip = ddi_get_next_sibling(dip)) { - /* make sure dip is attached, ie. fm_ready */ - if (!(ppd_p = pcie_get_ppd(dip)) || - !pf_enter(dip)) - continue; - - sts = pf_default_hdl(dip, pdip, ppd_p, impl); - ret |= (sts & PF_FAILURE) ? DDI_FAILURE : DDI_SUCCESS; - - if (sts & PF_DO_NOT_SCAN) - continue; - - if (ppd_p->ppd_hdr_type == PCI_HEADER_ONE) - ret |= pf_pci_dispatch(dip, impl); - } - return (ret); -} - -int -pf_pcie_dispatch(dev_info_t *pdip, pf_impl_t *impl) -{ - dev_info_t *dip; - pcie_req_id_t rid = impl->pf_fbdf; - pcie_ppd_t *ppd_p; - int sts, ret = DDI_SUCCESS; - - dip = ddi_get_child(pdip); - for (; dip; dip = ddi_get_next_sibling(dip)) { - /* Make sure dip is attached and fm_ready */ - if (!(ppd_p = pcie_get_ppd(dip)) || - !pf_enter(dip)) - continue; - - if ((ppd_p->ppd_bdf == rid) || - pf_in_bus_range(ppd_p, rid) || - pf_in_addr_range(ppd_p, impl->pf_faddr)) { - sts = pf_default_hdl(dip, pdip, ppd_p, impl); - - ret |= (sts & PF_FAILURE) ? DDI_FAILURE : DDI_SUCCESS; - - if (sts & PF_DO_NOT_SCAN) - continue; - } else { - pf_exit(dip); - continue; - } - - /* match or in bridge bus-range */ - switch (ppd_p->ppd_dev_type) { - case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI: - ret |= pf_pci_dispatch(dip, impl); - return (ret); - case PCIE_PCIECAP_DEV_TYPE_UP: - case PCIE_PCIECAP_DEV_TYPE_DOWN: - if (ppd_p->ppd_bdf != rid) - ret |= pf_pcie_dispatch(dip, impl); - /* FALLTHROUGH */ - case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV: - return (ret); - case PCIE_PCIECAP_DEV_TYPE_ROOT: - default: - ASSERT(B_FALSE); - } - } - return (ret); -} - -/* - * Called by the RC to scan the fabric. - * - * After all the necessary fabric devices are scanned, the error queue will be - * analyzed for error severity and ereports will be sent. - */ -int -pf_scan_fabric(dev_info_t *rpdip, ddi_fm_error_t *derr, - pf_data_t *dq_p, int *dq_tail_p) -{ - pf_impl_t impl; - pf_data_t *rc_pf_data_p; - int i, sts, ret = DDI_SUCCESS; - int last_rc_index = *dq_tail_p; - - impl.pf_rpdip = rpdip; - impl.pf_derr = derr; - impl.pf_dq_p = dq_p; - impl.pf_dq_tail_p = dq_tail_p; - - i = 0; - - /* - * Scan the fabric using the fault_bdf and fault_addr in error q. - * fault_bdf will be valid in the following cases: - * - Fabric message - * - Poisoned TLP - * - Signaled UR/CA - * - Received UR/CA - * - PIO load failures - */ - for (rc_pf_data_p = dq_p; IS_RC(rc_pf_data_p) && i <= last_rc_index; - rc_pf_data_p++, i++) { - impl.pf_fbdf = rc_pf_data_p->fault_bdf; - impl.pf_faddr = rc_pf_data_p->fault_addr; - - if ((impl.pf_fbdf && pf_find_in_q(impl.pf_fbdf, dq_p, - *dq_tail_p) == PF_DATA_NOT_FOUND) || - (!impl.pf_fbdf && impl.pf_faddr)) - ret |= pf_pcie_dispatch(rpdip, &impl); - } - - /* If this is due to safe access, don't analyse the errors and return */ - if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) { - ret = DDI_SUCCESS; - sts = PF_NO_PANIC; - } else { - sts = pf_analyse_error(rpdip, derr, dq_p, *dq_tail_p); - pf_check_ce(dq_p, *dq_tail_p); - } - - pf_send_ereport(rpdip, derr, dq_p, *dq_tail_p); - *dq_tail_p = -1; - - /* - * If ret is not SUCCESS that means we were not able to add 1 or more - * devices to the fault q. Since that device could have have been the - * one which had a error, be conservative and panic here. - */ - if (ret != DDI_SUCCESS) - return (PF_PANIC | sts); - else - return (sts); -} - -/* - * For each device in the fault queue ensure that no ereport is sent if that - * device was scanned as a result of a CE in one of its children. - */ -void -pf_check_ce(pf_data_t *dq_p, int dq_tail) { - int i = dq_tail; - pf_data_t *pf_data_p; - - for (pf_data_p = &dq_p[dq_tail]; i >= 0; pf_data_p = &dq_p[--i]) { - if (pf_data_p->send_erpt == PF_SEND_ERPT_UNKNOWN) { - /* - * Always send ereport for the last device in a - * particular scan path. - */ - pf_data_p->send_erpt = PF_SEND_ERPT_YES; - - if (pf_data_p->severity_flags == (PF_CE | - PF_NO_ERROR)) { - /* - * Since this device had a CE don't send ereport - * for parents. - */ - pf_set_parent_erpt(dq_p, - pf_data_p->parent_index, PF_SEND_ERPT_NO); - } else { - /* Send ereports for all parents */ - pf_set_parent_erpt(dq_p, - pf_data_p->parent_index, PF_SEND_ERPT_YES); - } - } - } - -} - -void -pf_set_parent_erpt(pf_data_t *dq_p, int index, int erpt_val) { - int i; - pf_data_t *pf_data_p; - - for (i = index; i != PF_DATA_NOT_FOUND; i = pf_data_p->parent_index) { - pf_data_p = &dq_p[i]; - - if (pf_data_p->send_erpt != PF_SEND_ERPT_YES) - pf_data_p->send_erpt = erpt_val; - - } -} - -/* - * Returns the index of the bdf if found in the PCIe Fault Data Queue - * Returns PF_DATA_NOT_FOUND of the index if the bdf is not found. - * This function should not be called by RC. - */ -static int -pf_find_in_q(pcie_req_id_t bdf, pf_data_t *dq_p, int dq_tail) -{ - int i; - - /* Check if this is the first item in queue */ - if (dq_tail == -1) - return (PF_DATA_NOT_FOUND); - - for (i = dq_tail; i >= 0; i--) { - if (dq_p[i].bdf == bdf) - return (i); - } - - return (PF_DATA_NOT_FOUND); -} - -int -pf_get_dq_size() -{ - return (pf_dq_size); -} - -/* - * Add PFD to queue. - * Return true if successfully added. - * Return false if out of space or already in queue. - * Pass in pbdf = -1 if pfd is from RC. - */ -int -pf_en_dq(pf_data_t *pf_data_p, pf_data_t *dq_p, int *dq_tail_p, - pcie_req_id_t pbdf) -{ - int parent_index = PF_DATA_NOT_FOUND; - - if (*dq_tail_p >= (int)pf_dq_size) - return (DDI_FAILURE); - - /* Look for parent BDF if pfd is not from RC */ - if (pbdf != (uint16_t)0xFFFF) - parent_index = pf_find_in_q(pbdf, dq_p, *dq_tail_p); - - *dq_tail_p += 1; - dq_p[*dq_tail_p] = *pf_data_p; - dq_p[*dq_tail_p].parent_index = parent_index; - return (DDI_SUCCESS); -} - -/* Load PCIe Fault Data for PCI/PCIe devices into PCIe Fault Data Queue */ -static int -pf_default_hdl(dev_info_t *dip, dev_info_t *pdip, - pcie_ppd_t *ppd_p, pf_impl_t *impl) -{ - ddi_acc_handle_t h = ppd_p->ppd_cfg_hdl; - pf_data_t pf_data = {0}; - pcie_req_id_t pbdf; - uint16_t pcie_off, aer_off, pcix_off; - uint8_t hdr_type, dev_type; - int cb_sts, sts = PF_SUCCESS; - - pbdf = PCI_GET_BDF(pdip); - pf_data.bdf = PCI_GET_BDF(dip); - - /* Make sure this device hasn't already been snapshotted and cleared */ - if (pf_find_in_q(pf_data.bdf, impl->pf_dq_p, *impl->pf_dq_tail_p) != - PF_DATA_NOT_FOUND) - return (PF_SUCCESS); - - pf_data.dip = dip; - pf_data.bdg_secbus = ppd_p->ppd_bdg_secbus << 8; - pf_data.vendor_id = ppd_p->ppd_dev_ven_id & 0xFFFF; - pf_data.device_id = ppd_p->ppd_dev_ven_id >> 16; - pf_data.send_erpt = PF_SEND_ERPT_UNKNOWN; - - /* - * Read vendor/device ID and check with cached data, if it doesn't match - * could very well be a device that isn't responding anymore. Just - * stop. Save the basic info in the error q for post mortem debugging - * purposes. - */ - if (pci_config_get32(h, PCI_CONF_VENID) != ppd_p->ppd_dev_ven_id) { - (void) pf_en_dq(&pf_data, impl->pf_dq_p, impl->pf_dq_tail_p, - pbdf); - return (DDI_FAILURE); - } - - hdr_type = ppd_p->ppd_hdr_type; - dev_type = ppd_p->ppd_dev_type; - - pf_data.hdr_type = hdr_type; - pf_data.command = pci_config_get16(h, PCI_CONF_COMM); - pf_data.status = pci_config_get16(h, PCI_CONF_STAT); - pf_data.rev_id = pci_config_get8(h, PCI_CONF_REVID); - pcie_off = ppd_p->ppd_pcie_off; - aer_off = ppd_p->ppd_aer_off; - - if (hdr_type == PCI_HEADER_ONE) { - pf_data.s_status = pci_config_get16(h, PCI_BCNF_SEC_STATUS); - } - - pf_data.dev_type = dev_type; - if (dev_type == PCIE_PCIECAP_DEV_TYPE_PCI_DEV) { - if (pci_lcap_locate(h, PCI_CAP_ID_PCIX, &pcix_off) - != DDI_FAILURE) { - pf_data.pcix_s_status = pci_config_get16(h, - pcix_off + PCI_PCIX_SEC_STATUS); - pf_data.pcix_bdg_status = pci_config_get32(h, - pcix_off + PCI_PCIX_BDG_STATUS); - } - goto clear; - } - - if (!pcie_off) - goto clear; - - pf_data.dev_status = PCI_CAP_GET16(h, NULL, pcie_off, PCIE_DEVSTS); - pf_data.pcie_off = pcie_off; - - /* - * If a bridge does not have any error no need to scan any further down. - * For PCIe devices, check the PCIe device status and PCI secondary - * status. - * - Some non-compliant PCIe devices do not utilize PCIe - * error registers. If so rely on legacy PCI error registers. - * For PCI devices, check the PCI secondary status. - */ - if (hdr_type == PCI_HEADER_ONE) { - if ((dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) && - !(pf_data.dev_status & PF_PCIE_BDG_ERR) && - !(pf_data.s_status & PF_PCI_BDG_ERR)) - sts |= PF_DO_NOT_SCAN; - - if ((dev_type == PCIE_PCIECAP_DEV_TYPE_PCI_DEV) && - !(pf_data.s_status & PF_PCI_BDG_ERR)) - sts |= PF_DO_NOT_SCAN; - } - - if (!aer_off) - goto clear; - - pf_data.aer_off = aer_off; - pf_data.aer_ce_status = PCI_XCAP_GET32(h, NULL, aer_off, - PCIE_AER_CE_STS); - pf_data.aer_ue_status = PCI_XCAP_GET32(h, NULL, aer_off, - PCIE_AER_UCE_STS); - pf_data.aer_severity = PCI_XCAP_GET32(h, NULL, aer_off, - PCIE_AER_UCE_SERV); - pf_data.aer_control = PCI_XCAP_GET32(h, NULL, aer_off, PCIE_AER_CTL); - pf_data.aer_h0 = PCI_XCAP_GET32(h, NULL, aer_off, - PCIE_AER_HDR_LOG + 0x0); - pf_data.aer_h1 = PCI_XCAP_GET32(h, NULL, aer_off, - PCIE_AER_HDR_LOG + 0x4); - pf_data.aer_h2 = PCI_XCAP_GET32(h, NULL, aer_off, - PCIE_AER_HDR_LOG + 0x8); - pf_data.aer_h3 = PCI_XCAP_GET32(h, NULL, aer_off, - PCIE_AER_HDR_LOG + 0xc); - - if (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) { - pf_data.s_aer_ue_status = PCI_XCAP_GET32(h, NULL, aer_off, - PCIE_AER_SUCE_STS); - pf_data.s_aer_severity = PCI_XCAP_GET32(h, NULL, aer_off, - PCIE_AER_SUCE_SERV); - pf_data.s_aer_control = PCI_XCAP_GET32(h, NULL, aer_off, - PCIE_AER_SCTL); - pf_data.s_aer_h0 = PCI_XCAP_GET32(h, NULL, aer_off, - PCIE_AER_SHDR_LOG + 0x0); - pf_data.s_aer_h1 = PCI_XCAP_GET32(h, NULL, aer_off, - PCIE_AER_SHDR_LOG + 0x4); - pf_data.s_aer_h2 = PCI_XCAP_GET32(h, NULL, aer_off, - PCIE_AER_SHDR_LOG + 0x8); - pf_data.s_aer_h3 = PCI_XCAP_GET32(h, NULL, aer_off, - PCIE_AER_SHDR_LOG + 0xc); - } - -clear: - /* Clear the Legacy PCI Errors */ - pci_config_put16(h, PCI_CONF_STAT, pf_data.status); - - if (hdr_type == PCI_HEADER_ONE) - pci_config_put16(h, PCI_BCNF_SEC_STATUS, pf_data.s_status); - - if (!pcie_off) - goto queue; - - /* Clear the Advanced PCIe Errors */ - if (aer_off) { - PCI_XCAP_PUT32(h, NULL, aer_off, PCIE_AER_CE_STS, - pf_data.aer_ce_status); - PCI_XCAP_PUT32(h, NULL, aer_off, PCIE_AER_UCE_STS, - pf_data.aer_ue_status); - - if (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) - PCI_XCAP_PUT32(h, NULL, aer_off, - PCIE_AER_SUCE_STS, pf_data.s_aer_ue_status); - } - - /* Clear the PCIe Errors */ - PCI_CAP_PUT16(h, PCI_CAP_ID_PCI_E, pcie_off, PCIE_DEVSTS, - pf_data.dev_status); - -queue: - /* - * If the driver is FMA hardened and callback capable, call it's - * callback function - */ - if (DDI_FM_ERRCB_CAP(ddi_fm_capable(dip))) { - cb_sts = ndi_fm_handler_dispatch(pdip, dip, impl->pf_derr); - if (cb_sts == DDI_FM_FATAL || cb_sts == DDI_FM_UNKNOWN) - sts |= PF_FAILURE; - else - sts |= PF_SUCCESS; - } - - /* Add the snapshot to the error q */ - if (pf_en_dq(&pf_data, impl->pf_dq_p, impl->pf_dq_tail_p, pbdf) == - DDI_FAILURE) - sts |= PF_FAILURE; - - return (sts); -} - -/* - * Function used by PCI error handlers to check if captured address is stored - * in the DMA or ACC handle caches. - * return: PF_HDL_NOTFOUND if a handle is not found - * PF_HDL_FOUND if a handle is found - */ -int -pf_hdl_lookup(dev_info_t *dip, uint64_t ena, uint32_t flag, uint32_t addr, - pcie_req_id_t bdf) -{ - ddi_fm_error_t derr; - int found = 0; - - /* If we don't know the addr or rid just return with UNKNOWN */ - if (addr == NULL && bdf == NULL) - return (PF_HDL_NOTFOUND); - - if (!(flag & (PF_DMA_ADDR | PF_PIO_ADDR | PF_CFG_ADDR))) { - return (PF_HDL_NOTFOUND); - } - - bzero(&derr, sizeof (ddi_fm_error_t)); - derr.fme_version = DDI_FME_VERSION; - derr.fme_flag = DDI_FM_ERR_UNEXPECTED; - derr.fme_ena = ena; - - /* If we know the addr or bdf mark the handle as failed */ - if (flag & PF_DMA_ADDR) { - if (pf_hdl_child_lookup(dip, dip, &derr, addr, bdf, - pf_dma_hdl_check) != PF_HDL_NOTFOUND) - found++; - } - if (flag & PF_PIO_ADDR) { - if (pf_hdl_child_lookup(dip, dip, &derr, addr, bdf, - pf_pio_hdl_check) != PF_HDL_NOTFOUND) - found++; - } - if (flag & PF_CFG_ADDR) { - if (pf_hdl_child_lookup(dip, dip, &derr, addr, bdf, - pf_cfg_hdl_check) != PF_HDL_NOTFOUND) - found++; - } - - return (found ? PF_HDL_FOUND : PF_HDL_NOTFOUND); -} - -/* - * Recursively search the tree for the handler that matches the given address. - * If the BDF is known, only check the handlers that are associated with the - * given BDF, otherwise search the entire tree. - */ -static int -pf_hdl_child_lookup(dev_info_t *rpdip, dev_info_t *dip, - ddi_fm_error_t *derr, uint32_t addr, pcie_req_id_t bdf, - pf_hdl_compare_t cf) -{ - int status = PF_HDL_NOTFOUND; - struct i_ddi_fmhdl *fmhdl; - struct i_ddi_fmtgt *tgt; - pcie_req_id_t child_bdf; - - child_bdf = PCI_GET_BDF(dip); - - i_ddi_fm_handler_enter(dip); - fmhdl = DEVI(dip)->devi_fmhdl; - ASSERT(fmhdl); - - /* Check if dip and BDF match, if not recurse to it's children. */ - if (bdf == NULL || child_bdf == bdf) { - /* If we found the handler stop the search */ - if ((status = cf(fmhdl, derr, addr, bdf)) != PF_HDL_NOTFOUND) - goto done; - } - - /* If we can't find the handler check it's children */ - for (tgt = fmhdl->fh_tgts; tgt != NULL; tgt = tgt->ft_next) { - if ((status = pf_hdl_child_lookup(rpdip, tgt->ft_dip, derr, - addr, bdf, cf)) != PF_HDL_NOTFOUND) - goto done; - } - -done: - i_ddi_fm_handler_exit(dip); - - return (status); -} - -/* - * Find and Mark CFG Handles as failed associated with the given BDF. We should - * always know the BDF for CFG accesses, since it is encoded in the address of - * the TLP. Since there can be multiple cfg handles, mark them all as failed. - */ -/* ARGSUSED */ -static int -pf_cfg_hdl_check(struct i_ddi_fmhdl *fmhdl, ddi_fm_error_t *derr, - uint32_t notused, pcie_req_id_t bdf) -{ - ndi_fmc_t *fcp; - ndi_fmcentry_t *fep; - ddi_acc_handle_t ap; - ddi_acc_hdl_t *hp; - int status = PF_HDL_NOTFOUND; - - ASSERT(bdf); - - /* Return NOTFOUND if this driver doesn't support ACC flagerr */ - if (!DDI_FM_ACC_ERR_CAP(fmhdl->fh_cap) || - ((fcp = fmhdl->fh_acc_cache) == NULL)) - return (PF_HDL_NOTFOUND); - - mutex_enter(&fcp->fc_lock); - for (fep = fcp->fc_active->fce_next; fep; fep = fep->fce_next) { - ap = fep->fce_resource; - hp = impl_acc_hdl_get(ap); - - /* CFG space is always reg 0 */ - if (hp->ah_rnumber == 0) { - i_ddi_fm_acc_err_set(ap, derr->fme_ena, DDI_FM_NONFATAL, - DDI_FM_ERR_UNEXPECTED); - ddi_fm_acc_err_get(ap, derr, DDI_FME_VERSION); - derr->fme_acc_handle = ap; - status = PF_HDL_FOUND; - } - } - mutex_exit(&fcp->fc_lock); - - return (status); -} - -/* - * Find and Mark all ACC Handles associated with a give address and BDF as - * failed. If the BDF != NULL, then check to see if the device has a ACC Handle - * associated with ADDR. If the handle is not found, mark all the handles as - * failed. If the BDF == NULL, mark the handle as failed if it is associated - * with ADDR. - */ -static int -pf_pio_hdl_check(struct i_ddi_fmhdl *fmhdl, ddi_fm_error_t *derr, - uint32_t addr, pcie_req_id_t bdf) -{ - ndi_fmc_t *fcp; - ndi_fmcentry_t *fep; - ddi_acc_handle_t ap; - ddi_acc_hdl_t *hp; - uint32_t base_addr; - uint_t size; - int status = PF_HDL_NOTFOUND; - - if (!DDI_FM_ACC_ERR_CAP(fmhdl->fh_cap) || - ((fcp = fmhdl->fh_acc_cache) == NULL)) - return (PF_HDL_NOTFOUND); - - mutex_enter(&fcp->fc_lock); - for (fep = fcp->fc_active->fce_next; fep; fep = fep->fce_next) { - ap = fep->fce_resource; - hp = impl_acc_hdl_get(ap); - - /* CFG space is always reg 0, don't mark config handlers. */ - if (hp->ah_rnumber == 0) - continue; - - /* - * Normalize the base addr to the addr and strip off the - * HB info. All PIOs are 32 bit access only. - */ - base_addr = (uint32_t)(hp->ah_pfn << MMU_PAGESHIFT) + - hp->ah_offset; - size = hp->ah_len; - - if (((addr >= base_addr) && (addr < (base_addr + size))) || - ((addr == NULL) && (bdf != NULL))) { - - status = PF_HDL_FOUND; - - i_ddi_fm_acc_err_set(ap, derr->fme_ena, DDI_FM_NONFATAL, - DDI_FM_ERR_UNEXPECTED); - ddi_fm_acc_err_get(ap, derr, DDI_FME_VERSION); - derr->fme_acc_handle = ap; - } - } - mutex_exit(&fcp->fc_lock); - - /* - * If no handles found and we know this is the right device mark - * all the handles as failed. - */ - if (addr && bdf != NULL && status == PF_HDL_NOTFOUND) - status = pf_pio_hdl_check(fmhdl, derr, NULL, bdf); - - return (status); -} - -/* - * Find and Mark all DNA Handles associated with a give address and BDF as - * failed. If the BDF != NULL, then check to see if the device has a DMA Handle - * associated with ADDR. If the handle is not found, mark all the handles as - * failed. If the BDF == NULL, mark the handle as failed if it is associated - * with ADDR. - */ -static int -pf_dma_hdl_check(struct i_ddi_fmhdl *fmhdl, ddi_fm_error_t *derr, - uint32_t addr, pcie_req_id_t bdf) -{ - ndi_fmc_t *fcp; - ndi_fmcentry_t *fep; - ddi_dma_impl_t *pcie_dp; - ddi_dma_handle_t dp; - int status = PF_HDL_NOTFOUND; - uint32_t base_addr; - uint_t size; - - if (!DDI_FM_DMA_ERR_CAP(fmhdl->fh_cap) || - ((fcp = fmhdl->fh_dma_cache) == NULL)) - return (PF_HDL_NOTFOUND); - - mutex_enter(&fcp->fc_lock); - for (fep = fcp->fc_active->fce_next; fep; fep = fep->fce_next) { - pcie_dp = (ddi_dma_impl_t *)fep->fce_resource; - dp = (ddi_dma_handle_t)fep->fce_resource; - base_addr = (uint32_t)pcie_dp->dmai_mapping; - size = pcie_dp->dmai_size; - - /* - * Mark the handle as failed if the ADDR is mapped, or if we - * know the BDF and ADDR == 0. - */ - if (((addr >= base_addr) && (addr < (base_addr + size))) || - ((addr == NULL) && (bdf != NULL))) { - - status = PF_HDL_FOUND; - - i_ddi_fm_dma_err_set(dp, derr->fme_ena, DDI_FM_NONFATAL, - DDI_FM_ERR_UNEXPECTED); - ddi_fm_dma_err_get(dp, derr, DDI_FME_VERSION); - derr->fme_dma_handle = dp; - } - } - mutex_exit(&fcp->fc_lock); - - /* - * If no handles found and we know this is the right device mark - * all the handles as failed. - */ - if (addr && bdf != NULL && status == PF_HDL_NOTFOUND) - status = pf_dma_hdl_check(fmhdl, derr, NULL, bdf); - - return (status); -} - -/* - * If a PCIe device does not support AER, assume all AER statuses have been set, - * unless other registers do not indicate a certain error occuring. - */ -static void -pf_adjust_for_no_aer(pf_data_t *pf_data_p) -{ - uint32_t aer_ue = 0; - - if (pf_data_p->aer_off) - return; - - if (pf_data_p->dev_status & PCIE_DEVSTS_FE_DETECTED) { - aer_ue = PF_AER_FATAL_ERR; - } else if (pf_data_p->dev_status & PCIE_DEVSTS_NFE_DETECTED) { - aer_ue = PF_AER_NON_FATAL_ERR; - /* Check if the device received a PTLP */ - if (!(pf_data_p->status & PCI_STAT_PERROR)) - aer_ue &= ~PCIE_AER_UCE_PTLP; - - /* Check if the device signaled a CA */ - if (!(pf_data_p->status & PCI_STAT_S_TARG_AB)) - aer_ue &= ~PCIE_AER_UCE_CA; - - /* Check if the device sent a UR */ - if ((!pf_data_p->dev_status & PCIE_DEVSTS_UR_DETECTED)) - aer_ue &= ~PCIE_AER_UCE_UR; - - /* - * Ignore ECRCs as it is optional and will manefest itself as - * another error like PTLP and MFP - */ - aer_ue &= ~PCIE_AER_UCE_ECRC; - } - - if (pf_data_p->dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE_DEV) { - aer_ue &= ~PCIE_AER_UCE_TRAINING; - aer_ue &= ~PCIE_AER_UCE_SD; - } - pf_data_p->aer_ue_status = aer_ue; -} - -static void -pf_adjust_for_no_saer(pf_data_t *pf_data_p) -{ - uint32_t s_aer_ue = 0; - - if (pf_data_p->aer_off) - return; - - if (pf_data_p->dev_status & PCIE_DEVSTS_FE_DETECTED) { - s_aer_ue = PF_SAER_FATAL_ERR; - } else if (pf_data_p->dev_status & PCIE_DEVSTS_NFE_DETECTED) { - s_aer_ue = PF_SAER_NON_FATAL_ERR; - /* Check if the device received a UC_DATA */ - if (!(pf_data_p->s_status & PCI_STAT_PERROR)) - s_aer_ue &= ~PCIE_AER_SUCE_UC_DATA_ERR; - - /* Check if the device received a RCVD_MA/MA_ON_SC */ - if (!(pf_data_p->s_status & (PCI_STAT_R_MAST_AB))) { - s_aer_ue &= ~PCIE_AER_SUCE_RCVD_MA; - s_aer_ue &= ~PCIE_AER_SUCE_MA_ON_SC; - } - - /* Check if the device received a RCVD_TA/TA_ON_SC */ - if (!(pf_data_p->s_status & (PCI_STAT_R_TARG_AB))) { - s_aer_ue &= ~PCIE_AER_SUCE_RCVD_TA; - s_aer_ue &= ~PCIE_AER_SUCE_TA_ON_SC; - } - } - pf_data_p->s_aer_ue_status = s_aer_ue; -} - -/* Find the PCIe-PCI bridge of a PCI device */ -static pf_data_t * -pf_get_parent_pcie_bridge(pf_data_t *dq_p, pf_data_t *pf_data_p) -{ - pf_data_t *bdg_pf_data_p; - - ASSERT(pf_data_p->dev_type == PCIE_PCIECAP_DEV_TYPE_PCI_DEV); - - if (pf_data_p->parent_index == PF_DATA_NOT_FOUND) - return (NULL); - - for (bdg_pf_data_p = &dq_p[pf_data_p->parent_index]; - bdg_pf_data_p->dev_type != PCIE_PCIECAP_DEV_TYPE_PCIE2PCI; - bdg_pf_data_p = &dq_p[bdg_pf_data_p->parent_index]) { - if (!bdg_pf_data_p || (bdg_pf_data_p->parent_index == - PF_DATA_NOT_FOUND)) - return (NULL); - } - - return (bdg_pf_data_p); -} - -/* - * See if a leaf error was bubbled up to the RC and handled. - * Check if the RC logged an error with the appropriate status type/abort type. - * Ex: Parity Error, Received Master/Target Abort - * Check if either the fault address found in the rc matches the device's - * assigned address range (PIO's only) or the fault BDF in the rc matches the - * device's BDF or Secondary Bus. - */ -static boolean_t -pf_matched_in_rc(pf_data_t *dq_p, pf_data_t *pf_data_p, uint32_t abort_type) -{ - pf_data_t *rc_pf_data_p; - pcie_ppd_t *ppd_p; - - ppd_p = pcie_get_ppd(pf_data_p->dip); - for (rc_pf_data_p = dq_p; IS_RC(rc_pf_data_p); rc_pf_data_p++) { - /* If device and rc abort type does not match continue */ - if (!(rc_pf_data_p->s_status & abort_type)) - continue; - - /* The Fault BDF = Device's BDF */ - if (rc_pf_data_p->fault_bdf == pf_data_p->bdf) - return (B_TRUE); - - /* The Fault Addr is in device's address range */ - if (pf_in_addr_range(ppd_p, rc_pf_data_p->fault_addr)) - return (B_TRUE); - - /* The Fault BDF is from PCIe-PCI Bridge's secondary bus */ - if ((pf_data_p->dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) && - ((rc_pf_data_p->fault_bdf & PCIE_REQ_ID_BUS_MASK) == - pf_data_p->bdg_secbus)) - return (B_TRUE); - } - - return (B_FALSE); -} - -/* - * Decodes the TLP and returns the BDF of the handler, address and transaction - * type if known. - * - * Types of TLP logs seen in RC, and what to extract: - * - * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR - * Memory(PIO) - address, PF_PIO_ADDR - * CFG - Should not occur and result in UR - * Completion(DMA) - Requester BDF, PF_DMA_ADDR - * Completion(PIO) - Requester BDF, PF_PIO_ADDR - * - * Types of TLP logs seen in SW/Leaf, and what to extract: - * - * Memory(DMA) - Requester BDF, address, PF_DMA_ADDR - * Memory(PIO) - address, PF_PIO_ADDR - * CFG - Destined BDF, address, PF_CFG_ADDR - * Completion(DMA) - Requester BDF, PF_DMA_ADDR - * Completion(PIO) - Requester BDF, PF_PIO_ADDR - * - * If the TLP can be decoded the *bdf, *addr, and *trans_type will be populated - * with the TLP information. The caller may pass in NULL for any of the - * mentioned variables, if they are not interested in them. - */ -int -pf_tlp_decode(dev_info_t *rpdip, pf_data_t *pf_data_p, pcie_req_id_t *bdf, - uint32_t *addr, uint32_t *trans_type) -{ - pcie_tlp_hdr_t *tlp_hdr = (pcie_tlp_hdr_t *)&pf_data_p->aer_h0; - pcie_req_id_t rp_bdf, rid_bdf, tlp_bdf; - uint32_t tlp_addr, tlp_trans_type; - - if (pcie_get_bdf_from_dip(rpdip, &rp_bdf) != DDI_SUCCESS) - rp_bdf = (pcie_req_id_t)-1; - - switch (tlp_hdr->type) { - case PCIE_TLP_TYPE_IO: - case PCIE_TLP_TYPE_MEM: - case PCIE_TLP_TYPE_MEMLK: - tlp_addr = pf_data_p->aer_h3; - /* If the RID_BDF == RP_BDF, PIO, otherwise DMA */ - rid_bdf = (pcie_req_id_t)(pf_data_p->aer_h1 >> 16); - if (rid_bdf == rp_bdf) { - tlp_trans_type = PF_PIO_ADDR; - tlp_bdf = NULL; - } else { - tlp_trans_type = PF_DMA_ADDR; - tlp_bdf = rid_bdf; - } - break; - case PCIE_TLP_TYPE_CFG0: - case PCIE_TLP_TYPE_CFG1: - tlp_addr = 0; - tlp_bdf = (pcie_req_id_t)(pf_data_p->aer_h2 >> 16); - tlp_trans_type = PF_CFG_ADDR; - break; - case PCIE_TLP_TYPE_CPL: - case PCIE_TLP_TYPE_CPLLK: - tlp_addr = NULL; - /* - * If the completer bdf == RP_BDF, DMA, otherwise PIO or a CFG - * completion. - */ - tlp_bdf = (pcie_req_id_t)(pf_data_p->aer_h1 >> 16); - if (tlp_bdf == rp_bdf) - tlp_trans_type = PF_DMA_ADDR; - else - tlp_trans_type = PF_PIO_ADDR | PF_CFG_ADDR; - break; - default: - return (DDI_FAILURE); - } - - if (addr) - *addr = tlp_addr; - if (trans_type) - *trans_type = tlp_trans_type; - if (bdf) - *bdf = tlp_bdf; - - return (DDI_SUCCESS); -} - -/* - * pf_pci_decode function decodes the secondary aer transaction logs in - * PCIe-PCI bridges. - * - * The log is 128 bits long and arranged in this manner. - * [0:35] Transaction Attribute (s_aer_h0-saer_h1) - * [36:39] Transaction lower command (saer_h1) - * [40:43] Transaction upper command (saer_h1) - * [44:63] Reserved - * [64:127] Address (saer_h2-saer_h3) - */ -static int -pf_pci_decode(dev_info_t *rpdip, pf_data_t *pf_data_p, uint16_t *cmd, - pcie_req_id_t *bdf, uint32_t *addr, uint32_t *trans_type) { - pcix_attr_t *attr; - pcie_req_id_t rp_bdf; - - if (pcie_get_bdf_from_dip(rpdip, &rp_bdf) != DDI_SUCCESS) - rp_bdf = (pcie_req_id_t)-1; - - *cmd = GET_SAER_CMD(pf_data_p); - - switch (*cmd) { - case PCI_PCIX_CMD_MEMRD_DW: - case PCI_PCIX_CMD_MEMRD_BL: - case PCI_PCIX_CMD_MEMRDBL: - case PCI_PCIX_CMD_MEMWR: - case PCI_PCIX_CMD_MEMWR_BL: - case PCI_PCIX_CMD_MEMWRBL: - *addr = pf_data_p->s_aer_h2; - attr = (pcix_attr_t *)&pf_data_p->s_aer_h0; - - /* - * Could be DMA or PIO. Find out by look at requesting bdf. - * If the requester is the RC, then it's a PIO, otherwise, DMA - */ - *bdf = attr->rid; - if (*bdf == rp_bdf) { - *trans_type = PF_PIO_ADDR; - *bdf = 0; - } else { - *trans_type = PF_DMA_ADDR; - } - break; - case PCI_PCIX_CMD_CFRD: - case PCI_PCIX_CMD_CFWR: - /* - * CFG Access should always be down stream. Match the BDF in - * the address phase. - */ - *addr = 0; - attr = (pcix_attr_t *)&pf_data_p->s_aer_h2; - *bdf = attr->rid; - *trans_type = PF_CFG_ADDR; - break; - case PCI_PCIX_CMD_SPL: - /* - * Check for DMA read completions. The requesting BDF is in the - * Address phase. - */ - *addr = 0; - attr = (pcix_attr_t *)&pf_data_p->s_aer_h0; - *bdf = attr->rid; - *trans_type = PF_DMA_ADDR; - break; - default: - *addr = 0; - *bdf = 0; - *trans_type = 0; - return (DDI_FAILURE); - } - return (DDI_SUCCESS); -} - -/* - * For this function only the Primary AER Header Logs need to be valid in the - * pfd (PCIe Fault Data) arg. - */ -int -pf_tlp_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *pf_data_p) -{ - uint32_t addr; - int err = PF_HDL_NOTFOUND; - pcie_req_id_t hdl_bdf; - uint32_t trans_type; - - if (pf_tlp_decode(rpdip, pf_data_p, &hdl_bdf, &addr, &trans_type) == - DDI_SUCCESS) { - err = pf_hdl_lookup(rpdip, derr->fme_ena, trans_type, addr, - hdl_bdf); - } - - return (err); -} - -/* - * Last function called for PF Scan Fabric. - * Sends ereports for all devices that are not dev_type = RC. - * Will also unlock all the mutexes grabbed during fabric scan. - */ -/* ARGSUSED */ -static void -pf_send_ereport(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *dq_p, - int dq_tail) -{ - char buf[FM_MAX_CLASS]; - pf_data_t *pfd_p; - int i, total = dq_tail; - - i = 0; - for (pfd_p = dq_p; IS_RC(pfd_p) && i <= dq_tail; pfd_p++, i++) { - total--; - } - - i = dq_tail; - for (pfd_p = &dq_p[dq_tail]; i >= 0; pfd_p--, i--) { - if (IS_RC(pfd_p)) - continue; - - if (pfd_p->send_erpt == PF_SEND_ERPT_NO) - goto unlock; - - if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) - goto unlock; - - (void) snprintf(buf, FM_MAX_CLASS, "%s", "fire.fabric"); - ddi_fm_ereport_post(pfd_p->dip, buf, derr->fme_ena, - DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, - "req_id", DATA_TYPE_UINT16, pfd_p->bdf, - "device_id", DATA_TYPE_UINT16, pfd_p->device_id, - "vendor_id", DATA_TYPE_UINT16, pfd_p->vendor_id, - "rev_id", DATA_TYPE_UINT8, pfd_p->rev_id, - "dev_type", DATA_TYPE_UINT16, pfd_p->dev_type, - "cap_off", DATA_TYPE_UINT16, pfd_p->pcie_off, - "aer_off", DATA_TYPE_UINT16, pfd_p->aer_off, - "sts_reg", DATA_TYPE_UINT16, pfd_p->status, - "sts_sreg", DATA_TYPE_UINT16, pfd_p->s_status, - "pcix_sts_reg", DATA_TYPE_UINT16, pfd_p->pcix_s_status, - "pcix_bdg_sts_reg", DATA_TYPE_UINT32, - pfd_p->pcix_bdg_status, - "dev_sts_reg", DATA_TYPE_UINT16, pfd_p->dev_status, - "aer_ce", DATA_TYPE_UINT32, pfd_p->aer_ce_status, - "aer_ue", DATA_TYPE_UINT32, pfd_p->aer_ue_status, - "aer_sev", DATA_TYPE_UINT32, pfd_p->aer_severity, - "aer_ctr", DATA_TYPE_UINT32, pfd_p->aer_control, - "aer_h1", DATA_TYPE_UINT32, pfd_p->aer_h0, - "aer_h2", DATA_TYPE_UINT32, pfd_p->aer_h1, - "aer_h3", DATA_TYPE_UINT32, pfd_p->aer_h2, - "aer_h4", DATA_TYPE_UINT32, pfd_p->aer_h3, - "saer_ue", DATA_TYPE_UINT32, pfd_p->s_aer_ue_status, - "saer_sev", DATA_TYPE_UINT32, pfd_p->s_aer_severity, - "saer_ctr", DATA_TYPE_UINT32, pfd_p->s_aer_control, - "saer_h1", DATA_TYPE_UINT32, pfd_p->s_aer_h0, - "saer_h2", DATA_TYPE_UINT32, pfd_p->s_aer_h1, - "saer_h3", DATA_TYPE_UINT32, pfd_p->s_aer_h2, - "saer_h4", DATA_TYPE_UINT32, pfd_p->s_aer_h3, - "remainder", DATA_TYPE_UINT32, total--, - "severity", DATA_TYPE_UINT32, pfd_p->severity_flags, - NULL); - -unlock: - pf_exit(pfd_p->dip); - } -} - -/* - * Ignore: - * - TRAINING: as leaves do not have children - * - SD: as leaves do not have children - */ -const pf_fab_err_tbl_t pcie_pcie_tbl[] = { - PCIE_AER_UCE_DLP, pf_panic, - PCIE_AER_UCE_PTLP, pf_analyse_ptlp, - PCIE_AER_UCE_FCP, pf_panic, - PCIE_AER_UCE_TO, pf_analyse_to, - PCIE_AER_UCE_CA, pf_analyse_ca_ur, - PCIE_AER_UCE_UC, pf_analyse_uc, - PCIE_AER_UCE_RO, pf_panic, - PCIE_AER_UCE_MTLP, pf_panic, - PCIE_AER_UCE_ECRC, pf_panic, - PCIE_AER_UCE_UR, pf_analyse_ca_ur, - NULL, NULL -}; - -const pf_fab_err_tbl_t pcie_sw_tbl[] = { - PCIE_AER_UCE_TRAINING, pf_no_panic, - PCIE_AER_UCE_DLP, pf_panic, - PCIE_AER_UCE_SD, pf_no_panic, - PCIE_AER_UCE_PTLP, pf_analyse_ptlp, - PCIE_AER_UCE_FCP, pf_panic, - PCIE_AER_UCE_TO, pf_analyse_to, - PCIE_AER_UCE_CA, pf_analyse_ca_ur, - PCIE_AER_UCE_UC, pf_analyse_uc, - PCIE_AER_UCE_RO, pf_panic, - PCIE_AER_UCE_MTLP, pf_panic, - PCIE_AER_UCE_ECRC, pf_panic, - PCIE_AER_UCE_UR, pf_analyse_ca_ur, - NULL, NULL -}; - -const pf_fab_err_tbl_t pcie_pcie_bdg_tbl[] = { - PCIE_AER_SUCE_TA_ON_SC, pf_analyse_sc, - PCIE_AER_SUCE_MA_ON_SC, pf_analyse_sc, - PCIE_AER_SUCE_RCVD_TA, pf_analyse_ma_ta, - PCIE_AER_SUCE_RCVD_MA, pf_analyse_ma_ta, - PCIE_AER_SUCE_USC_ERR, pf_panic, - PCIE_AER_SUCE_USC_MSG_DATA_ERR, pf_analyse_ma_ta, - PCIE_AER_SUCE_UC_DATA_ERR, pf_analyse_uc_data, - PCIE_AER_SUCE_UC_ATTR_ERR, pf_panic, - PCIE_AER_SUCE_UC_ADDR_ERR, pf_panic, - PCIE_AER_SUCE_TIMER_EXPIRED, pf_panic, - PCIE_AER_SUCE_PERR_ASSERT, pf_analyse_perr_assert, - PCIE_AER_SUCE_SERR_ASSERT, pf_no_panic, - PCIE_AER_SUCE_INTERNAL_ERR, pf_panic, - NULL, NULL -}; - -const pf_fab_err_tbl_t pcie_pci_bdg_tbl[] = { - PCI_STAT_PERROR, pf_analyse_pci, - PCI_STAT_S_PERROR, pf_analyse_pci, - PCI_STAT_S_SYSERR, pf_panic, - PCI_STAT_R_MAST_AB, pf_analyse_pci, - PCI_STAT_R_TARG_AB, pf_analyse_pci, - PCI_STAT_S_TARG_AB, pf_analyse_pci, - NULL, NULL -}; - -const pf_fab_err_tbl_t pcie_pci_tbl[] = { - PCI_STAT_PERROR, pf_analyse_pci, - PCI_STAT_S_PERROR, pf_analyse_pci, - PCI_STAT_S_SYSERR, pf_panic, - PCI_STAT_R_MAST_AB, pf_analyse_pci, - PCI_STAT_R_TARG_AB, pf_analyse_pci, - PCI_STAT_S_TARG_AB, pf_analyse_pci, - NULL, NULL -}; - -/* - * Analyse all the PCIe Fault Data (pfd) gathered during dispatch in the pfd - * Queue. - */ -static int -pf_analyse_error(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *dq_p, - int dq_tail) -{ - int i = 0, pfd_err, err = 0; - pf_data_t *pf_data_p; - - for (pf_data_p = &dq_p[i]; i <= dq_tail; pf_data_p = &dq_p[++i]) { - pfd_err = 0; - switch (pf_data_p->dev_type) { - case PCIE_PCIECAP_DEV_TYPE_PCIE_DEV: - if (PCIE_DEVSTS_CE_DETECTED & pf_data_p->dev_status) - pfd_err |= PF_CE; - - pf_adjust_for_no_aer(pf_data_p); - pfd_err |= pf_analyse_error_tbl(rpdip, derr, dq_p, - pf_data_p, pcie_pcie_tbl, pf_data_p->aer_ue_status); - break; - case PCIE_PCIECAP_DEV_TYPE_UP: - case PCIE_PCIECAP_DEV_TYPE_DOWN: - if (PCIE_DEVSTS_CE_DETECTED & pf_data_p->dev_status) - pfd_err |= PF_CE; - - pf_adjust_for_no_aer(pf_data_p); - pfd_err |= pf_analyse_error_tbl(rpdip, derr, dq_p, - pf_data_p, pcie_sw_tbl, pf_data_p->aer_ue_status); - break; - case PCIE_PCIECAP_DEV_TYPE_ROOT: - /* Do not analyse RC info as it has already been done */ - pfd_err |= PF_MATCHED_RC; - break; - case PCIE_PCIECAP_DEV_TYPE_PCIE2PCI: - if (PCIE_DEVSTS_CE_DETECTED & pf_data_p->dev_status) - pfd_err |= PF_CE; - - if ((PCIE_DEVSTS_NFE_DETECTED | - PCIE_DEVSTS_FE_DETECTED) - & pf_data_p->dev_status) { - pf_adjust_for_no_aer(pf_data_p); - pf_adjust_for_no_saer(pf_data_p); - pfd_err |= pf_analyse_error_tbl(rpdip, derr, - dq_p, pf_data_p, pcie_pcie_tbl, - pf_data_p->aer_ue_status); - pfd_err |= pf_analyse_error_tbl(rpdip, derr, - dq_p, pf_data_p, pcie_pcie_bdg_tbl, - pf_data_p->s_aer_ue_status); - break; - } - /* - * Some non-compliant PCIe devices do not utilize PCIe - * error registers. So fallthrough and rely on legacy - * PCI error registers. - */ - /* FALLTHROUGH */ - case PCIE_PCIECAP_DEV_TYPE_PCI_DEV: - pfd_err |= pf_analyse_error_tbl(rpdip, derr, dq_p, - pf_data_p, pcie_pci_tbl, pf_data_p->status); - if (pf_data_p->hdr_type == PCI_HEADER_ONE) - pfd_err |= pf_analyse_error_tbl(rpdip, derr, - dq_p, pf_data_p, pcie_pci_bdg_tbl, - pf_data_p->s_status); - break; - } - - err |= pfd_err; - pf_data_p->severity_flags = pfd_err; - } - - return (err); -} - -static int -pf_analyse_error_tbl(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *dq_p, - pf_data_t *pf_data_p, const pf_fab_err_tbl_t *tbl, uint32_t err_reg) { - const pf_fab_err_tbl_t *row; - int err = 0; - - for (row = tbl; err_reg && (row->bit != NULL) && !(err & PF_PANIC); - row++) { - if (err_reg & row->bit) - err |= row->handler(rpdip, derr, row->bit, dq_p, - pf_data_p); - } - - if (!err) - err = PF_NO_ERROR; - - return (err); -} - -/* - * PCIe Completer Abort and Unsupport Request error analyser. If a PCIe device - * issues a CA/UR a corresponding Received CA/UR should have been seen in the - * PCIe root complex. Check to see if RC did indeed receive a CA/UR, if so then - * this error may be safely ignored. If not check the logs and see if an - * associated handler for this transaction can be found. - */ -/* ARGSUSED */ -static int -pf_analyse_ca_ur(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, - pf_data_t *dq_p, pf_data_t *pf_data_p) -{ - uint32_t abort_type; - - if (bit == PCIE_AER_UCE_UR) - abort_type = PCI_STAT_R_MAST_AB; - else - abort_type = PCI_STAT_R_TARG_AB; - - if (pf_matched_in_rc(dq_p, pf_data_p, abort_type)) - return (PF_MATCHED_RC); - - if (HAS_AER_LOGS(pf_data_p, bit)) { - if (pf_tlp_hdl_lookup(rpdip, derr, pf_data_p) == - PF_HDL_NOTFOUND) - return (PF_PANIC); - - return (PF_MATCHED_DEVICE); - } - - return (PF_PANIC); -} - -/* - * PCIe-PCI Bridge Received Master Abort and Target error analyser. If a PCIe - * Bridge receives a MA/TA a corresponding sent CA/UR should have been seen in - * the PCIe root complex. Check to see if RC did indeed receive a CA/UR, if so - * then this error may be safely ignored. If not check the logs and see if an - * associated handler for this transaction can be found. - */ -/* ARGSUSED */ -static int -pf_analyse_ma_ta(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, - pf_data_t *dq_p, pf_data_t *pf_data_p) -{ - uint16_t cmd; - uint32_t addr; - pcie_req_id_t bdf; - uint32_t abort_type, trans_type; - - if (bit == PCIE_AER_SUCE_RCVD_MA) - abort_type = PCI_STAT_R_MAST_AB; - else - abort_type = PCI_STAT_R_TARG_AB; - - if (pf_matched_in_rc(dq_p, pf_data_p, abort_type)) - return (PF_MATCHED_RC); - - if (!HAS_SAER_LOGS(pf_data_p, bit)) - return (PF_PANIC); - - if (pf_pci_decode(rpdip, pf_data_p, &cmd, &bdf, &addr, &trans_type) != - DDI_SUCCESS) - return (PF_PANIC); - - if (pf_hdl_lookup(rpdip, derr->fme_ena, trans_type, addr, bdf) == - PF_HDL_NOTFOUND) - return (PF_PANIC); - - return (PF_MATCHED_DEVICE); -} - -/* - * Generic PCI error analyser. This function is used for Parity Errors, - * Received Master Aborts, Received Target Aborts, and Signaled Target Aborts. - * In general PCI devices do not have error logs, it is very difficult to figure - * out what transaction caused the error. Instead find the nearest PCIe-PCI - * Bridge and check to see if it has logs and if it has an error associated with - * this PCI Device. - */ -/* ARGSUSED */ -static int -pf_analyse_pci(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, - pf_data_t *dq_p, pf_data_t *pf_data_p) -{ - pf_data_t *parent_pfd_p; - uint16_t cmd; - uint32_t addr; - pcie_req_id_t bdf; - uint32_t trans_type, aer_ue_status; - pcie_ppd_t *ppd_p; - - if (pf_data_p->status & PCI_STAT_S_SYSERR) - return (PF_PANIC); - - if (bit & (PCI_STAT_PERROR | PCI_STAT_S_PERROR)) { - aer_ue_status = PCIE_AER_SUCE_PERR_ASSERT; - } else { - aer_ue_status = (PCIE_AER_SUCE_TA_ON_SC | - PCIE_AER_SUCE_MA_ON_SC | PCIE_AER_SUCE_RCVD_TA | - PCIE_AER_SUCE_RCVD_MA); - } - - parent_pfd_p = pf_get_parent_pcie_bridge(dq_p, pf_data_p); - if (parent_pfd_p == NULL) - return (PF_PANIC); - - if (!(parent_pfd_p->s_aer_ue_status & aer_ue_status) || - !HAS_SAER_LOGS(parent_pfd_p, aer_ue_status)) - return (PF_PANIC); - - if (pf_pci_decode(rpdip, parent_pfd_p, &cmd, &bdf, &addr, &trans_type) - != DDI_SUCCESS) - return (PF_PANIC); - - /* - * If the addr or bdf from the parent PCIe bridge logs belong to this - * PCI device, assume the PCIe bridge's error handling has already taken - * care of this PCI device's error. - */ - ppd_p = pcie_get_ppd(pf_data_p->dip); - if ((bdf == pf_data_p->bdf) || pf_in_addr_range(ppd_p, addr)) - return (PF_MATCHED_PARENT); - - /* - * If this device is a PCI-PCI bridge, check if the bdf in the parent - * PCIe bridge logs is in the range of this PCI-PCI Bridge's bus ranges. - * If they are, then assume the PCIe bridge's error handling has already - * taken care of this PCI-PCI bridge device's error. - */ - if ((pf_data_p->hdr_type == PCI_HEADER_ONE) && - pf_in_bus_range(ppd_p, bdf)) - return (PF_MATCHED_PARENT); - - return (PF_PANIC); -} - -/* - * PCIe Bridge transactions associated with PERR. - * o Bridge received a poisoned Non-Posted Write (CFG Writes) from PCIe - * o Bridge received a poisoned Posted Write from (MEM Writes) from PCIe - * o Bridge received a poisoned Completion on a Split Transction from PCIe - * o Bridge received a poisoned Completion on a Delayed Transction from PCIe - * - * Check for non-poisoned PCIe transactions that got forwarded to the secondary - * side and detects a PERR#. Except for delayed read completions, a poisoned - * TLP will be forwarded to the secondary bus and PERR# will be asserted. - */ -/* ARGSUSED */ -static int -pf_analyse_perr_assert(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, - pf_data_t *dq_p, pf_data_t *pf_data_p) -{ - uint16_t cmd; - uint32_t addr; - pcie_req_id_t bdf; - uint32_t trans_type; - int sts; - int err = PF_NO_ERROR; - - if (HAS_SAER_LOGS(pf_data_p, bit)) { - if (pf_pci_decode(rpdip, pf_data_p, &cmd, &bdf, &addr, - &trans_type) != DDI_SUCCESS) - return (PF_PANIC); - - switch (cmd) { - case PCI_PCIX_CMD_MEMWR: - case PCI_PCIX_CMD_MEMWR_BL: - case PCI_PCIX_CMD_MEMWRBL: - /* Posted Writes Transactions */ - if (trans_type == PF_PIO_ADDR) - sts = pf_hdl_lookup(rpdip, derr->fme_ena, - trans_type, addr, bdf); - break; - case PCI_PCIX_CMD_CFWR: - /* - * Check to see if it is a non-posted write. If so, a - * UR Completion would have been sent. - */ - if (pf_matched_in_rc(dq_p, pf_data_p, - PCI_STAT_R_MAST_AB)) { - sts = PF_HDL_FOUND; - err = PF_MATCHED_RC; - break; - } - sts = pf_hdl_lookup(rpdip, derr->fme_ena, - trans_type, addr, bdf); - break; - case PCI_PCIX_CMD_SPL: - sts = pf_hdl_lookup(rpdip, derr->fme_ena, - trans_type, addr, bdf); - break; - default: - /* Unexpected situation, panic */ - sts = PF_HDL_NOTFOUND; - } - - if (sts == PF_HDL_NOTFOUND) - err = PF_PANIC; - } else { - /* - * Check to see if it is a non-posted write. If so, a UR - * Completion would have been sent. - */ - if ((pf_data_p->dev_status & PCIE_DEVCTL_UR_REPORTING_EN) && - pf_matched_in_rc(dq_p, pf_data_p, PCI_STAT_R_MAST_AB)) - err = PF_MATCHED_RC; - - /* Check for posted writes. Transaction is lost. */ - if (pf_data_p->s_status & PCI_STAT_S_PERROR) { - err = PF_PANIC; - } - - /* - * All other scenarios are due to read completions. Check for - * PERR on the primary side. If found the primary side error - * handling will take care of this error. - */ - if (err == PF_NO_ERROR) { - if (pf_data_p->status & PCI_STAT_PERROR) - err = PF_MATCHED_PARENT; - else - err = PF_PANIC; - } - } - - return (err); -} - -/* - * PCIe Poisoned TLP error analyser. If a PCIe device receives a Poisoned TLP, - * check the logs and see if an associated handler for this transaction can be - * found. - */ -/* ARGSUSED */ -static int -pf_analyse_ptlp(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, - pf_data_t *dq_p, pf_data_t *pf_data_p) -{ - pf_data_t *parent_pfd_p; - - /* - * If AERs are supported find the logs in this device, otherwise look in - * it's parent's logs. - */ - if (HAS_AER_LOGS(pf_data_p, bit)) { - pcie_tlp_hdr_t *hdr = (pcie_tlp_hdr_t *)&pf_data_p->aer_h0; - - /* - * Double check that the log contains a poisoned TLP. - * Some devices like PLX switch do not log poison TLP headers. - */ - if (hdr->ep) { - if (pf_tlp_hdl_lookup(rpdip, derr, pf_data_p) == - PF_HDL_FOUND) - return (PF_MATCHED_DEVICE); - } - return (PF_PANIC); - } - - if (pf_data_p->parent_index != PF_DATA_NOT_FOUND) { - parent_pfd_p = &dq_p[pf_data_p->parent_index]; - if (HAS_AER_LOGS(parent_pfd_p, bit)) - return (PF_MATCHED_PARENT); - } - - return (PF_PANIC); -} - -/* - * PCIe-PCI Bridge Received Master and Target abort error analyser on Split - * Completions. If a PCIe Bridge receives a MA/TA check logs and see if an - * associated handler for this transaction can be found. - */ -/* ARGSUSED */ -static int -pf_analyse_sc(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, - pf_data_t *dq_p, pf_data_t *pf_data_p) -{ - uint16_t cmd; - uint32_t addr; - pcie_req_id_t bdf; - uint32_t trans_type; - int sts = PF_HDL_NOTFOUND; - - if (!HAS_SAER_LOGS(pf_data_p, bit)) - return (PF_PANIC); - - if (pf_pci_decode(rpdip, pf_data_p, &cmd, &bdf, &addr, &trans_type) != - DDI_SUCCESS) - return (PF_PANIC); - - if (cmd == PCI_PCIX_CMD_SPL) - sts = pf_hdl_lookup(rpdip, derr->fme_ena, trans_type, - addr, bdf); - - if (sts == PF_HDL_NOTFOUND) - return (PF_PANIC); - - return (PF_MATCHED_DEVICE); -} - -/* - * PCIe Timeout error analyser. This error can be forgiven if it is marked as - * CE Advisory. If it is marked as advisory, this means the HW can recover - * and/or retry the transaction automatically. - */ -/* ARGSUSED */ -static int -pf_analyse_to(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, - pf_data_t *dq_p, pf_data_t *pf_data_p) -{ - /* - * If the Advisory Non-Fatal is set, that means HW will automatically - * retry the failed transaction. - */ - if (HAS_AER_LOGS(pf_data_p, bit) && CE_ADVISORY(pf_data_p)) - return (PF_NO_PANIC); - - return (PF_PANIC); -} - -/* - * PCIe Unexpected Completion. This error can be forgiven if it is marked as - * CE Advisory. If it is marked as advisory, this means the HW can recover - * and/or retry the transaction automatically. - */ -/* ARGSUSED */ -static int -pf_analyse_uc(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, - pf_data_t *dq_p, pf_data_t *pf_data_p) -{ - /* - * Check to see if this TLP was misrouted by matching the device BDF - * with the TLP Log. If misrouting panic, otherwise don't panic. - */ - if (HAS_AER_LOGS(pf_data_p, bit) && - (pf_data_p->bdf == (pf_data_p->aer_h2 >> 16))) - return (PF_NO_PANIC); - - return (PF_PANIC); -} - -/* - * PCIe-PCI Bridge Uncorrectable Data error anlyser. All Uncorrectable Data - * errors should have resulted in a PCIe Poisoned TLP to the RC, except for - * Posted Writes. Check the logs for Posted Writes and if the RC did not see a - * Poisoned TLP. - * - * Non-Posted Writes will also generate a UR in the completion status, which the - * RC should also see. - */ -/* ARGSUSED */ -static int -pf_analyse_uc_data(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, - pf_data_t *dq_p, pf_data_t *pf_data_p) -{ - uint16_t cmd; - uint32_t addr; - pcie_req_id_t bdf; - uint32_t trans_type; - - if (!HAS_SAER_LOGS(pf_data_p, bit)) - return (PF_PANIC); - - if (pf_matched_in_rc(dq_p, pf_data_p, PCI_STAT_PERROR)) - return (PF_MATCHED_RC); - - if (pf_pci_decode(rpdip, pf_data_p, &cmd, &bdf, &addr, &trans_type) != - DDI_SUCCESS) - return (PF_PANIC); - - if (pf_hdl_lookup(rpdip, derr->fme_ena, trans_type, addr, bdf) == - PF_HDL_NOTFOUND) - return (PF_PANIC); - - return (PF_MATCHED_DEVICE); -} - -/* ARGSUSED */ -static int -pf_no_panic(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, - pf_data_t *dq_p, pf_data_t *pf_data_p) -{ - return (PF_NO_PANIC); -} - -/* ARGSUSED */ -static int -pf_matched_device(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, - pf_data_t *dq_p, pf_data_t *pf_data_p) -{ - return (PF_MATCHED_DEVICE); -} - -/* ARGSUSED */ -static int -pf_panic(dev_info_t *rpdip, ddi_fm_error_t *derr, uint32_t bit, - pf_data_t *dq_p, pf_data_t *pf_data_p) -{ - return (PF_PANIC); -}
--- a/usr/src/uts/common/sys/pci.h Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/common/sys/pci.h Mon Dec 18 11:06:59 2006 -0800 @@ -695,19 +695,8 @@ /* * PCI-X bridge capability related definitions */ -#define PCI_PCIX_SEC_STATUS 0x2 /* Secondary Status offset */ -#define PCI_PCIX_SEC_STATUS_SCD 0x4 /* Split Completion Discarded */ -#define PCI_PCIX_SEC_STATUS_USC 0x8 /* Unexpected Split Complete */ -#define PCI_PCIX_SEC_STATUS_SCO 0x10 /* Split Completion Overrun */ -#define PCI_PCIX_SEC_STATUS_SRD 0x20 /* Split Completion Delayed */ -#define PCI_PCIX_SEC_STATUS_ERR_MASK 0x3C - -#define PCI_PCIX_BDG_STATUS 0x4 /* Bridge Status offset */ -#define PCI_PCIX_BDG_STATUS_USC 0x80000 -#define PCI_PCIX_BDG_STATUS_SCO 0x100000 -#define PCI_PCIX_BDG_STATUS_SRD 0x200000 -#define PCI_PCIX_BDG_STATUS_ERR_MASK 0x380000 - +#define PCI_PCIX_SEC_STATUS 0x2 /* Secondary status register offset */ +#define PCI_PCIX_BDG_STATUS 0x4 /* Bridge Status register offset */ #define PCI_PCIX_UP_SPL_CTL 0x8 /* Upstream split ctrl reg offset */ #define PCI_PCIX_DOWN_SPL_CTL 0xC /* Downstream split ctrl reg offset */ #define PCI_PCIX_BDG_ECC_STATUS 0x10 /* ECC Status register offset */
--- a/usr/src/uts/common/sys/pcie.h Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/common/sys/pcie.h Mon Dec 18 11:06:59 2006 -0800 @@ -592,11 +592,6 @@ #define PCIE_REQ_ID_FUNC_SHIFT 0 #define PCIE_REQ_ID_FUNC_MASK 0x0007 -#define PCIE_CPL_STS_SUCCESS 0 -#define PCIE_CPL_STS_UR 1 -#define PCIE_CPL_STS_CRS 2 -#define PCIE_CPL_STS_CA 4 - #if defined(_BIT_FIELDS_LTOH) /* * PCI Express little-endian common TLP header format
--- a/usr/src/uts/common/sys/pcie_impl.h Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/common/sys/pcie_impl.h Mon Dec 18 11:06:59 2006 -0800 @@ -32,171 +32,28 @@ extern "C" { #endif -#include <sys/pcie.h> - -/* PCI-E config space data for error handling and ereport */ -typedef struct pf_data { - dev_info_t *dip; - pcie_req_id_t bdf; - uint32_t severity_flags; - int parent_index; - pcie_req_id_t fault_bdf; - uint32_t fault_addr; - int send_erpt; - - /* 0-3Fh. PCI */ - uint16_t vendor_id; - uint16_t device_id; - uint8_t hdr_type; - uint16_t command; /* command */ - uint16_t status; /* status */ - uint8_t rev_id; - uint16_t s_status; /* Bridge secondary status */ - pcie_req_id_t bdg_secbus; /* Bridge secondary bus num */ - - /* 40h-FFh. PCI-X Capability */ - uint16_t pcix_s_status; /* PCI-X Secondary status */ - uint32_t pcix_bdg_status; /* PCI-X Bridge status */ - - /* 40h-FFh. PCI-E Capability */ - uint16_t pcie_off; /* PCI-E capability offset */ - uint8_t dev_type; /* device/port type */ - uint16_t dev_status; /* device status */ - - /* 100h-FFFh. Extended PCI-E */ - uint16_t aer_off; /* AER offset */ - - uint32_t aer_ce_status; /* AER Correctable Errors */ - - uint32_t aer_ue_status; /* AER Uncorrectable Errors */ - uint32_t aer_severity; - uint32_t aer_control; - uint32_t aer_h0; - uint32_t aer_h1; - uint32_t aer_h2; - uint32_t aer_h3; - - uint32_t s_aer_ue_status; /* Secondary AER UEs */ - uint32_t s_aer_control; - uint32_t s_aer_severity; - uint32_t s_aer_h0; - uint32_t s_aer_h1; - uint32_t s_aer_h2; - uint32_t s_aer_h3; -} pf_data_t; - -/* Information used while handling errors in the fabric. */ -typedef struct pf_impl { - dev_info_t *pf_rpdip; - pcie_req_id_t pf_fbdf; /* captured fault bdf to scan */ - uint32_t pf_faddr; /* captured fault addr to scan */ - ddi_fm_error_t *pf_derr; - pf_data_t *pf_dq_p; /* ptr to pcie fault data queue */ - int *pf_dq_tail_p; /* last valid index of fault data q */ -} pf_impl_t; - -/* Parent Private data of PCI/PCIe devices in a PCIe system */ -typedef struct pcie_ppd { - dev_info_t *ppd_dip; - ddi_acc_handle_t ppd_cfg_hdl; /* error handling acc handle */ - kmutex_t ppd_fm_lock; /* error handling lock */ - uint_t ppd_fm_flags; - - /* Static PCI/PCIe information */ - pcie_req_id_t ppd_bdf; - uint32_t ppd_dev_ven_id; /* device/vendor ID */ - uint8_t ppd_hdr_type; /* pci header type, see pci.h */ - uint8_t ppd_dev_type; /* PCI-E dev type, see pcie.h */ - uint8_t ppd_bdg_secbus; /* Bridge secondary bus num */ - uint16_t ppd_pcie_off; /* PCIe Capability Offset */ - uint16_t ppd_aer_off; /* PCIe Advanced Error Offset */ - uint16_t ppd_pcix_off; /* PCIx Capability Offset */ - uint8_t ppd_pcie_phfun; /* Phantom funs for pcix/pcie */ - pci_bus_range_t ppd_bus_range; /* pci bus-range property */ - ppb_ranges_t *ppd_addr_ranges; /* pci range property */ - int ppd_addr_entries; /* number of range prop */ - pci_regspec_t *ppd_assigned_addr; /* "assigned-address" prop */ - int ppd_assigned_entries; /* number of prop entries */ -} pcie_ppd_t; - -#define PCI_GET_BDF(dip) \ - ((pcie_ppd_t *)pcie_get_ppd(dip))->ppd_bdf -#define PCI_GET_SEC_BUS(dip) \ - ((pcie_ppd_t *)pcie_get_ppd(dip))->ppd_bdg_secbus -#define PCI_GET_PHFUN(dip) \ - ((pcie_ppd_t *)pcie_get_ppd(dip))->ppd_pcie_phfun - /* * The following flag is used for Broadcom 5714/5715 bridge prefetch issue. * This flag will be used both by px and px_pci nexus drivers. */ #define PX_DMAI_FLAGS_MAP_BUFZONE 0x40000 -/* ppd_fm_flags field */ -#define PF_FM_READY (1 << 0) /* ppd_fm_lock initialized */ -#define PF_IS_NH (1 << 1) /* known as non-hardened */ - -/* PCIe fabric error handling return codes */ -#define PF_NO_ERROR (1 << 0) /* No error seen */ -#define PF_CE (1 << 1) /* Correctable Error */ -#define PF_NO_PANIC (1 << 2) /* Error should not panic sys */ -#define PF_MATCHED_DEVICE (1 << 3) /* Error Handled By Device */ -#define PF_MATCHED_RC (1 << 4) /* Error Handled By RC */ -#define PF_MATCHED_PARENT (1 << 5) /* Error Handled By Parent */ -#define PF_PANIC (1 << 6) /* Error should panic system */ - -/* PCIe fabric handle lookup return codes */ -#define PF_HDL_FOUND 0 -#define PF_HDL_NOTFOUND 1 - -/* PCIe fabric handle lookup address flags */ -#define PF_DMA_ADDR (1 << 0) -#define PF_PIO_ADDR (1 << 1) -#define PF_CFG_ADDR (1 << 2) - -#define PF_SEND_ERPT_YES 1 -#define PF_SEND_ERPT_UNKNOWN 0 -#define PF_SEND_ERPT_NO -1 - -#define PF_SUCCESS (1 << 0) -#define PF_FAILURE (1 << 1) -#define PF_DO_NOT_SCAN (1 << 2) - -/* PCIe helper functions */ -extern pcie_ppd_t *pcie_get_ppd(dev_info_t *dip); - -/* PCIe Friendly Functions */ +/* + * PCI-Express Friendly Functions + */ extern int pcie_initchild(dev_info_t *dip); extern void pcie_uninitchild(dev_info_t *dip); -extern void pcie_clear_errors(dev_info_t *dip, ddi_acc_handle_t cfg_hdl); +extern void pcie_clear_errors(dev_info_t *dip, + ddi_acc_handle_t config_handle); extern int pcie_postattach_child(dev_info_t *dip); -extern void pcie_enable_errors(dev_info_t *dip, ddi_acc_handle_t cfg_hdl); -extern void pcie_disable_errors(dev_info_t *dip, ddi_acc_handle_t cfg_hdl); -extern int pcie_enable_ce(dev_info_t *dip, ddi_acc_handle_t cfg_hdl); +extern void pcie_enable_errors(dev_info_t *dip, + ddi_acc_handle_t config_handle); +extern void pcie_disable_errors(dev_info_t *dip, + ddi_acc_handle_t config_handle); extern dev_info_t *pcie_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip); extern uint32_t pcie_get_bdf_for_dma_xfer(dev_info_t *dip, dev_info_t *rdip); - -extern pcie_ppd_t *pcie_init_ppd(dev_info_t *cdip); -extern void pcie_uninit_ppd(dev_info_t *cdip); -extern boolean_t pcie_is_child(dev_info_t *dip, dev_info_t *rdip); -extern int pcie_get_bdf_from_dip(dev_info_t *dip, pcie_req_id_t *bdf); - -/* PCIe error handling functions */ -extern int pf_en_dq(pf_data_t *pf_data_p, pf_data_t *dq_p, int *dq_tail_p, - pcie_req_id_t pbdf); -extern int pf_get_dq_size(void); -extern int pf_tlp_decode(dev_info_t *rpdip, pf_data_t *pf_data_p, - pcie_req_id_t *bdf, uint32_t *addr, uint32_t *trans_type); -extern int pf_tlp_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, - pf_data_t *pf_data_p); -extern int pf_hdl_lookup(dev_info_t *rpdip, uint64_t ena, - uint32_t flag, uint32_t addr, pcie_req_id_t bdf); -extern int pf_scan_fabric(dev_info_t *rpdip, ddi_fm_error_t *derr, - pf_data_t *dq_p, int *dq_tail_p); -extern void pf_init(dev_info_t *dip, ddi_iblock_cookie_t ibc); -extern void pf_fini(dev_info_t *dip); -extern boolean_t pf_ready(dev_info_t *dip); - +extern int pcie_enable_ce(dev_info_t *dip, + ddi_acc_handle_t config_handle); #ifdef __cplusplus }
--- a/usr/src/uts/sparc/Makefile.files Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sparc/Makefile.files Mon Dec 18 11:06:59 2006 -0800 @@ -66,7 +66,7 @@ CPR_SPARC_OBJS += cpr_sparc.o PCI_PCI_OBJS += pci_pci.o pci_debug.o pci_pwr.o pcix.o -PX_PCI_OBJS += px_pci.o pcie_pwr.o +PX_PCI_OBJS += px_pci.o px_debug.o pcie_pwr.o FCODE_OBJS += fcode.o #
--- a/usr/src/uts/sparc/pci_pci/Makefile Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sparc/pci_pci/Makefile Mon Dec 18 11:06:59 2006 -0800 @@ -18,6 +18,7 @@ # # CDDL HEADER END # +# # uts/sparc/pci_pci/Makefile # Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. @@ -81,11 +82,6 @@ LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON # -# Dependency -# -LDFLAGS += -dy -Nmisc/pcie - -# # Default build targets. # .KEEP_STATE:
--- a/usr/src/uts/sun4/io/px/pcie_pwr.c Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4/io/px/pcie_pwr.c Mon Dec 18 11:06:59 2006 -0800 @@ -36,22 +36,11 @@ #include <sys/sunndi.h> #include <sys/ddi_impldefs.h> #include <sys/ddi_implfuncs.h> -#include <sys/pcie.h> +#include <sys/pci.h> #include <sys/pcie_impl.h> -#include <sys/promif.h> /* prom_printf */ #include "pcie_pwr.h" - -#if defined(DEBUG) - -#define DBG pcie_pwr_dbg -static void pcie_pwr_dbg(dev_info_t *dip, char *fmt, ...); -static uint_t pcie_pwr_print = 0; - -#else /* DEBUG */ - -#define DBG 0 && - -#endif /* DEBUG */ +#include "px_pci.h" +#include "px_debug.h" /* * This file implements the power management functionality for @@ -133,10 +122,10 @@ (level == PM_LEVEL_D2 && (pmcaps & PCIE_SUPPORTS_D2))); mutex_enter(&pwr_p->pwr_lock); - DBG(dip, "pcie_power: change from %d to %d\n", + DBG(DBG_PWR, dip, "pcie_power: change from %d to %d\n", pwr_p->pwr_func_lvl, level); if (pwr_p->pwr_func_lvl == level) { - DBG(dip, "pcie_power: already at %d\n", level); + DBG(DBG_PWR, dip, "pcie_power: already at %d\n", level); ret = DDI_SUCCESS; goto pcie_pwr_done; } @@ -147,7 +136,7 @@ * or there is a hold. */ if (pwr_p->pwr_flags & PCIE_PM_BUSY) { - DBG(dip, "pcie_power: rejecting change to %d " + DBG(DBG_PWR, dip, "pcie_power: rejecting change to %d " "as busy\n", level); goto pcie_pwr_done; } @@ -161,7 +150,7 @@ ASSERT(!counters[PCIE_D0_INDEX] && !counters[PCIE_UNKNOWN_INDEX]); if (level < pwr_level_allowed(pwr_p)) { - DBG(dip, "pcie_power: rejecting level %d as" + DBG(DBG_PWR, dip, "pcie_power: rejecting level %d as" " %d is the lowest possible\n", level, pwr_level_allowed(pwr_p)); goto pcie_pwr_done; @@ -169,12 +158,12 @@ } if (pcie_pwr_change(dip, pwr_p, level) != DDI_SUCCESS) { - DBG(dip, "pcie_power: attempt to change to %d " + DBG(DBG_PWR, dip, "pcie_power: attempt to change to %d " " failed \n", level); goto pcie_pwr_done; } pwr_p->pwr_func_lvl = level; - DBG(dip, "pcie_power: level changed to %d \n", level); + DBG(DBG_PWR, dip, "pcie_power: level changed to %d \n", level); ret = DDI_SUCCESS; pcie_pwr_done: @@ -221,9 +210,9 @@ } /* Save config space, if going to D3 */ if (new == PM_LEVEL_D3) { - DBG(dip, "pwr_change: saving config space regs\n"); + DBG(DBG_PWR, dip, "pwr_change: saving config space regs\n"); if (pci_save_config_regs(dip) != DDI_SUCCESS) { - DBG(dip, "pcie_pwr_change: failed to save " + DBG(DBG_PWR, dip, "pcie_pwr_change: failed to save " "config space regs\n"); return (DDI_FAILURE); } @@ -244,9 +233,9 @@ * Restore config space if coming out of D3 */ if (pwr_p->pwr_func_lvl == PM_LEVEL_D3) { - DBG(dip, "pcie_pwr_change: restoring config space\n"); + DBG(DBG_PWR, dip, "pcie_pwr_change: restoring config space\n"); if (pci_restore_config_regs(dip) != DDI_SUCCESS) { - DBG(dip, "pcie_pwr_change: failed to restore " + DBG(DBG_PWR, dip, "pcie_pwr_change: failed to restore " "config space regs\n"); return (DDI_FAILURE); } @@ -303,7 +292,7 @@ mutex_enter(&pwr_p->pwr_lock); switch (op) { case BUS_POWER_PRE_NOTIFICATION: - DBG(dip, "pcie_bus_power: %s@%d op %s %d->%d\n", + DBG(DBG_PWR, dip, "bus_power: %s@%d op %s %d->%d\n", ddi_driver_name(cdip), ddi_get_instance(cdip), pcie_decode_pwr_op(op), old_level, new_level); /* @@ -316,12 +305,12 @@ */ if (pwr_p->pwr_flags & PCIE_NO_CHILD_PM) { if (!PCIE_IS_COMPS_COUNTED(cdip)) { - DBG(dip, "pcie_bus_power: marking child " + DBG(DBG_PWR, dip, "bus_power: marking child " "busy to disable pm \n"); (void) pm_busy_component(cdip, 0); } if (new_level < PM_LEVEL_D0 && !comp) { - DBG(dip, "pcie_bus_power: rejecting " + DBG(DBG_PWR, dip, "bus_power: rejecting " "child's attempt to go to %d\n", new_level); rv = DDI_FAILURE; } @@ -333,7 +322,7 @@ case BUS_POWER_HAS_CHANGED: case BUS_POWER_POST_NOTIFICATION: - DBG(dip, "pcie_bus_power: %s@%d op %s %d->%d\n", + DBG(DBG_PWR, dip, "bus_power: %s@%d op %s %d->%d\n", ddi_driver_name(cdip), ddi_get_instance(cdip), pcie_decode_pwr_op(op), old_level, new_level); /* @@ -348,7 +337,7 @@ (void) pcie_pm_add_child(dip, cdip); if ((pwr_p->pwr_flags & PCIE_NO_CHILD_PM) && (op == BUS_POWER_HAS_CHANGED)) { - DBG(dip, "pcie_bus_power: marking child " + DBG(DBG_PWR, dip, "bus_power: marking child " "busy to disable pm \n"); (void) pm_busy_component(cdip, 0); /* @@ -377,7 +366,7 @@ } if (*((int *)result) == DDI_FAILURE) { - DBG(dip, "pcie_bus_power: change for %s%d failed\n", + DBG(DBG_PWR, dip, "bus_power: change for %s%d failed\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); break; } @@ -400,7 +389,7 @@ */ if (level_allowed >= pwr_p->pwr_func_lvl && !(pwr_p->pwr_flags & PCIE_PM_BUSY)) { - DBG(dip, "pcie_bus_power: marking busy\n"); + DBG(DBG_PWR, dip, "bus_power: marking busy\n"); (void) pm_busy_component(dip, 0); pwr_p->pwr_flags |= PCIE_PM_BUSY; break; @@ -417,7 +406,7 @@ * For pci express, we should check here whether * the link is in L1 state or not. */ - DBG(dip, "pcie_bus_power: marking idle\n"); + DBG(DBG_PWR, dip, "bus_power: marking idle\n"); (void) pm_idle_component(dip, 0); pwr_p->pwr_flags &= ~PCIE_PM_BUSY; break; @@ -511,7 +500,7 @@ if (!comps) return; - DBG(dip, "pcie_add_comps: unknown level counter incremented " + DBG(DBG_PWR, dip, "pcie_add_comps: unknown level counter incremented " "from %d by %d because of %s@%d\n", (pwr_p->pwr_counters)[PCIE_UNKNOWN_INDEX], comps, ddi_driver_name(cdip), ddi_get_instance(cdip)); @@ -558,7 +547,7 @@ } return; } - DBG(dip, "pcie_remove_comps:counters decremented because of " + DBG(DBG_PWR, dip, "pcie_remove_comps:counters decremented because of " "%s@%d\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); child_counters = PCIE_CHILD_COUNTERS(cdip); /* @@ -600,7 +589,7 @@ if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, "pm-want-child-notification?", NULL, NULL) != DDI_PROP_SUCCESS) { - DBG(dip, "can't create pm-want-child-notification \n"); + DBG(DBG_PWR, dip, "can't create pm-want-child-notification \n"); goto pwr_common_err; } pcie_pm_p->pcie_pwr_p = pwr_p; @@ -668,11 +657,11 @@ */ mutex_enter(&pwr_p->pwr_lock); ASSERT(pwr_p->pwr_hold >= 0); - DBG(dip, "pm_hold: incrementing hold \n"); + DBG(DBG_PWR, dip, "pm_hold: incrementing hold \n"); pwr_p->pwr_hold++; /* Mark itself busy, if it is not done already */ if (!(pwr_p->pwr_flags & PCIE_PM_BUSY)) { - DBG(dip, "pm_hold: marking busy\n"); + DBG(DBG_PWR, dip, "pm_hold: marking busy\n"); pwr_p->pwr_flags |= PCIE_PM_BUSY; (void) pm_busy_component(dip, 0); } @@ -682,7 +671,7 @@ } mutex_exit(&pwr_p->pwr_lock); if (pm_raise_power(dip, 0, PM_LEVEL_D0) != DDI_SUCCESS) { - DBG(dip, "pm_hold: attempt to raise power " + DBG(DBG_PWR, dip, "pm_hold: attempt to raise power " "from %d to %d failed\n", pwr_p->pwr_func_lvl, PM_LEVEL_D0); pcie_pm_release(dip); @@ -716,13 +705,13 @@ ASSERT(MUTEX_HELD(&pwr_p->pwr_lock)); ASSERT(pwr_p->pwr_hold > 0); - DBG(dip, "pm_subrelease: decrementing hold \n"); + DBG(DBG_PWR, dip, "pm_subrelease: decrementing hold \n"); pwr_p->pwr_hold--; ASSERT(pwr_p->pwr_hold >= 0); ASSERT(pwr_p->pwr_flags & PCIE_PM_BUSY); level = pwr_level_allowed(pwr_p); if (pwr_p->pwr_hold == 0 && level < pwr_p->pwr_func_lvl) { - DBG(dip, "pm_subrelease: marking idle \n"); + DBG(DBG_PWR, dip, "pm_subrelease: marking idle \n"); (void) pm_idle_component(dip, 0); pwr_p->pwr_flags &= ~PCIE_PM_BUSY; } @@ -759,7 +748,7 @@ * and we stay at full power. */ ASSERT(pwr_p->pwr_hold > 0); - DBG(dip, "pm_add_child: decrementing hold \n"); + DBG(DBG_PWR, dip, "pm_add_child: decrementing hold \n"); pwr_p->pwr_hold--; /* * We must have made sure that busy bit @@ -804,7 +793,7 @@ if ((pwr_p->pwr_hold == 0) && (!total || (pwr_level_allowed(pwr_p) < pwr_p->pwr_func_lvl))) { if (pwr_p->pwr_flags & PCIE_PM_BUSY) { - DBG(dip, "pcie_bus_power: marking idle\n"); + DBG(DBG_PWR, dip, "bus_power: marking idle\n"); (void) pm_idle_component(dip, 0); pwr_p->pwr_flags &= ~PCIE_PM_BUSY; } @@ -867,7 +856,7 @@ * init'ed. They will be set up by init_child(). */ if (i_ddi_node_state(cdip) < DS_INITIALIZED) { - DBG(dip, + DBG(DBG_PWR, dip, "DDI_RESUME: skipping %s%d not in CF1\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); continue; @@ -880,12 +869,12 @@ "nexus-saved-config-regs") != 1) continue; - DBG(dip, + DBG(DBG_PWR, dip, "DDI_RESUME: nexus restoring %s%d config regs\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); if (pci_config_setup(cdip, &config_handle) != DDI_SUCCESS) { - DBG(dip, "DDI_RESUME: " + DBG(DBG_PWR, dip, "DDI_RESUME: " "pci_config_setup for %s%d failed\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); continue; @@ -898,13 +887,15 @@ if (is_pcie = pcie_is_pcie(config_handle)) pcie_disable_errors(cdip, config_handle); (void) pci_restore_config_regs(cdip); - if (is_pcie) + if (is_pcie) { pcie_enable_errors(cdip, config_handle); + (void) pcie_enable_ce(cdip, config_handle); + } pci_config_teardown(&config_handle); if (ndi_prop_remove(DDI_DEV_T_NONE, cdip, "nexus-saved-config-regs") != DDI_PROP_SUCCESS) { - DBG(dip, "%s%d can't remove prop %s", + DBG(DBG_PWR, dip, "%s%d can't remove prop %s", ddi_driver_name(cdip), ddi_get_instance(cdip), "nexus-saved-config-regs"); } @@ -939,7 +930,7 @@ mutex_exit(&pwr_p->pwr_lock); if (pm_raise_power(dip, 0, PM_LEVEL_D0) != DDI_SUCCESS) { - DBG(dip, "pwr_suspend: attempt " + DBG(DBG_PWR, dip, "pwr_suspend: attempt " "to raise power from %d to %d " "failed\n", pwr_p->pwr_func_lvl, PM_LEVEL_D0); @@ -974,7 +965,7 @@ * init'ed. They will be set up in init_child(). */ if (i_ddi_node_state(cdip) < DS_INITIALIZED) { - DBG(dip, "DDI_SUSPEND: skipping " + DBG(DBG_PWR, dip, "DDI_SUSPEND: skipping " "%s%d not in CF1\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); continue; @@ -1010,16 +1001,16 @@ */ if (ndi_prop_create_boolean(DDI_DEV_T_NONE, cdip, "nexus-saved-config-regs") != DDI_PROP_SUCCESS) { - DBG(dip, "%s%d can't update prop %s", + DBG(DBG_PWR, dip, "%s%d can't update prop %s", ddi_driver_name(cdip), ddi_get_instance(cdip), "nexus-saved-config-regs"); } - DBG(dip, "DDI_SUSPEND: saving config space for" + DBG(DBG_PWR, dip, "DDI_SUSPEND: saving config space for" " %s%d\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); /* PCIe workaround: disable errors during 4K config save */ if (pci_config_setup(cdip, &config_handle) != DDI_SUCCESS) { - DBG(dip, "DDI_SUSPEND: pci_config_setup " + DBG(DBG_PWR, dip, "DDI_SUSPEND: pci_config_setup " "for %s%d failed\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); continue; @@ -1028,8 +1019,10 @@ if (is_pcie = pcie_is_pcie(config_handle)) pcie_disable_errors(cdip, config_handle); (void) pci_save_config_regs(cdip); - if (is_pcie) + if (is_pcie) { pcie_enable_errors(cdip, config_handle); + (void) pcie_enable_ce(cdip, config_handle); + } pci_config_teardown(&config_handle); } return (DDI_SUCCESS); @@ -1069,24 +1062,4 @@ return ("UNKNOWN OP"); } -static void -pcie_pwr_dbg(dev_info_t *dip, char *fmt, ...) -{ - va_list ap; - if (!pcie_pwr_print) - return; - - if (dip) - prom_printf("%s(%d): pcie pwr: ", ddi_driver_name(dip), - ddi_get_instance(dip)); -body: - va_start(ap, fmt); - if (ap) - prom_vprintf(fmt, ap); - else - prom_printf(fmt); - - va_end(ap); -} - #endif
--- a/usr/src/uts/sun4/io/px/pcie_pwr.h Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4/io/px/pcie_pwr.h Mon Dec 18 11:06:59 2006 -0800 @@ -32,6 +32,8 @@ extern "C" { #endif +#include "px_ioapi.h" /* for msiq */ + /* index of counters for each level */ #define PCIE_D3_INDEX PM_LEVEL_D3 #define PCIE_D2_INDEX PM_LEVEL_D2
--- a/usr/src/uts/sun4/io/px/px.c Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4/io/px/px.c Mon Dec 18 11:06:59 2006 -0800 @@ -256,10 +256,6 @@ (void) ddi_prop_update_string(DDI_DEV_T_NONE, dip, "device_type", "pciex"); - - /* Initialize px_dbg for high pil printing */ - px_dbg_attach(dip, &px_p->px_dbg_hdl); - /* * Get key properties of the pci bridge node and * determine it's type (psycho, schizo, etc ...). @@ -273,12 +269,6 @@ /* Initialize device handle */ px_p->px_dev_hdl = dev_hdl; - px_p->px_dq_p = (pf_data_t *) - kmem_zalloc(sizeof (pf_data_t) * pf_get_dq_size(), - KM_SLEEP); - - px_p->px_dq_tail = -1; - /* * Initialize interrupt block. Note that this * initialize error handling for the PEC as well. @@ -382,7 +372,6 @@ err_bad_dev_init: px_free_props(px_p); err_bad_px_prop: - px_dbg_detach(dip, &px_p->px_dbg_hdl); mutex_destroy(&px_p->px_mutex); ddi_soft_state_free(px_state_p, instance); err_bad_px_softstate: @@ -479,15 +468,11 @@ px_ib_detach(px_p); (void) px_lib_dev_fini(dip); - kmem_free(px_p->px_dq_p, sizeof (pf_data_t) * - pf_get_dq_size()); - /* * Free the px soft state structure and the rest of the * resources it's using. */ px_free_props(px_p); - px_dbg_detach(dip, &px_p->px_dbg_hdl); mutex_exit(&px_p->px_mutex); mutex_destroy(&px_p->px_mutex); @@ -1249,8 +1234,6 @@ if (as->cmd == DDI_ATTACH && as->result != DDI_SUCCESS) pcie_pm_release(dip); - pf_init(rdip, (void *)px_p->px_fm_ibc); - (void) pcie_postattach_child(rdip); return (DDI_SUCCESS); @@ -1271,9 +1254,6 @@ return (pcie_pm_remove_child(dip, rdip)); } return (DDI_SUCCESS); - case DDI_PRE: - pf_fini(rdip); - return (DDI_SUCCESS); default: break; }
--- a/usr/src/uts/sun4/io/px/px_debug.c Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4/io/px/px_debug.c Mon Dec 18 11:06:59 2006 -0800 @@ -32,8 +32,7 @@ #include <sys/sunddi.h> /* dev_info_t */ #include <sys/ddi_impldefs.h> #include <sys/disp.h> -#include <sys/archsystm.h> /* getpil() */ -#include "px_obj.h" +#include "px_debug.h" /*LINTLIBRARY*/ @@ -113,35 +112,17 @@ /* LAST */ "unknown" }; -/* Tunables */ -static int px_dbg_msg_size = 16; /* # of Qs. Must be ^2 */ - -/* Non-Tunables */ -static int px_dbg_qmask = 0xFFFF; /* Mask based on Q size */ -static px_dbg_msg_t *px_dbg_msgq = NULL; /* Debug Msg Queue */ -static uint8_t px_dbg_reference = 0; /* Reference Counter */ -static kmutex_t px_dbg_mutex; /* Mutex for dequeuing */ -static uint8_t px_dbg_qtail = 0; /* Pointer to q tail */ -static uint8_t px_dbg_qhead = 0; /* Pointer to q head */ -static uint_t px_dbg_qsize = 0; /* # of pending messages */ -static uint_t px_dbg_failed = 0; /* # of overflows */ - -/* Forward Declarations */ -static void px_dbg_print(px_debug_bit_t bit, dev_info_t *dip, char *fmt, - va_list args); -static void px_dbg_queue(px_debug_bit_t bit, dev_info_t *dip, char *fmt, - va_list args); -static uint_t px_dbg_drain(caddr_t arg1, caddr_t arg2); - -/* - * Print function called either directly by px_dbg or through soft interrupt. - * This function cannot be called directly in threads with PIL above clock. - */ -static void -px_dbg_print(px_debug_bit_t bit, dev_info_t *dip, char *fmt, va_list args) +void +px_dbg(px_debug_bit_t bit, dev_info_t *dip, char *fmt, ...) { int cont = bit >> DBG_BITS; + va_list ap; + bit &= DBG_MASK; + if (bit >= sizeof (px_debug_sym) / sizeof (char *)) + return; + if (!(1ull << bit & px_debug_flags)) + return; if (cont) goto body; @@ -151,141 +132,8 @@ else prom_printf("px: %s: ", px_debug_sym[bit]); body: - if (args) - prom_vprintf(fmt, args); - else - prom_printf(fmt); -} - -/* - * Queueing mechanism to log px_dbg messages if calling thread is running with a - * PIL above clock. It's Multithreaded safe. - */ -static void -px_dbg_queue(px_debug_bit_t bit, dev_info_t *dip, char *fmt, va_list args) -{ - int instance = DIP_TO_INST(dip); - px_t *px_p = INST_TO_STATE(instance); - uint8_t q_no; - px_dbg_msg_t *msg_p; - - /* Check to make sure the queue hasn't overflowed */ - if (atomic_inc_uint_nv(&px_dbg_qsize) >= px_dbg_msg_size) { - px_dbg_failed++; - atomic_dec_uint(&px_dbg_qsize); - return; - } - - /* - * Grab the next available queue bucket. Incrementing the tail here - * doesn't need to be protected, as it is guaranteed to not overflow. - */ - q_no = ++px_dbg_qtail & px_dbg_qmask; - msg_p = &px_dbg_msgq[q_no]; - - ASSERT(msg_p->active == B_FALSE); - - /* Print the message in the buffer */ - vsnprintf(msg_p->msg, DBG_MSG_SIZE, fmt, args); - msg_p->bit = bit; - msg_p->dip = dip; - msg_p->active = B_TRUE; - - /* Trigger Soft Int */ - ddi_intr_trigger_softint(px_p->px_dbg_hdl, (caddr_t)NULL); -} - -/* - * Callback function for queuing px_dbg in high PIL by soft intr. This code - * assumes it will be called serially for every msg. - */ -static uint_t -px_dbg_drain(caddr_t arg1, caddr_t arg2) { - uint8_t q_no; - px_dbg_msg_t *msg_p; - uint_t ret = DDI_INTR_UNCLAIMED; - - mutex_enter(&px_dbg_mutex); - while (px_dbg_qsize) { - atomic_dec_uint(&px_dbg_qsize); - if (px_dbg_failed) { - cmn_err(CE_WARN, "%d msg(s) were lost", - px_dbg_failed); - px_dbg_failed = 0; - } - - q_no = ++px_dbg_qhead & px_dbg_qmask; - msg_p = &px_dbg_msgq[q_no]; - - if (msg_p->active) { - px_dbg_print(msg_p->bit, msg_p->dip, msg_p->msg, NULL); - msg_p->active = B_FALSE; - } - ret = DDI_INTR_CLAIMED; - } - - mutex_exit(&px_dbg_mutex); - return (ret); -} - -void -px_dbg(px_debug_bit_t bit, dev_info_t *dip, char *fmt, ...) -{ - va_list ap; - - bit &= DBG_MASK; - if (bit >= sizeof (px_debug_sym) / sizeof (char *)) - return; - if (!(1ull << bit & px_debug_flags)) - return; - va_start(ap, fmt); - if (getpil() > LOCK_LEVEL) - px_dbg_queue(bit, dip, fmt, ap); - else - px_dbg_print(bit, dip, fmt, ap); + prom_vprintf(fmt, ap); va_end(ap); } #endif /* DEBUG */ - -void -px_dbg_attach(dev_info_t *dip, ddi_softint_handle_t *dbg_hdl) -{ -#ifdef DEBUG - if (px_dbg_reference++ == 0) { - int size = px_dbg_msg_size; - - /* Check if px_dbg_msg_size is ^2 */ - size = (size & (size - 1)) ? ((size | ~size) + 1) : size; - px_dbg_msg_size = size; - px_dbg_qmask = size - 1; - px_dbg_msgq = kmem_zalloc(sizeof (px_dbg_msg_t) * size, - KM_SLEEP); - - mutex_init(&px_dbg_mutex, NULL, MUTEX_DRIVER, NULL); - } - - if (ddi_intr_add_softint(dip, dbg_hdl, - DDI_INTR_SOFTPRI_MAX, px_dbg_drain, NULL) != DDI_SUCCESS) { - DBG(DBG_ATTACH, dip, - "Unable to allocate soft int for DBG printing.\n"); - dbg_hdl = NULL; - } -#endif /* DEBUG */ -} - -/* ARGSUSED */ -void -px_dbg_detach(dev_info_t *dip, ddi_softint_handle_t *dbg_hdl) -{ -#ifdef DEBUG - if (--px_dbg_reference == 0) { - if (dbg_hdl != NULL) - (void) ddi_intr_remove_softint(*dbg_hdl); - if (px_dbg_msgq != NULL) - kmem_free(px_dbg_msgq, - sizeof (px_dbg_msg_t) * px_dbg_msg_size); - mutex_destroy(&px_dbg_mutex); - } -#endif /* DEBUG */ -}
--- a/usr/src/uts/sun4/io/px/px_debug.h Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4/io/px/px_debug.h Mon Dec 18 11:06:59 2006 -0800 @@ -110,18 +110,6 @@ #define DBG_BITS 6 #define DBG_CONT (1 << DBG_BITS) #define DBG_MASK (DBG_CONT - 1) -#define DBG_MSG_SIZE 320 - -/* Used only during High PIL printing */ -typedef struct px_dbg_msg { - boolean_t active; - px_debug_bit_t bit; - dev_info_t *dip; - char msg[DBG_MSG_SIZE]; -} px_dbg_msg_t; - -extern void px_dbg_attach(dev_info_t *dip, ddi_softint_handle_t *px_dbg_hdl); -extern void px_dbg_detach(dev_info_t *dip, ddi_softint_handle_t *px_dbg_hdl); #if defined(DEBUG)
--- a/usr/src/uts/sun4/io/px/px_fm.c Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4/io/px/px_fm.c Mon Dec 18 11:06:59 2006 -0800 @@ -36,27 +36,52 @@ #include <sys/membar.h> #include "px_obj.h" -#define PX_PCIE_PANIC_BITS \ - (PCIE_AER_UCE_DLP | PCIE_AER_UCE_FCP | PCIE_AER_UCE_TO | \ - PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP | PCIE_AER_UCE_ECRC | \ - PCIE_AER_UCE_UR) -#define PX_PCIE_NO_PANIC_BITS \ - (PCIE_AER_UCE_TRAINING | PCIE_AER_UCE_SD | PCIE_AER_UCE_CA | \ - PCIE_AER_UCE_UC) +typedef struct px_fabric_cfgspace { + /* Error information */ + msgcode_t msg_code; + pcie_req_id_t rid; + + /* Config space header and device type */ + uint8_t hdr_type; + uint16_t dev_type; + + /* Register pointers */ + uint16_t cap_off; + uint16_t aer_off; + + /* PCI register values */ + uint32_t sts_reg; + uint32_t sts_sreg; -static void px_err_fill_pfd(dev_info_t *rpdip, px_err_pcie_t *regs); -static int px_pcie_ptlp(dev_info_t *dip, ddi_fm_error_t *derr, - px_err_pcie_t *regs); + /* PCIE register values */ + uint32_t dev_sts_reg; + uint32_t aer_ce_reg; + uint32_t aer_ue_reg; + uint32_t aer_sev_reg; + uint32_t aer_ue_sreg; + uint32_t aer_sev_sreg; -#if defined(DEBUG) -static void px_pcie_log(dev_info_t *dip, px_err_pcie_t *regs, int severity); -#else /* DEBUG */ -#define px_pcie_log 0 && -#endif /* DEBUG */ + /* PCIE Header Log Registers */ + uint32_t aer_h1; + uint32_t aer_h2; + uint32_t aer_h3; + uint32_t aer_h4; + uint32_t aer_sh1; + uint32_t aer_sh2; + uint32_t aer_sh3; + uint32_t aer_sh4; +} px_fabric_cfgspace_t; -/* external functions */ -extern int pci_xcap_locate(ddi_acc_handle_t h, uint16_t id, uint16_t *base_p); -extern int pci_lcap_locate(ddi_acc_handle_t h, uint8_t id, uint16_t *base_p); +static uint16_t px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid); +static uint16_t px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid); +static int px_fabric_handle_psts(px_fabric_cfgspace_t *cs); +static int px_fabric_handle_ssts(px_fabric_cfgspace_t *cs); +static int px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs); +static int px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs); +static int px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs); +static void px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs); +static uint_t px_fabric_check(px_t *px_p, msgcode_t msg_code, + pcie_req_id_t rid, ddi_fm_error_t *derr); /* * Initialize px FMA support @@ -172,6 +197,17 @@ } /* + * Function used by PCI error handlers to check if captured address is stored + * in the DMA or ACC handle caches. + */ +int +px_handle_lookup(dev_info_t *dip, int type, uint64_t fme_ena, void *afar) +{ + int ret = ndi_fmc_error(dip, NULL, type, fme_ena, afar); + return (ret == DDI_FM_UNKNOWN ? DDI_FM_FATAL : ret); +} + +/* * Function used to initialize FMA for our children nodes. Called * through pci busops when child node calls ddi_fm_init. */ @@ -226,109 +262,435 @@ /* * PCI error callback which is registered with our parent to call * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors - * and PCI BERR/TO/UE on IO Loads. + * and PCI BERR/TO/UE + * + * Dispatch on all known leaves of this fire device because we cannot tell + * which side the error came from. */ /*ARGSUSED*/ int px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data) { - dev_info_t *pdip = ddi_get_parent(dip); - px_t *px_p = (px_t *)impl_data; - int i, acc_type = 0; - int lookup, rc_err, fab_err = PF_NO_PANIC; - uint32_t addr, addr_high, addr_low; - pcie_req_id_t bdf; - px_ranges_t *ranges_p; - int range_len; + px_t *px_p = (px_t *)impl_data; + int err = PX_OK; + int fatal = 0; + int nonfatal = 0; + int unknown = 0; + int ret = DDI_FM_OK; + + mutex_enter(&px_p->px_fm_mutex); + + err = px_err_handle(px_p, derr, PX_TRAP_CALL, B_TRUE); + + if (!px_lib_is_in_drain_state(px_p)) + ret = ndi_fm_handler_dispatch(px_p->px_dip, NULL, derr); + + mutex_exit(&px_p->px_fm_mutex); + + switch (ret) { + case DDI_FM_FATAL: + fatal++; + break; + case DDI_FM_NONFATAL: + nonfatal++; + break; + case DDI_FM_UNKNOWN: + unknown++; + break; + default: + break; + } + + ret = (fatal != 0) ? DDI_FM_FATAL : + ((nonfatal != 0) ? DDI_FM_NONFATAL : + (((unknown != 0) ? DDI_FM_UNKNOWN : DDI_FM_OK))); + + /* fire fatal error overrides device error */ + if (err & (PX_FATAL_GOS | PX_FATAL_SW)) + ret = DDI_FM_FATAL; + /* if fire encounts no error, then take whatever device error */ + else if ((err != PX_OK) && (ret != DDI_FM_FATAL)) + ret = DDI_FM_NONFATAL; + + return (ret); +} + +static uint16_t +px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid) +{ + uint32_t hdr, hdr_next_ptr, hdr_cap_id; + uint16_t offset = PCIE_EXT_CAP; + int deadcount = 0; + + /* Find the Advanced Error Register */ + hdr = px_fab_get(px_p, rid, offset); + hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) & + PCIE_EXT_CAP_NEXT_PTR_MASK; + hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) & + PCIE_EXT_CAP_ID_MASK; + + while ((hdr_next_ptr != PCIE_EXT_CAP_NEXT_PTR_NULL) && + (hdr_cap_id != PCIE_EXT_CAP_ID_AER)) { + offset = hdr_next_ptr; + hdr = px_fab_get(px_p, rid, offset); + hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) & + PCIE_EXT_CAP_NEXT_PTR_MASK; + hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) & + PCIE_EXT_CAP_ID_MASK; + + if (deadcount++ > 100) + break; + } + + if (hdr_cap_id == PCIE_EXT_CAP_ID_AER) + return (offset); + + return (0); +} + +static uint16_t +px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid) +{ + uint32_t hdr, hdr_next_ptr, hdr_cap_id; + uint16_t offset = PCI_CONF_STAT; + int deadcount = 0; + + hdr = px_fab_get(px_p, rid, PCI_CONF_COMM) >> 16; + if (!(hdr & PCI_STAT_CAP)) { + /* This is not a PCIE device */ + return (0); + } - /* - * Deadlock scenario: - * 1. A fabric or mondo 62 interrupt with respect to px0 - T1/cpu0; - * 2. While error handling thread T1 is running on cpu0, a trap - * occurs to cpu1 - T2/cpu1; - * 3. While doing error handling on T1, a precise trap occurs, - * overtaken T1 - T1+/cpu0; - * - * Why threads deadlock: - * T1 owns px_fm_mutex, T2 owns rootnex' fh_lock, but blocked on - * px_fm_mutex, T1+ blocked on rootnex' fh_lock which won't be - * released since T2 will never get px_fm_mutex since T1+ buried - * thread T1 who is responsible for releasing px_fm_mutex. - * - * Solution: - * px_fm_callback must release rootnex' fh_lock prior to acquire - * px_fm_mutex and reaquire the fh_lock after release px_fm_mutex; - * if px_fm_callback is unable to acquire px_fm_mutex, meaning the - * latest trap has either overtaken the error handling thread or an - * error handling thread on another cpu owns it, just quit with OK - * status. Note, in this case, the cpu sync error handler should - * respect nexus'return status and not to panic, otherwise system - * will hang. - */ - i_ddi_fm_handler_exit(pdip); - if (!mutex_tryenter(&px_p->px_fm_mutex)) { - i_ddi_fm_handler_enter(pdip); - return (DDI_FM_OK); + hdr = px_fab_get(px_p, rid, PCI_CONF_CAP_PTR); + hdr_next_ptr = hdr & 0xFF; + hdr_cap_id = 0; + + while ((hdr_next_ptr != PCI_CAP_NEXT_PTR_NULL) && + (hdr_cap_id != PCI_CAP_ID_PCI_E)) { + offset = hdr_next_ptr; + + if (hdr_next_ptr < 0x40) { + break; + } + + hdr = px_fab_get(px_p, rid, hdr_next_ptr); + hdr_next_ptr = (hdr >> 8) & 0xFF; + hdr_cap_id = hdr & 0xFF; + + if (deadcount++ > 100) + break; + } + + if (hdr_cap_id == PCI_CAP_ID_PCI_E) + return (offset); + + return (0); +} + +/* + * This function checks the primary status registers. + * Take the PCI status register and translate it to PCIe equivalent. + */ +static int +px_fabric_handle_psts(px_fabric_cfgspace_t *cs) { + uint16_t sts_reg = cs->sts_reg >> 16; + uint16_t pci_status; + uint32_t pcie_status; + int ret = PX_NONFATAL; + + /* Parity Err == Send/Recv Poisoned TLP */ + pci_status = PCI_STAT_S_PERROR | PCI_STAT_PERROR; + pcie_status = PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC; + if (sts_reg & pci_status) + ret |= PX_FABRIC_ERR_SEV(pcie_status, + px_fabric_die_ue, px_fabric_die_ue_gos); + + /* Target Abort == Completer Abort */ + pci_status = PCI_STAT_S_TARG_AB | PCI_STAT_R_TARG_AB; + pcie_status = PCIE_AER_UCE_CA; + if (sts_reg & pci_status) + ret |= PX_FABRIC_ERR_SEV(pcie_status, + px_fabric_die_ue, px_fabric_die_ue_gos); + + /* Master Abort == Unsupport Request */ + pci_status = PCI_STAT_R_MAST_AB; + pcie_status = PCIE_AER_UCE_UR; + if (sts_reg & pci_status) + ret |= PX_FABRIC_ERR_SEV(pcie_status, + px_fabric_die_ue, px_fabric_die_ue_gos); + + /* System Error == Uncorrectable Error */ + pci_status = PCI_STAT_S_SYSERR; + pcie_status = (uint32_t)-1; + if (sts_reg & pci_status) + ret |= PX_FABRIC_ERR_SEV(pcie_status, + px_fabric_die_ue, px_fabric_die_ue_gos); + + return (ret); +} + +/* + * This function checks the secondary status registers. + * Switches and Bridges have a different behavior. + */ +static int +px_fabric_handle_ssts(px_fabric_cfgspace_t *cs) { + uint16_t sts_reg = cs->sts_sreg >> 16; + int ret = PX_NONFATAL; + + if (cs->dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) { + /* + * This is a PCIE-PCI bridge, but only check the severity + * if this device doesn't support AERs. + */ + if (!cs->aer_off) + ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_bdg_sts, + px_fabric_die_bdg_sts_gos); + } else { + /* This is most likely a PCIE switch */ + ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_sw_sts, + px_fabric_die_sw_sts_gos); } - addr_high = (uint32_t)((uint64_t)derr->fme_bus_specific >> 32); - addr_low = (uint32_t)((uint64_t)derr->fme_bus_specific); + return (ret); +} + +/* + * This function checks and clears the primary AER. + */ +static int +px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs) { + uint32_t chk_reg, chk_reg_gos, off_reg, reg; + int ret = PX_NONFATAL; + + /* Determine severity and clear the AER */ + switch (cs->msg_code) { + case PCIE_MSG_CODE_ERR_COR: + off_reg = PCIE_AER_CE_STS; + chk_reg = px_fabric_die_ce; + chk_reg_gos = px_fabric_die_ce_gos; + reg = cs->aer_ce_reg; + break; + case PCIE_MSG_CODE_ERR_NONFATAL: + off_reg = PCIE_AER_UCE_STS; + chk_reg = px_fabric_die_ue; + chk_reg_gos = px_fabric_die_ue_gos; + reg = cs->aer_ue_reg & ~(cs->aer_sev_reg); + break; + case PCIE_MSG_CODE_ERR_FATAL: + off_reg = PCIE_AER_UCE_STS; + chk_reg = px_fabric_die_ue; + chk_reg_gos = px_fabric_die_ue_gos; + reg = cs->aer_ue_reg & cs->aer_sev_reg; + break; + default: + /* Major error force a panic */ + return (PX_FATAL_GOS); + } + px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg); + ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos); + + return (ret); +} + +/* + * This function checks and clears the secondary AER. + */ +static int +px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs) { + uint32_t chk_reg, chk_reg_gos, off_reg, reg; + uint32_t sev; + int ret = PX_NONFATAL; - /* - * Make sure this failed load came from this PCIe port. Check by - * matching the upper 32 bits of the address with the ranges property. - */ - range_len = px_p->px_ranges_length / sizeof (px_ranges_t); - i = 0; - for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) { - if (ranges_p->parent_high == addr_high) { - switch (ranges_p->child_high & PCI_ADDR_MASK) { - case PCI_ADDR_CONFIG: - acc_type = PF_CFG_ADDR; - addr = NULL; - bdf = (pcie_req_id_t)(addr_low >> 12); - break; - case PCI_ADDR_MEM32: - acc_type = PF_DMA_ADDR; - addr = addr_low; - bdf = NULL; - break; - } - break; - } + /* Determine severity and clear the AER */ + switch (cs->msg_code) { + case PCIE_MSG_CODE_ERR_COR: + /* Ignore Correctable Errors */ + sev = 0; + break; + case PCIE_MSG_CODE_ERR_NONFATAL: + sev = ~(cs->aer_sev_sreg); + break; + case PCIE_MSG_CODE_ERR_FATAL: + sev = cs->aer_sev_sreg; + break; + default: + /* Major error force a panic */ + return (DDI_FM_FATAL); + } + off_reg = PCIE_AER_SUCE_STS; + chk_reg = px_fabric_die_sue; + chk_reg_gos = px_fabric_die_sue_gos; + reg = cs->aer_ue_sreg & sev; + px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg); + ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos); + + return (ret); +} + +static int +px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs) +{ + pcie_req_id_t rid = cs->rid; + uint16_t cap_off = cs->cap_off; + uint16_t aer_off = cs->aer_off; + uint8_t hdr_type = cs->hdr_type; + uint16_t dev_type = cs->dev_type; + int ret = PX_NONFATAL; + + if (hdr_type == PCI_HEADER_PPB) { + ret |= px_fabric_handle_ssts(cs); + } + + if (!aer_off) { + ret |= px_fabric_handle_psts(cs); + } + + if (aer_off) { + ret |= px_fabric_handle_paer(px_p, cs); + } + + if (aer_off && (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI)) { + ret |= px_fabric_handle_saer(px_p, cs); + } + + /* Clear the standard PCIe error registers */ + px_fab_set(px_p, rid, cap_off + PCIE_DEVCTL, cs->dev_sts_reg); + + /* Clear the legacy error registers */ + px_fab_set(px_p, rid, PCI_CONF_COMM, cs->sts_reg); + + /* Clear the legacy secondary error registers */ + if (hdr_type == PCI_HEADER_PPB) { + px_fab_set(px_p, rid, PCI_BCNF_IO_BASE_LOW, + cs->sts_sreg); } - /* This address doesn't belong to this leaf, just return with OK */ - if (!acc_type) { - mutex_exit(&px_p->px_fm_mutex); - i_ddi_fm_handler_enter(pdip); - return (DDI_FM_OK); + return (ret); +} + +static void +px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs) +{ + uint16_t cap_off, aer_off; + pcie_req_id_t rid = cs->rid; + + /* Gather Basic Device Information */ + cs->hdr_type = (px_fab_get(px_p, rid, PCI_CONF_CACHE_LINESZ) >> 16) & + PCI_HEADER_TYPE_M; + + cs->cap_off = px_fabric_get_pciecap(px_p, rid); + cap_off = cs->cap_off; + if (!cap_off) + return; + + cs->aer_off = px_fabric_get_aer(px_p, rid); + aer_off = cs->aer_off; + + cs->dev_type = px_fab_get(px_p, rid, cap_off) >> 16; + cs->dev_type &= PCIE_PCIECAP_DEV_TYPE_MASK; + + /* Get the Primary Sts Reg */ + cs->sts_reg = px_fab_get(px_p, rid, PCI_CONF_COMM); + + /* If it is a bridge/switch get the Secondary Sts Reg */ + if (cs->hdr_type == PCI_HEADER_PPB) + cs->sts_sreg = px_fab_get(px_p, rid, + PCI_BCNF_IO_BASE_LOW); + + /* Get the PCIe Dev Sts Reg */ + cs->dev_sts_reg = px_fab_get(px_p, rid, + cap_off + PCIE_DEVCTL); + + if (!aer_off) + return; + + /* Get the AER register information */ + cs->aer_ce_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_CE_STS); + cs->aer_ue_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_STS); + cs->aer_sev_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_SERV); + cs->aer_h1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x0); + cs->aer_h2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x4); + cs->aer_h3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x8); + cs->aer_h4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0xC); + + if (cs->dev_type != PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) + return; + + /* If this is a bridge check secondary aer */ + cs->aer_ue_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_STS); + cs->aer_sev_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_SERV); + cs->aer_sh1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x0); + cs->aer_sh2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x4); + cs->aer_sh3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x8); + cs->aer_sh4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0xC); +} + +/* + * If a fabric intr occurs, query and clear the error registers on that device. + * Based on the error found return DDI_FM_OK or DDI_FM_FATAL. + */ +static uint_t +px_fabric_check(px_t *px_p, msgcode_t msg_code, + pcie_req_id_t rid, ddi_fm_error_t *derr) +{ + dev_info_t *dip = px_p->px_dip; + char buf[FM_MAX_CLASS]; + px_fabric_cfgspace_t cs; + int ret; + + /* clear cs */ + bzero(&cs, sizeof (px_fabric_cfgspace_t)); + + cs.msg_code = msg_code; + cs.rid = rid; + + px_fabric_fill_cs(px_p, &cs); + if (cs.cap_off) + ret = px_fabric_handle(px_p, &cs); + else + ret = PX_FATAL_GOS; + + (void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_FABRIC_CLASS); + ddi_fm_ereport_post(dip, buf, derr->fme_ena, + DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, + PX_FM_FABRIC_MSG_CODE, DATA_TYPE_UINT8, msg_code, + PX_FM_FABRIC_REQ_ID, DATA_TYPE_UINT16, rid, + "cap_off", DATA_TYPE_UINT16, cs.cap_off, + "aer_off", DATA_TYPE_UINT16, cs.aer_off, + "sts_reg", DATA_TYPE_UINT16, cs.sts_reg >> 16, + "sts_sreg", DATA_TYPE_UINT16, cs.sts_sreg >> 16, + "dev_sts_reg", DATA_TYPE_UINT16, cs.dev_sts_reg >> 16, + "aer_ce", DATA_TYPE_UINT32, cs.aer_ce_reg, + "aer_ue", DATA_TYPE_UINT32, cs.aer_ue_reg, + "aer_sev", DATA_TYPE_UINT32, cs.aer_sev_reg, + "aer_h1", DATA_TYPE_UINT32, cs.aer_h1, + "aer_h2", DATA_TYPE_UINT32, cs.aer_h2, + "aer_h3", DATA_TYPE_UINT32, cs.aer_h3, + "aer_h4", DATA_TYPE_UINT32, cs.aer_h4, + "saer_ue", DATA_TYPE_UINT32, cs.aer_ue_sreg, + "saer_sev", DATA_TYPE_UINT32, cs.aer_sev_sreg, + "saer_h1", DATA_TYPE_UINT32, cs.aer_sh1, + "saer_h2", DATA_TYPE_UINT32, cs.aer_sh2, + "saer_h3", DATA_TYPE_UINT32, cs.aer_sh3, + "saer_h4", DATA_TYPE_UINT32, cs.aer_sh4, + "severity", DATA_TYPE_UINT32, ret, + NULL); + + /* Check for protected access */ + switch (derr->fme_flag) { + case DDI_FM_ERR_EXPECTED: + case DDI_FM_ERR_PEEK: + case DDI_FM_ERR_POKE: + ret &= PX_FATAL_GOS; + break; } - rc_err = px_err_cmn_intr(px_p, derr, PX_TRAP_CALL, PX_FM_BLOCK_ALL); - lookup = pf_hdl_lookup(dip, derr->fme_ena, acc_type, addr, bdf); - if (!px_lib_is_in_drain_state(px_p)) { - /* - * This is to ensure that device corresponding to the addr of - * the failed PIO/CFG load gets scanned. - */ - px_rp_en_q(px_p, bdf, addr, - (PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB)); - fab_err = pf_scan_fabric(dip, derr, px_p->px_dq_p, - &px_p->px_dq_tail); - } - - mutex_exit(&px_p->px_fm_mutex); - i_ddi_fm_handler_enter(pdip); - - if ((rc_err & (PX_PANIC | PX_PROTECTED)) || (fab_err & PF_PANIC) || - (lookup == PF_HDL_NOTFOUND)) - return (DDI_FM_FATAL); - else if ((rc_err == PX_NO_ERROR) && (fab_err == PF_NO_ERROR)) - return (DDI_FM_OK); - - return (DDI_FM_NONFATAL); + if (px_fabric_die && + (ret & (PX_FATAL_GOS | PX_FATAL_SW))) + ret = DDI_FM_FATAL; + return (ret); } /* @@ -336,7 +698,7 @@ * Interrupt handler for PCIE fabric block. * o lock * o create derr - * o px_err_cmn_intr(leaf, with jbc) + * o px_err_handle(leaf, with jbc) * o send ereport(fire fmri, derr, payload = BDF) * o dispatch (leaf) * o unlock @@ -344,10 +706,11 @@ */ /* ARGSUSED */ uint_t -px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, pcie_req_id_t rid) +px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, + pcie_req_id_t rid) { dev_info_t *rpdip = px_p->px_dip; - int rc_err, fab_err = PF_NO_PANIC; + int err = PX_OK, ret = DDI_FM_OK, fab_err = DDI_FM_OK; ddi_fm_error_t derr; mutex_enter(&px_p->px_fm_mutex); @@ -358,20 +721,26 @@ derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1); derr.fme_flag = DDI_FM_ERR_UNEXPECTED; - /* Ensure that the rid of the fabric message will get scanned. */ - px_rp_en_q(px_p, rid, NULL, NULL); - - rc_err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_PCIE); + /* send ereport/handle/clear fire registers */ + err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE); - /* call rootport dispatch */ - if (!px_lib_is_in_drain_state(px_p)) { - fab_err = pf_scan_fabric(rpdip, &derr, px_p->px_dq_p, - &px_p->px_dq_tail); - } + /* Check and clear the fabric error */ + fab_err = px_fabric_check(px_p, msg_code, rid, &derr); + + /* Check all child devices for errors */ + ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); mutex_exit(&px_p->px_fm_mutex); - px_err_panic(rc_err, PX_RC, fab_err); + /* + * PX_FATAL_HW indicates a condition recovered from Fatal-Reset, + * therefore it does not cause panic. + */ + if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || + (ret == DDI_FM_FATAL) || (fab_err == DDI_FM_FATAL)) + PX_FM_PANIC("%s#%d: Fatal PCIe Fabric Error has occurred" + "(%x,%x,%x)\n", ddi_driver_name(rpdip), + ddi_get_instance(rpdip), err, fab_err, ret); return (DDI_INTR_CLAIMED); } @@ -438,255 +807,3 @@ break; } } - -/* - * Suggest panic if any EQ (except CE q) has overflown. - */ -int -px_err_check_eq(dev_info_t *dip) -{ - px_t *px_p = DIP_TO_STATE(dip); - px_msiq_state_t *msiq_state_p = &px_p->px_ib_p->ib_msiq_state; - px_pec_t *pec_p = px_p->px_pec_p; - msiqid_t eq_no = msiq_state_p->msiq_1st_msiq_id; - pci_msiq_state_t msiq_state; - int i; - - for (i = 0; i < msiq_state_p->msiq_cnt; i++) { - if (i + eq_no == pec_p->pec_corr_msg_msiq_id) /* skip CE q */ - continue; - if ((px_lib_msiq_getstate(dip, i + eq_no, &msiq_state) != - DDI_SUCCESS) || msiq_state == PCI_MSIQ_STATE_ERROR) - return (PX_PANIC); - } - return (PX_NO_PANIC); -} - -static void -px_err_fill_pfd(dev_info_t *rpdip, px_err_pcie_t *regs) -{ - px_t *px_p = DIP_TO_STATE(rpdip); - pf_data_t pf_data = {0}; - pcie_req_id_t fault_bdf = 0; - uint32_t fault_addr = 0; - uint16_t s_status = 0; - - /* - * set RC s_status in PCI term to coordinate with downstream fabric - * errors ananlysis. - */ - if (regs->primary_ue & PCIE_AER_UCE_UR) - s_status = PCI_STAT_R_MAST_AB; - if (regs->primary_ue & PCIE_AER_UCE_CA) - s_status = PCI_STAT_R_TARG_AB; - if (regs->primary_ue & (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC)) - s_status = PCI_STAT_PERROR; - - if (regs->primary_ue & (PCIE_AER_UCE_UR | PCIE_AER_UCE_CA)) { - pf_data.aer_h0 = regs->rx_hdr1; - pf_data.aer_h1 = regs->rx_hdr2; - pf_data.aer_h2 = regs->rx_hdr3; - pf_data.aer_h3 = regs->rx_hdr4; - - pf_tlp_decode(rpdip, &pf_data, &fault_bdf, NULL, NULL); - } else if (regs->primary_ue & PCIE_AER_UCE_PTLP) { - pcie_tlp_hdr_t *tlp_p; - - pf_data.aer_h0 = regs->rx_hdr1; - pf_data.aer_h1 = regs->rx_hdr2; - pf_data.aer_h2 = regs->rx_hdr3; - pf_data.aer_h3 = regs->rx_hdr4; - - tlp_p = (pcie_tlp_hdr_t *)&pf_data.aer_h0; - if (tlp_p->type == PCIE_TLP_TYPE_CPL) - pf_tlp_decode(rpdip, &pf_data, &fault_bdf, NULL, NULL); - - pf_data.aer_h0 = regs->tx_hdr1; - pf_data.aer_h1 = regs->tx_hdr2; - pf_data.aer_h2 = regs->tx_hdr3; - pf_data.aer_h3 = regs->tx_hdr4; - - pf_tlp_decode(rpdip, &pf_data, NULL, &fault_addr, NULL); - } - - px_rp_en_q(px_p, fault_bdf, fault_addr, s_status); -} - -int -px_err_check_pcie(dev_info_t *dip, ddi_fm_error_t *derr, px_err_pcie_t *regs) -{ - uint32_t ce_reg, ue_reg; - int err = PX_NO_ERROR; - - ce_reg = regs->ce_reg; - if (ce_reg) - err |= (ce_reg & px_fabric_die_rc_ce) ? PX_PANIC : PX_NO_ERROR; - - ue_reg = regs->ue_reg; - if (!ue_reg) - goto done; - - if (ue_reg & PCIE_AER_UCE_PTLP) - err |= px_pcie_ptlp(dip, derr, regs); - - if (ue_reg & PX_PCIE_PANIC_BITS) - err |= PX_PANIC; - - if (ue_reg & PX_PCIE_NO_PANIC_BITS) - err |= PX_NO_PANIC; - - /* Scan the fabric to clean up error bits, for the following errors. */ - if (ue_reg & (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_CA | PCIE_AER_UCE_UR)) - px_err_fill_pfd(dip, regs); -done: - px_pcie_log(dip, regs, err); - return (err); -} - -#if defined(DEBUG) -static void -px_pcie_log(dev_info_t *dip, px_err_pcie_t *regs, int severity) -{ - DBG(DBG_ERR_INTR, dip, - "A PCIe RC error has occured with a severity of \"%s\"\n" - "\tCE: 0x%x UE: 0x%x Primary UE: 0x%x\n" - "\tTX Hdr: 0x%x 0x%x 0x%x 0x%x\n\tRX Hdr: 0x%x 0x%x 0x%x 0x%x\n", - (severity & PX_PANIC) ? "PANIC" : "NO PANIC", regs->ce_reg, - regs->ue_reg, regs->primary_ue, regs->tx_hdr1, regs->tx_hdr2, - regs->tx_hdr3, regs->tx_hdr4, regs->rx_hdr1, regs->rx_hdr2, - regs->rx_hdr3, regs->rx_hdr4); -} -#endif /* DEBUG */ - -/* - * look through poisoned TLP cases and suggest panic/no panic depend on - * handle lookup. - */ -static int -px_pcie_ptlp(dev_info_t *dip, ddi_fm_error_t *derr, px_err_pcie_t *regs) -{ - pf_data_t pf_data; - pcie_req_id_t bdf; - uint32_t addr, trans_type; - int tlp_sts, tlp_cmd; - int sts = PF_HDL_NOTFOUND; - - if (regs->primary_ue != PCIE_AER_UCE_PTLP) - return (PX_PANIC); - - if (!regs->rx_hdr1) - goto done; - - pf_data.aer_h0 = regs->rx_hdr1; - pf_data.aer_h1 = regs->rx_hdr2; - pf_data.aer_h2 = regs->rx_hdr3; - pf_data.aer_h3 = regs->rx_hdr4; - - tlp_sts = pf_tlp_decode(dip, &pf_data, &bdf, &addr, &trans_type); - tlp_cmd = ((pcie_tlp_hdr_t *)(&pf_data.aer_h0))->type; - - if (tlp_sts == DDI_FAILURE) - goto done; - - switch (tlp_cmd) { - case PCIE_TLP_TYPE_CPL: - case PCIE_TLP_TYPE_CPLLK: - /* - * Usually a PTLP is a CPL with data. Grab the completer BDF - * from the RX TLP, and the original address from the TX TLP. - */ - if (regs->tx_hdr1) { - pf_data.aer_h0 = regs->tx_hdr1; - pf_data.aer_h1 = regs->tx_hdr2; - pf_data.aer_h2 = regs->tx_hdr3; - pf_data.aer_h3 = regs->tx_hdr4; - - sts = pf_tlp_decode(dip, &pf_data, NULL, &addr, - &trans_type); - } /* FALLTHRU */ - case PCIE_TLP_TYPE_IO: - case PCIE_TLP_TYPE_MEM: - case PCIE_TLP_TYPE_MEMLK: - sts = pf_hdl_lookup(dip, derr->fme_ena, trans_type, addr, bdf); - break; - default: - sts = PF_HDL_NOTFOUND; - } -done: - return (sts == PF_HDL_NOTFOUND ? PX_PANIC : PX_NO_PANIC); -} - -/* - * This function appends a pf_data structure to the error q which is used later - * during PCIe fabric scan. It signifies: - * o errs rcvd in RC, that may have been propagated to/from the fabric - * o the fabric scan code should scan the device path of fault bdf/addr - * - * fault_bdf: The bdf that caused the fault, which may have error bits set. - * fault_addr: The PIO addr that caused the fault, such as failed PIO, but not - * failed DMAs. - * s_status: Secondary Status equivalent to why the fault occured. - * (ie S-TA/MA, R-TA) - * Either the fault bdf or addr may be NULL, but not both. - */ -int px_foo = 0; -void -px_rp_en_q(px_t *px_p, pcie_req_id_t fault_bdf, uint32_t fault_addr, - uint16_t s_status) -{ - pf_data_t pf_data = {0}; - - if (!fault_bdf && !fault_addr) - return; - - pf_data.dev_type = PCIE_PCIECAP_DEV_TYPE_ROOT; - if (px_foo) { - pf_data.fault_bdf = px_foo; - px_foo = 0; - } else - pf_data.fault_bdf = fault_bdf; - - pf_data.fault_addr = fault_addr; - pf_data.s_status = s_status; - pf_data.send_erpt = PF_SEND_ERPT_NO; - - (void) pf_en_dq(&pf_data, px_p->px_dq_p, &px_p->px_dq_tail, -1); -} - -/* - * Panic if the err tunable is set and that we are not already in the middle - * of panic'ing. - */ -#define MSZ (sizeof (fm_msg) -strlen(fm_msg) - 1) -void -px_err_panic(int err, int msg, int fab_err) -{ - char fm_msg[96] = ""; - int ferr = PX_NO_ERROR; - - if (panicstr) - return; - - if (!(err & px_die)) - goto fabric; - if (msg & PX_RC) - (void) strncat(fm_msg, px_panic_rc_msg, MSZ); - if (msg & PX_RP) - (void) strncat(fm_msg, px_panic_rp_msg, MSZ); - if (msg & PX_HB) - (void) strncat(fm_msg, px_panic_hb_msg, MSZ); - -fabric: - if (fab_err & PF_PANIC) - ferr = PX_PANIC; - if (fab_err & ~(PF_PANIC | PF_NO_ERROR)) - ferr = PX_NO_PANIC; - if (ferr & px_die) { - if (strlen(fm_msg)) - (void) strncat(fm_msg, " and", MSZ); - (void) strncat(fm_msg, px_panic_fab_msg, MSZ); - } - - if (strlen(fm_msg)) - fm_panic("Fatal error has occured in:%s.", fm_msg); -}
--- a/usr/src/uts/sun4/io/px/px_fm.h Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4/io/px/px_fm.h Mon Dec 18 11:06:59 2006 -0800 @@ -2,8 +2,9 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -45,54 +46,27 @@ /* * Definition of Fire internal error severity - - * HW Reset Errors that cause hardware to automatically reset. Software is - * being reset along, sticky status bits need to be cleaned up upon - * system initialization. - * Panic Errors that definitely result in panic'ing the system. - * Expected Expected error, do not panic, plus do not send ereport. - * Protected Errors SW to determine panic or not, forgivable for safe access. - * Set when SW determines this error is forgivable during safe acc. - * No-panic Errors that don't directly result in panic'ing the system. - * No-Error When an interrupt occured and no errors were seen + * PX_FATAL_HW: errors that automatically cause Fire HW reset, + * PX_FATAL_GOS: errors that causes OS cease to function immediately, + * PX_STUCK_FATAL: errors that is likely to spam, causing hang, + * PX_FATAL_SW: errors that cause partial OS lose function, + * PX_NONFATAL: errors that can be recovered or ignored. */ -#define PX_HW_RESET (0x1 << 5) -#define PX_PANIC (0x1 << 4) -#define PX_EXPECTED (0x1 << 3) -#define PX_PROTECTED (0x1 << 2) -#define PX_NO_PANIC (0x1 << 1) -#define PX_NO_ERROR (0x1 << 0) - -#define PX_HB (0x1 << 2) -#define PX_RP (0x1 << 1) -#define PX_RC (0x1 << 0) +#define PX_FATAL_HW 0x10 +#define PX_FATAL_GOS 0x8 +#define PX_STUCK_FATAL 0x4 +#define PX_FATAL_SW 0x2 +#define PX_NONFATAL 0x1 +#define PX_OK DDI_FM_OK +#define PX_ERR_UNKNOWN 0x80 -/* - * Generic PCIe Root Port Error Handling - * This struct must align with px_pec_err_t in sun4v/io/px/px_err.h - */ -typedef struct px_err_pcie { - uint32_t tx_hdr1; /* sysino */ - uint32_t tx_hdr2; /* sysino */ - uint32_t tx_hdr3; /* ehdl */ - uint32_t tx_hdr4; /* ehdl */ - uint32_t primary_ue; /* stick */ - uint32_t rsvd0; /* stick */ - uint32_t rsvd1; /* pec_desc */ - uint16_t pci_err_status; - uint16_t pcie_err_status; - uint32_t ce_reg; - uint32_t ue_reg; - uint32_t rx_hdr1; /* hdr[0] */ - uint32_t rx_hdr2; /* hdr[0] */ - uint32_t rx_hdr3; /* hdr[1] */ - uint32_t rx_hdr4; /* hdr[1] */ - uint32_t rsvd3; /* err_src_reg */ - uint32_t rsvd4; /* root err status */ -} px_err_pcie_t; +#define PX_FM_FABRIC_CLASS PCIEX_FIRE ".fabric" +#define PX_FM_FABRIC_MSG_CODE "msg_code" +#define PX_FM_FABRIC_REQ_ID "req_id" -#define PX_FM_BLOCK_HOST (0x1 << 0) -#define PX_FM_BLOCK_PCIE (0x1 << 1) -#define PX_FM_BLOCK_ALL (PX_FM_BLOCK_HOST | PX_FM_BLOCK_PCIE) +#define PX_FABRIC_ERR_SEV(reg, chk, chk_gos) \ + ((reg & chk) ? ((reg & chk_gos) ? \ + PX_FATAL_GOS : PX_FATAL_SW) : PX_NONFATAL) /* * Error handling FMA hook @@ -104,8 +78,10 @@ extern int px_fm_init_child(dev_info_t *, dev_info_t *, int, ddi_iblock_cookie_t *); extern void px_fm_acc_setup(ddi_map_req_t *, dev_info_t *); +extern int px_handle_lookup(dev_info_t *, int, uint64_t, void *); extern int px_fm_callback(dev_info_t *, ddi_fm_error_t *, const void *); -extern int px_err_cmn_intr(px_t *, ddi_fm_error_t *, int, int); +extern int px_err_handle(px_t *px_p, ddi_fm_error_t *derr, int caller, + boolean_t checkjbc); /* * Fire interrupt handlers @@ -119,12 +95,8 @@ * Common error handling functions */ extern void px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr); -extern int px_err_check_eq(dev_info_t *dip); -extern int px_err_check_pcie(dev_info_t *dip, ddi_fm_error_t *derr, - px_err_pcie_t *regs); -extern void px_err_panic(int err, int msg, int fab_err); -extern void px_rp_en_q(px_t *px_p, pcie_req_id_t fault_bdf, - uint32_t fault_addr, uint16_t s_status); +#define PX_FM_PANIC \ + if (!panicstr) fm_panic #ifdef __cplusplus }
--- a/usr/src/uts/sun4/io/px/px_pci.c Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4/io/px/px_pci.c Mon Dec 18 11:06:59 2006 -0800 @@ -55,30 +55,9 @@ #include <sys/open.h> #include <sys/stat.h> #include <sys/file.h> -#include <sys/promif.h> /* prom_printf */ #include "pcie_pwr.h" #include "px_pci.h" - -#if defined(DEBUG) -#define DBG pxb_dbg -static void pxb_dbg(uint_t bit, dev_info_t *dip, char *fmt, ...); -static uint_t pxb_dbg_print = 0; - -#else /* DEBUG */ - -#define DBG 0 && - -#endif /* DEBUG */ - -typedef enum { /* same sequence as px_debug_sym[] */ - /* 0 */ DBG_ATTACH, - /* 1 */ DBG_PWR -} pxb_debug_bit_t; - -static char *pxb_debug_sym [] = { /* same sequence as px_debug_bit */ - /* 0 */ "attach", - /* 1 */ "pwr" -}; +#include "px_debug.h" /* Tunables. Beware: Some are for debug purpose only. */ /* @@ -220,7 +199,7 @@ static struct modldrv modldrv = { &mod_driverops, /* Type of module */ - "PCIe/PCI nexus driver %I%", + "PCIe/PCI nexus driver 1.29", &pxb_ops, /* driver ops */ }; @@ -605,10 +584,6 @@ int totreg; struct detachspec *ds; struct attachspec *as; - pxb_devstate_t *pxb_p; - - pxb_p = (pxb_devstate_t *)ddi_get_soft_state(pxb_state, - ddi_get_instance(dip)); switch (ctlop) { case DDI_CTLOPS_REPORTDEV: @@ -637,9 +612,6 @@ break; case DDI_CTLOPS_ATTACH: - if (!pcie_is_child(dip, rdip)) - return (DDI_SUCCESS); - as = (struct attachspec *)arg; switch (as->when) { case DDI_PRE: @@ -671,8 +643,6 @@ if (as->cmd == DDI_ATTACH && as->result != DDI_SUCCESS) pcie_pm_release(dip); - pf_init(rdip, (void *)pxb_p->pxb_fm_ibc); - /* * For hotplug-capable slots, we should explicitly * disable the errors, so that we won't panic upon @@ -697,15 +667,8 @@ break; case DDI_CTLOPS_DETACH: - if (!pcie_is_child(dip, rdip)) - return (DDI_SUCCESS); - ds = (struct detachspec *)arg; switch (ds->when) { - case DDI_PRE: - pf_fini(rdip); - return (DDI_SUCCESS); - case DDI_POST: if (ds->cmd == DDI_DETACH && ds->result == DDI_SUCCESS) { @@ -1770,7 +1733,8 @@ pxb_fm_err_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data) { - return (DDI_FM_OK); + pci_ereport_post(dip, derr, NULL); + return (derr->fme_status); } /* @@ -2050,26 +2014,3 @@ cache_flags)); } #endif /* BCM_SW_WORKAROUNDS */ - -#ifdef DEBUG -static void -pxb_dbg(uint_t bit, dev_info_t *dip, char *fmt, ...) -{ - va_list ap; - - if (!(bit & pxb_dbg_print)) - return; - - if (dip) - prom_printf("%s(%d): %s", ddi_driver_name(dip), - ddi_get_instance(dip), pxb_debug_sym[bit]); -body: - va_start(ap, fmt); - if (ap) - prom_vprintf(fmt, ap); - else - prom_printf(fmt); - - va_end(ap); -} -#endif
--- a/usr/src/uts/sun4/io/px/px_space.c Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4/io/px/px_space.c Mon Dec 18 11:06:59 2006 -0800 @@ -30,12 +30,10 @@ */ #include <sys/types.h> -#include <sys/ddi.h> -#include <sys/sunddi.h> #include <sys/cmn_err.h> #include <sys/time.h> #include <sys/pcie.h> -#include "px_obj.h" +#include "px_space.h" /*LINTLIBRARY*/ @@ -80,11 +78,6 @@ uint64_t px_serr_fatal = -1ull; uint64_t px_errtrig_pa = 0x0; -char px_panic_hb_msg[] = " System bus"; -char px_panic_rc_msg[] = " PCIe root complex"; -char px_panic_rp_msg[] = " PCIe root port"; -char px_panic_fab_msg[] = " PCIe fabric"; - /* * The following flag controls behavior of the ino handler routine * when multiple interrupts are attached to a single ino. Typically @@ -164,24 +157,24 @@ uint32_t px_max_l1_tries = PX_MAX_L1_TRIES; -/* Print and Log tunables. The following variables are booleans */ -#ifdef DEBUG -uint32_t px_log = PX_PANIC | PX_NO_PANIC | PX_PROTECTED | PX_HW_RESET; -#else -uint32_t px_log = PX_PANIC; -#endif -uint32_t px_die = PX_PANIC | PX_PROTECTED | PX_HW_RESET; +/* Fire PCIe Error that should cause panics */ +uint32_t px_fabric_die = 1; -/* Fire PCIe Error that should cause panics */ -boolean_t px_fabric_die = B_TRUE; +uint32_t px_fabric_die_rc_ce = 0; +uint32_t px_fabric_die_rc_ue = PCIE_AER_UCE_UR | + PCIE_AER_UCE_TO | + PCIE_AER_UCE_RO | + PCIE_AER_UCE_FCP | + PCIE_AER_UCE_DLP | + PCIE_AER_UCE_ECRC | + PCIE_AER_UCE_PTLP | + PCIE_AER_UCE_MTLP; -/* Root Complex PCIe Error bit flags that should cause panics */ -uint32_t px_fabric_die_rc_ce = 0; -uint32_t px_fabric_die_rc_ue = 0; - -/* Root Complex PCIe Error bit flags that should cause forgiven */ -uint32_t px_fabric_forgive_rc_ce = 0; -uint32_t px_fabric_forgive_rc_ue = 0; +/* Fire PCIe Error that should cause panics even under protected access */ +uint32_t px_fabric_die_rc_ce_gos = 0; +uint32_t px_fabric_die_rc_ue_gos = PCIE_AER_UCE_RO | + PCIE_AER_UCE_FCP | + PCIE_AER_UCE_DLP; /* Fabric Error that should cause panics */ uint32_t px_fabric_die_ce = 0; @@ -191,7 +184,9 @@ PCIE_AER_UCE_RO | PCIE_AER_UCE_FCP | PCIE_AER_UCE_DLP | - PCIE_AER_UCE_TRAINING; + PCIE_AER_UCE_TRAINING | + PCIE_AER_UCE_PTLP | + PCIE_AER_UCE_MTLP; /* Fabric Error that should cause panics even under protected access */ uint32_t px_fabric_die_ce_gos = 0;
--- a/usr/src/uts/sun4/io/px/px_space.h Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4/io/px/px_space.h Mon Dec 18 11:06:59 2006 -0800 @@ -34,12 +34,6 @@ #define PX_SPURINTR_MSG_DEFAULT -1ull -extern char px_panic_hb_msg[]; -extern char px_panic_rc_msg[]; -extern char px_panic_rp_msg[]; -extern char px_panic_fab_msg[]; - -extern uint_t px_max_errorq_size; extern ushort_t px_command_default; extern uint_t px_set_latency_timer_register; extern uint64_t px_perr_fatal; @@ -111,16 +105,12 @@ extern uint32_t px_pwr_pil; extern uint32_t px_max_l1_tries; -/* Print and Log tunables */ -extern uint32_t px_log; -extern uint32_t px_die; - /* Fabric Error that should cause panics */ -extern boolean_t px_fabric_die; +extern uint32_t px_fabric_die; extern uint32_t px_fabric_die_rc_ce; extern uint32_t px_fabric_die_rc_ue; -extern uint32_t px_fabric_forgive_rc_ce; -extern uint32_t px_fabric_forgive_rc_ue; +extern uint32_t px_fabric_die_rc_ce_gos; +extern uint32_t px_fabric_die_rc_ue_gos; extern uint32_t px_fabric_die_ce; extern uint32_t px_fabric_die_ue; extern uint32_t px_fabric_die_ce_gos;
--- a/usr/src/uts/sun4/io/px/px_var.h Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4/io/px/px_var.h Mon Dec 18 11:06:59 2006 -0800 @@ -146,13 +146,6 @@ /* CPR callback id */ callb_id_t px_cprcb_id; uint32_t px_dma_sync_opt; /* DMA syncing req. of hw */ - - /* Handle for soft intr */ - ddi_softint_handle_t px_dbg_hdl; /* HDL for dbg printing */ - - /* array to keep track of register snapshots during error handling */ - int px_dq_tail; /* last valid index in cs array */ - pf_data_t *px_dq_p; }; /* px soft state flag */
--- a/usr/src/uts/sun4u/io/pci/pci_pci.c Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4u/io/pci/pci_pci.c Mon Dec 18 11:06:59 2006 -0800 @@ -36,8 +36,8 @@ #include <sys/autoconf.h> #include <sys/ddi_impldefs.h> #include <sys/ddi_subrdefs.h> -#include <sys/pcie.h> -#include <sys/pcie_impl.h> +#include <sys/pci.h> +#include <sys/pci_impl.h> #include <sys/pci_cap.h> #include <sys/pci/pci_nexus.h> #include <sys/pci/pci_regs.h> @@ -140,6 +140,7 @@ cred_t *credp, int *rvalp); static int ppb_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags, char *name, caddr_t valuep, int *lengthp); +static int ppb_get_bdf_from_dip(dev_info_t *dip, uint32_t *bdf); static struct cb_ops ppb_cb_ops = { ppb_open, /* open */ @@ -243,8 +244,6 @@ #define PPB_SOFT_STATE_OPEN_EXCL 0x02 int fm_cap; ddi_iblock_cookie_t fm_ibc; - - uint8_t parent_bus; } ppb_devstate_t; /* @@ -292,7 +291,6 @@ static void ppb_removechild(dev_info_t *); static int ppb_initchild(dev_info_t *child); -static void ppb_uninitchild(dev_info_t *child); static dev_info_t *get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip); static void ppb_pwr_setup(ppb_devstate_t *ppb, dev_info_t *dip); static void ppb_pwr_teardown(ppb_devstate_t *ppb, dev_info_t *dip); @@ -547,13 +545,8 @@ pci_regspec_t *drv_regp; int reglen; int rn; - struct attachspec *as; - struct detachspec *ds; + int totreg; - ppb_devstate_t *ppb_p; - - ppb_p = (ppb_devstate_t *)ddi_get_soft_state(ppb_state, - ddi_get_instance(dip)); switch (ctlop) { case DDI_CTLOPS_REPORTDEV: @@ -569,29 +562,7 @@ return (ppb_initchild((dev_info_t *)arg)); case DDI_CTLOPS_UNINITCHILD: - ppb_uninitchild((dev_info_t *)arg); - return (DDI_SUCCESS); - - case DDI_CTLOPS_ATTACH: - if (!pcie_is_child(dip, rdip)) - return (DDI_SUCCESS); - - as = (struct attachspec *)arg; - if ((ppb_p->parent_bus == PCIE_PCIECAP_DEV_TYPE_PCIE_DEV) && - (as->when == DDI_POST)) - pf_init(rdip, ppb_p->fm_ibc); - - return (DDI_SUCCESS); - - case DDI_CTLOPS_DETACH: - if (!pcie_is_child(dip, rdip)) - return (DDI_SUCCESS); - - ds = (struct detachspec *)arg; - if ((ppb_p->parent_bus == PCIE_PCIECAP_DEV_TYPE_PCIE_DEV) && - (ds->when == DDI_PRE)) - pf_fini(rdip); - + ppb_removechild((dev_info_t *)arg); return (DDI_SUCCESS); case DDI_CTLOPS_SIDDEV: @@ -779,6 +750,7 @@ uchar_t header_type; uchar_t min_gnt, latency_timer; ppb_devstate_t *ppb; + pci_parent_data_t *pd_p; /* * Name the child @@ -826,11 +798,11 @@ return (DDI_NOT_WELL_FORMED); } + ddi_set_parent_data(child, NULL); + ppb = (ppb_devstate_t *)ddi_get_soft_state(ppb_state, ddi_get_instance(ddi_get_parent(child))); - ddi_set_parent_data(child, NULL); - /* * If hardware is PM capable, set up the power info structure. * This also ensures the the bus will not be off (0MHz) otherwise @@ -894,7 +866,7 @@ * If the device has a bus control register then program it * based on the settings in the command register. */ - if ((header_type & PCI_HEADER_TYPE_M) == PCI_HEADER_ONE) { + if ((header_type & PCI_HEADER_TYPE_M) == PCI_HEADER_ONE) { bcr = pci_config_get8(config_handle, PCI_BCNF_BCNTRL); if (ppb_command_default & PCI_COMM_PARITY_DETECT) bcr |= PCI_BCNF_BCNTRL_PARITY_ENABLE; @@ -945,18 +917,6 @@ } /* - * SPARC PCIe FMA specific - * - * Note: parent_data for parent is created only if this is sparc PCI-E - * platform, for which, SG take a different route to handle device - * errors. - */ - if (ppb->parent_bus == PCIE_PCIECAP_DEV_TYPE_PCIE_DEV) { - if (pcie_init_ppd(child) == NULL) - return (DDI_FAILURE); - } - - /* * Check to see if the XMITS/PCI-X workaround applies. */ n = ddi_getprop(DDI_DEV_T_ANY, child, DDI_PROP_NOTPROM, @@ -969,31 +929,38 @@ pcix_set_cmd_reg(child, n); } - /* since cached, teardown config handle in ppb_uninitchild() */ - return (DDI_SUCCESS); -} - -static void -ppb_uninitchild(dev_info_t *child) -{ - ppb_devstate_t *ppb; - - ppb = (ppb_devstate_t *)ddi_get_soft_state(ppb_state, - ddi_get_instance(ddi_get_parent(child))); + /* Allocate memory for pci parent data */ + pd_p = kmem_zalloc(sizeof (pci_parent_data_t), KM_SLEEP); /* - * SG OPL FMA specific + * Retrieve and save BDF and PCIE2PCI bridge's secondary bus + * information in the parent private data structure. */ - if (ppb->parent_bus == PCIE_PCIECAP_DEV_TYPE_PCIE_DEV) - pcie_uninit_ppd(child); + if (ppb_get_bdf_from_dip(child, &pd_p->pci_bdf) != DDI_SUCCESS) { + kmem_free(pd_p, sizeof (pci_parent_data_t)); + pci_config_teardown(&config_handle); + return (DDI_FAILURE); + } - ppb_removechild(child); + pd_p->pci_sec_bus = ddi_prop_get_int(DDI_DEV_T_ANY, child, 0, + "pcie2pci-sec-bus", 0); + + ddi_set_parent_data(child, (void *)pd_p); + pci_config_teardown(&config_handle); + + return (DDI_SUCCESS); } static void ppb_removechild(dev_info_t *dip) { ppb_devstate_t *ppb; + pci_parent_data_t *pd_p; + + if (pd_p = ddi_get_parent_data(dip)) { + ddi_set_parent_data(dip, NULL); + kmem_free(pd_p, sizeof (pci_parent_data_t)); + } ppb = (ppb_devstate_t *)ddi_get_soft_state(ppb_state, ddi_get_instance(ddi_get_parent(dip))); @@ -1681,16 +1648,34 @@ return (ddi_prop_op(dev, dip, prop_op, flags, name, valuep, lengthp)); } +static int +ppb_get_bdf_from_dip(dev_info_t *dip, uint32_t *bdf) +{ + pci_regspec_t *regspec; + int reglen; + + if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, + "reg", (int **)®spec, (uint_t *)®len) != DDI_SUCCESS) + return (DDI_FAILURE); + + if (reglen < (sizeof (pci_regspec_t) / sizeof (int))) { + ddi_prop_free(regspec); + return (DDI_FAILURE); + } + + /* Get phys_hi from first element. All have same bdf. */ + *bdf = (regspec->pci_phys_hi & (PCI_REG_BDFR_M ^ PCI_REG_REG_M)) >> 8; + + ddi_prop_free(regspec); + return (DDI_SUCCESS); +} + /* * Initialize our FMA resources */ static void ppb_fm_init(ppb_devstate_t *ppb_p) { - dev_info_t *root = ddi_root_node(); - dev_info_t *pdip; - char *bus; - ppb_p->fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE | DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE; @@ -1708,21 +1693,6 @@ * Register error callback with our parent. */ ddi_fm_handler_register(ppb_p->dip, ppb_err_callback, NULL); - - ppb_p->parent_bus = PCIE_PCIECAP_DEV_TYPE_PCI_DEV; - for (pdip = ddi_get_parent(ppb_p->dip); pdip && (pdip != root) && - (ppb_p->parent_bus != PCIE_PCIECAP_DEV_TYPE_PCIE_DEV); - pdip = ddi_get_parent(pdip)) { - if (ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, - DDI_PROP_DONTPASS, "device_type", &bus) != - DDI_PROP_SUCCESS) - break; - - if (strcmp(bus, "pciex") == 0) - ppb_p->parent_bus = PCIE_PCIECAP_DEV_TYPE_PCIE_DEV; - - ddi_prop_free(bus); - } } /* @@ -1760,18 +1730,6 @@ static int ppb_err_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data) { - ppb_devstate_t *ppb_p = (ppb_devstate_t *)ddi_get_soft_state(ppb_state, - ddi_get_instance(dip)); - - /* - * errors handled by SPARC PCI-E framework for PCIe platforms - */ - if (ppb_p->parent_bus == PCIE_PCIECAP_DEV_TYPE_PCIE_DEV) - return (DDI_FM_OK); - - /* - * do the following for SPARC PCI platforms - */ ASSERT(impl_data == NULL); pci_ereport_post(dip, derr, NULL); return (derr->fme_status);
--- a/usr/src/uts/sun4u/io/px/px_err.c Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4u/io/px/px_err.c Mon Dec 18 11:06:59 2006 -0800 @@ -133,37 +133,37 @@ PX_ERPT_SEND(erpt), \ PX_ERR_JBC_CLASS(bit) px_err_bit_desc_t px_err_jbc_tbl[] = { - /* JBC FATAL */ - { JBC_BIT_DESC(MB_PEA, hw_reset, jbc_fatal) }, - { JBC_BIT_DESC(CPE, hw_reset, jbc_fatal) }, - { JBC_BIT_DESC(APE, hw_reset, jbc_fatal) }, - { JBC_BIT_DESC(PIO_CPE, hw_reset, jbc_fatal) }, - { JBC_BIT_DESC(JTCEEW, hw_reset, jbc_fatal) }, - { JBC_BIT_DESC(JTCEEI, hw_reset, jbc_fatal) }, - { JBC_BIT_DESC(JTCEER, hw_reset, jbc_fatal) }, + /* JBC FATAL - see io erpt doc, section 1.1 */ + { JBC_BIT_DESC(MB_PEA, fatal_hw, jbc_fatal) }, + { JBC_BIT_DESC(CPE, fatal_hw, jbc_fatal) }, + { JBC_BIT_DESC(APE, fatal_hw, jbc_fatal) }, + { JBC_BIT_DESC(PIO_CPE, fatal_hw, jbc_fatal) }, + { JBC_BIT_DESC(JTCEEW, fatal_hw, jbc_fatal) }, + { JBC_BIT_DESC(JTCEEI, fatal_hw, jbc_fatal) }, + { JBC_BIT_DESC(JTCEER, fatal_hw, jbc_fatal) }, - /* JBC MERGE */ + /* JBC MERGE - see io erpt doc, section 1.2 */ { JBC_BIT_DESC(MB_PER, jbc_merge, jbc_merge) }, { JBC_BIT_DESC(MB_PEW, jbc_merge, jbc_merge) }, - /* JBC Jbusint IN */ - { JBC_BIT_DESC(UE_ASYN, panic, jbc_in) }, - { JBC_BIT_DESC(CE_ASYN, no_error, jbc_in) }, - { JBC_BIT_DESC(JTE, panic, jbc_in) }, - { JBC_BIT_DESC(JBE, panic, jbc_in) }, - { JBC_BIT_DESC(JUE, panic, jbc_in) }, - { JBC_BIT_DESC(ICISE, panic, jbc_in) }, + /* JBC Jbusint IN - see io erpt doc, section 1.3 */ + { JBC_BIT_DESC(UE_ASYN, fatal_gos, jbc_in) }, + { JBC_BIT_DESC(CE_ASYN, non_fatal, jbc_in) }, + { JBC_BIT_DESC(JTE, fatal_gos, jbc_in) }, + { JBC_BIT_DESC(JBE, jbc_jbusint_in, jbc_in) }, + { JBC_BIT_DESC(JUE, jbc_jbusint_in, jbc_in) }, + { JBC_BIT_DESC(ICISE, fatal_gos, jbc_in) }, { JBC_BIT_DESC(WR_DPE, jbc_jbusint_in, jbc_in) }, { JBC_BIT_DESC(RD_DPE, jbc_jbusint_in, jbc_in) }, - { JBC_BIT_DESC(ILL_BMW, panic, jbc_in) }, - { JBC_BIT_DESC(ILL_BMR, panic, jbc_in) }, - { JBC_BIT_DESC(BJC, panic, jbc_in) }, + { JBC_BIT_DESC(ILL_BMW, jbc_jbusint_in, jbc_in) }, + { JBC_BIT_DESC(ILL_BMR, jbc_jbusint_in, jbc_in) }, + { JBC_BIT_DESC(BJC, jbc_jbusint_in, jbc_in) }, - /* JBC Jbusint Out */ - { JBC_BIT_DESC(IJP, panic, jbc_out) }, + /* JBC Jbusint Out - see io erpt doc, section 1.4 */ + { JBC_BIT_DESC(IJP, fatal_gos, jbc_out) }, /* - * JBC Dmcint ODCD + * JBC Dmcint ODCD - see io erpt doc, section 1.5 * * Error bits which can be set via a bad PCItool access go through * jbc_safe_acc instead. @@ -172,15 +172,15 @@ { JBC_BIT_DESC(ILL_ACC_RD, jbc_safe_acc, jbc_odcd) }, { JBC_BIT_DESC(PIO_UNMAP, jbc_safe_acc, jbc_odcd) }, { JBC_BIT_DESC(PIO_DPE, jbc_dmcint_odcd, jbc_odcd) }, - { JBC_BIT_DESC(PIO_CPE, hw_reset, jbc_odcd) }, + { JBC_BIT_DESC(PIO_CPE, non_fatal, jbc_odcd) }, { JBC_BIT_DESC(ILL_ACC, jbc_safe_acc, jbc_odcd) }, - /* JBC Dmcint IDC */ - { JBC_BIT_DESC(UNSOL_RD, no_panic, jbc_idc) }, - { JBC_BIT_DESC(UNSOL_INTR, no_panic, jbc_idc) }, + /* JBC Dmcint IDC - see io erpt doc, section 1.6 */ + { JBC_BIT_DESC(UNSOL_RD, non_fatal, jbc_idc) }, + { JBC_BIT_DESC(UNSOL_INTR, non_fatal, jbc_idc) }, - /* JBC CSR */ - { JBC_BIT_DESC(EBUS_TO, panic, jbc_csr) } + /* JBC CSR - see io erpt doc, section 1.7 */ + { JBC_BIT_DESC(EBUS_TO, jbc_csr, jbc_csr) } }; #define px_err_jbc_keys \ @@ -202,17 +202,17 @@ PX_ERR_UBC_CLASS(bit) px_err_bit_desc_t px_err_ubc_tbl[] = { /* UBC FATAL */ - { UBC_BIT_DESC(DMARDUEA, no_panic, ubc_fatal) }, - { UBC_BIT_DESC(DMAWTUEA, panic, ubc_fatal) }, - { UBC_BIT_DESC(MEMRDAXA, panic, ubc_fatal) }, - { UBC_BIT_DESC(MEMWTAXA, panic, ubc_fatal) }, - { UBC_BIT_DESC(DMARDUEB, no_panic, ubc_fatal) }, - { UBC_BIT_DESC(DMAWTUEB, panic, ubc_fatal) }, - { UBC_BIT_DESC(MEMRDAXB, panic, ubc_fatal) }, - { UBC_BIT_DESC(MEMWTAXB, panic, ubc_fatal) }, - { UBC_BIT_DESC(PIOWTUE, panic, ubc_fatal) }, - { UBC_BIT_DESC(PIOWBEUE, panic, ubc_fatal) }, - { UBC_BIT_DESC(PIORBEUE, panic, ubc_fatal) } + { UBC_BIT_DESC(DMARDUEA, non_fatal, ubc_fatal) }, + { UBC_BIT_DESC(DMAWTUEA, fatal_sw, ubc_fatal) }, + { UBC_BIT_DESC(MEMRDAXA, fatal_sw, ubc_fatal) }, + { UBC_BIT_DESC(MEMWTAXA, fatal_sw, ubc_fatal) }, + { UBC_BIT_DESC(DMARDUEB, non_fatal, ubc_fatal) }, + { UBC_BIT_DESC(DMAWTUEB, fatal_sw, ubc_fatal) }, + { UBC_BIT_DESC(MEMRDAXB, fatal_sw, ubc_fatal) }, + { UBC_BIT_DESC(MEMWTAXB, fatal_sw, ubc_fatal) }, + { UBC_BIT_DESC(PIOWTUE, fatal_sw, ubc_fatal) }, + { UBC_BIT_DESC(PIOWBEUE, fatal_sw, ubc_fatal) }, + { UBC_BIT_DESC(PIORBEUE, fatal_sw, ubc_fatal) } }; #define px_err_ubc_keys \ @@ -242,20 +242,20 @@ PX_ERPT_SEND(erpt), \ PX_ERR_DMC_CLASS(bit) px_err_bit_desc_t px_err_imu_tbl[] = { - /* DMC IMU RDS */ - { IMU_BIT_DESC(MSI_MAL_ERR, panic, imu_rds) }, - { IMU_BIT_DESC(MSI_PAR_ERR, panic, imu_rds) }, - { IMU_BIT_DESC(PMEACK_MES_NOT_EN, panic, imu_rds) }, - { IMU_BIT_DESC(PMPME_MES_NOT_EN, panic, imu_rds) }, - { IMU_BIT_DESC(FATAL_MES_NOT_EN, panic, imu_rds) }, - { IMU_BIT_DESC(NONFATAL_MES_NOT_EN, panic, imu_rds) }, - { IMU_BIT_DESC(COR_MES_NOT_EN, panic, imu_rds) }, - { IMU_BIT_DESC(MSI_NOT_EN, panic, imu_rds) }, + /* DMC IMU RDS - see io erpt doc, section 2.1 */ + { IMU_BIT_DESC(MSI_MAL_ERR, non_fatal, imu_rds) }, + { IMU_BIT_DESC(MSI_PAR_ERR, fatal_stuck, imu_rds) }, + { IMU_BIT_DESC(PMEACK_MES_NOT_EN, imu_rbne, imu_rds) }, + { IMU_BIT_DESC(PMPME_MES_NOT_EN, imu_pme, imu_rds) }, + { IMU_BIT_DESC(FATAL_MES_NOT_EN, imu_rbne, imu_rds) }, + { IMU_BIT_DESC(NONFATAL_MES_NOT_EN, imu_rbne, imu_rds) }, + { IMU_BIT_DESC(COR_MES_NOT_EN, imu_rbne, imu_rds) }, + { IMU_BIT_DESC(MSI_NOT_EN, imu_rbne, imu_rds) }, - /* DMC IMU SCS */ - { IMU_BIT_DESC(EQ_NOT_EN, panic, imu_rds) }, + /* DMC IMU SCS - see io erpt doc, section 2.2 */ + { IMU_BIT_DESC(EQ_NOT_EN, imu_rbne, imu_rds) }, - /* DMC IMU */ + /* DMC IMU - see io erpt doc, section 2.3 */ { IMU_BIT_DESC(EQ_OVER, imu_eq_ovfl, imu) } }; @@ -274,21 +274,21 @@ PX_ERPT_SEND(erpt), \ PX_ERR_DMC_CLASS(bit) px_err_bit_desc_t px_err_mmu_tbl[] = { - /* DMC MMU TFAR/TFSR */ + /* DMC MMU TFAR/TFSR - see io erpt doc, section 2.4 */ { MMU_BIT_DESC(BYP_ERR, mmu_rbne, mmu_tfar_tfsr) }, { MMU_BIT_DESC(BYP_OOR, mmu_tfa, mmu_tfar_tfsr) }, - { MMU_BIT_DESC(TRN_ERR, panic, mmu_tfar_tfsr) }, + { MMU_BIT_DESC(TRN_ERR, mmu_rbne, mmu_tfar_tfsr) }, { MMU_BIT_DESC(TRN_OOR, mmu_tfa, mmu_tfar_tfsr) }, { MMU_BIT_DESC(TTE_INV, mmu_tfa, mmu_tfar_tfsr) }, { MMU_BIT_DESC(TTE_PRT, mmu_tfa, mmu_tfar_tfsr) }, - { MMU_BIT_DESC(TTC_DPE, mmu_parity, mmu_tfar_tfsr) }, - { MMU_BIT_DESC(TBW_DME, panic, mmu_tfar_tfsr) }, - { MMU_BIT_DESC(TBW_UDE, panic, mmu_tfar_tfsr) }, - { MMU_BIT_DESC(TBW_ERR, panic, mmu_tfar_tfsr) }, - { MMU_BIT_DESC(TBW_DPE, mmu_parity, mmu_tfar_tfsr) }, + { MMU_BIT_DESC(TTC_DPE, mmu_tfa, mmu_tfar_tfsr) }, + { MMU_BIT_DESC(TBW_DME, mmu_tblwlk, mmu_tfar_tfsr) }, + { MMU_BIT_DESC(TBW_UDE, mmu_tblwlk, mmu_tfar_tfsr) }, + { MMU_BIT_DESC(TBW_ERR, mmu_tblwlk, mmu_tfar_tfsr) }, + { MMU_BIT_DESC(TBW_DPE, mmu_tblwlk, mmu_tfar_tfsr) }, - /* DMC MMU */ - { MMU_BIT_DESC(TTC_CAE, panic, mmu) } + /* DMC MMU - see io erpt doc, section 2.5 */ + { MMU_BIT_DESC(TTC_CAE, non_fatal, mmu) } }; #define px_err_mmu_keys (sizeof (px_err_mmu_tbl)) / (sizeof (px_err_bit_desc_t)) @@ -308,8 +308,8 @@ PX_ERPT_SEND(erpt), \ PX_ERR_PEC_CLASS(bit) px_err_bit_desc_t px_err_ilu_tbl[] = { - /* PEC ILU none */ - { ILU_BIT_DESC(IHB_PE, panic, pec_ilu) } + /* PEC ILU none - see io erpt doc, section 3.1 */ + { ILU_BIT_DESC(IHB_PE, fatal_gos, pec_ilu) } }; #define px_err_ilu_keys \ (sizeof (px_err_ilu_tbl)) / (sizeof (px_err_bit_desc_t)) @@ -342,20 +342,20 @@ PX_ERPT_SEND(erpt), \ PX_ERR_PEC_CLASS(bit) px_err_bit_desc_t px_err_tlu_ue_tbl[] = { - /* PCI-E Receive Uncorrectable Errors */ + /* PCI-E Receive Uncorrectable Errors - see io erpt doc, section 3.2 */ { TLU_UC_BIT_DESC(UR, pciex_ue, pciex_rx_ue) }, { TLU_UC_BIT_DESC(UC, pciex_ue, pciex_rx_ue) }, - /* PCI-E Transmit Uncorrectable Errors */ + /* PCI-E Transmit Uncorrectable Errors - see io erpt doc, section 3.3 */ { TLU_UC_OB_BIT_DESC(ECRC, pciex_ue, pciex_rx_ue) }, { TLU_UC_BIT_DESC(CTO, pciex_ue, pciex_tx_ue) }, { TLU_UC_BIT_DESC(ROF, pciex_ue, pciex_tx_ue) }, - /* PCI-E Rx/Tx Uncorrectable Errors */ + /* PCI-E Rx/Tx Uncorrectable Errors - see io erpt doc, section 3.4 */ { TLU_UC_BIT_DESC(MFP, pciex_ue, pciex_rx_tx_ue) }, { TLU_UC_BIT_DESC(PP, pciex_ue, pciex_rx_tx_ue) }, - /* Other PCI-E Uncorrectable Errors */ + /* Other PCI-E Uncorrectable Errors - see io erpt doc, section 3.5 */ { TLU_UC_BIT_DESC(FCP, pciex_ue, pciex_ue) }, { TLU_UC_BIT_DESC(DLP, pciex_ue, pciex_ue) }, { TLU_UC_BIT_DESC(TE, pciex_ue, pciex_ue) }, @@ -384,7 +384,7 @@ PX_ERPT_SEND(erpt), \ PX_ERR_PEC_CLASS(bit) px_err_bit_desc_t px_err_tlu_ce_tbl[] = { - /* PCI-E Correctable Errors */ + /* PCI-E Correctable Errors - see io erpt doc, section 3.6 */ { TLU_CE_BIT_DESC(RTO, pciex_ce, pciex_ce) }, { TLU_CE_BIT_DESC(RNR, pciex_ce, pciex_ce) }, { TLU_CE_BIT_DESC(BDP, pciex_ce, pciex_ce) }, @@ -419,29 +419,31 @@ PX_ERPT_SEND(erpt), \ PX_ERR_PEC_OB_CLASS(bit) px_err_bit_desc_t px_err_tlu_oe_tbl[] = { - /* TLU Other Event Status (receive only) */ - { TLU_OE_BIT_DESC(MRC, hw_reset, pciex_rx_oe) }, - - /* TLU Other Event Status (rx + tx) */ - { TLU_OE_BIT_DESC(WUC, wuc_ruc, pciex_rx_tx_oe) }, - { TLU_OE_BIT_DESC(RUC, wuc_ruc, pciex_rx_tx_oe) }, - { TLU_OE_BIT_DESC(CRS, no_panic, pciex_rx_tx_oe) }, + /* + * TLU Other Event Status (receive only) - see io erpt doc, section 3.7 + */ + { TLU_OE_BIT_DESC(MRC, fatal_hw, pciex_rx_oe) }, - /* TLU Other Event */ - { TLU_OE_BIT_DESC(IIP, panic, pciex_oe) }, - { TLU_OE_BIT_DESC(EDP, panic, pciex_oe) }, - { TLU_OE_BIT_DESC(EHP, panic, pciex_oe) }, - { TLU_OE_OB_BIT_DESC(TLUEITMO, panic, pciex_oe) }, - { TLU_OE_BIT_DESC(LIN, no_panic, pciex_oe) }, - { TLU_OE_BIT_DESC(LRS, no_panic, pciex_oe) }, + /* TLU Other Event Status (rx + tx) - see io erpt doc, section 3.8 */ + { TLU_OE_BIT_DESC(WUC, non_fatal, pciex_rx_tx_oe) }, + { TLU_OE_BIT_DESC(RUC, non_fatal, pciex_rx_tx_oe) }, + { TLU_OE_BIT_DESC(CRS, non_fatal, pciex_rx_tx_oe) }, + + /* TLU Other Event - see io erpt doc, section 3.9 */ + { TLU_OE_BIT_DESC(IIP, fatal_gos, pciex_oe) }, + { TLU_OE_BIT_DESC(EDP, fatal_gos, pciex_oe) }, + { TLU_OE_BIT_DESC(EHP, fatal_gos, pciex_oe) }, + { TLU_OE_OB_BIT_DESC(TLUEITMO, fatal_gos, pciex_oe) }, + { TLU_OE_BIT_DESC(LIN, non_fatal, pciex_oe) }, + { TLU_OE_BIT_DESC(LRS, non_fatal, pciex_oe) }, { TLU_OE_BIT_DESC(LDN, tlu_ldn, pciex_oe) }, { TLU_OE_BIT_DESC(LUP, tlu_lup, pciex_oe) }, - { TLU_OE_BIT_DESC(ERU, panic, pciex_oe) }, - { TLU_OE_BIT_DESC(ERO, panic, pciex_oe) }, - { TLU_OE_BIT_DESC(EMP, panic, pciex_oe) }, - { TLU_OE_BIT_DESC(EPE, panic, pciex_oe) }, - { TLU_OE_BIT_DESC(ERP, panic, pciex_oe) }, - { TLU_OE_BIT_DESC(EIP, panic, pciex_oe) } + { TLU_OE_BIT_DESC(ERU, fatal_gos, pciex_oe) }, + { TLU_OE_BIT_DESC(ERO, fatal_gos, pciex_oe) }, + { TLU_OE_BIT_DESC(EMP, fatal_gos, pciex_oe) }, + { TLU_OE_BIT_DESC(EPE, fatal_gos, pciex_oe) }, + { TLU_OE_BIT_DESC(ERP, fatal_gos, pciex_oe) }, + { TLU_OE_BIT_DESC(EIP, fatal_gos, pciex_oe) } }; #define px_err_tlu_oe_keys \ @@ -631,7 +633,7 @@ uint64_t err_status[PX_ERR_REG_KEYS]; } px_err_ss_t; -static void px_err_snapshot(px_t *px_p, px_err_ss_t *ss, int block); +static void px_err_snapshot(px_t *px_p, px_err_ss_t *ss, boolean_t chk_cb); static int px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr, px_err_ss_t *ss); static int px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, @@ -642,7 +644,10 @@ * Interrupt handler for the JBC/UBC block. * o lock * o create derr - * o px_err_cmn_intr + * o px_err_handle(leaf1, with cb) + * o px_err_handle(leaf2, without cb) + * o dispatch (leaf1) + * o dispatch (leaf2) * o unlock * o handle error: fatal? fm_panic() : return INTR_CLAIMED) */ @@ -652,7 +657,9 @@ px_fault_t *px_fault_p = (px_fault_t *)arg; dev_info_t *rpdip = px_fault_p->px_fh_dip; px_t *px_p = DIP_TO_STATE(rpdip); - int err; + int err = PX_OK; + int ret = DDI_FM_OK; + int fatal = 0; ddi_fm_error_t derr; /* Create the derr */ @@ -663,24 +670,44 @@ mutex_enter(&px_p->px_fm_mutex); - err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_HOST); + err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE); + + ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); + switch (ret) { + case DDI_FM_FATAL: + fatal++; + break; + case DDI_FM_NONFATAL: + case DDI_FM_UNKNOWN: + default: + break; + } + + /* Set the intr state to idle for the leaf that received the mondo */ + (void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino, INTR_IDLE_STATE); mutex_exit(&px_p->px_fm_mutex); - px_err_panic(err, PX_HB, PX_NO_ERROR); + /* + * PX_FATAL_HW error is diagnosed after system recovered from + * HW initiated reset, therefore no furthur handling is required. + */ + if (fatal || err & (PX_FATAL_GOS | PX_FATAL_SW)) + PX_FM_PANIC("Fatal System Bus Error has occurred\n"); return (DDI_INTR_CLAIMED); } + /* * px_err_dmc_pec_intr: * Interrupt handler for the DMC/PEC block. * o lock * o create derr - * o px_err_cmn_intr(leaf, with out cb) - * o pcie_scan_fabric (leaf) + * o px_err_handle(leaf, with cb) + * o dispatch (leaf) * o unlock * o handle error: fatal? fm_panic() : return INTR_CLAIMED) */ @@ -690,7 +717,8 @@ px_fault_t *px_fault_p = (px_fault_t *)arg; dev_info_t *rpdip = px_fault_p->px_fh_dip; px_t *px_p = DIP_TO_STATE(rpdip); - int rc_err, fab_err = PF_NO_PANIC; + int err = PX_OK; + int ret = DDI_FM_OK; ddi_fm_error_t derr; /* Create the derr */ @@ -702,12 +730,11 @@ mutex_enter(&px_p->px_fm_mutex); /* send ereport/handle/clear fire registers */ - rc_err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_PCIE); + err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE); /* Check all child devices for errors */ if (!px_lib_is_in_drain_state(px_p)) { - fab_err = pf_scan_fabric(rpdip, &derr, px_p->px_dq_p, - &px_p->px_dq_tail); + ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); } /* Set the interrupt state to idle */ @@ -716,7 +743,12 @@ mutex_exit(&px_p->px_fm_mutex); - px_err_panic(rc_err, PX_RC, fab_err); + /* + * PX_FATAL_HW indicates a condition recovered from Fatal-Reset, + * therefore it does not cause panic. + */ + if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || (ret == DDI_FM_FATAL)) + PX_FM_PANIC("Fatal System Port Error has occurred\n"); return (DDI_INTR_CLAIMED); } @@ -799,31 +831,31 @@ } /* - * px_err_cmn_intr: + * px_err_handle: * Common function called by trap, mondo and fabric intr. * o Snap shot current fire registers * o check for safe access * o send ereport and clear snap shot registers - * o create and queue RC info for later use in fabric scan. - * o RUC/WUC, PTLP, MMU Errors(CA), UR * o check severity of snap shot registers * * @param px_p leaf in which to check access * @param derr fm err data structure to be updated * @param caller PX_TRAP_CALL | PX_INTR_CALL - * @param block PX_FM_BLOCK_HOST | PX_FM_BLOCK_PCIE | PX_FM_BLOCK_ALL - * @return err PX_NO_PANIC | PX_PANIC | PX_HW_RESET | PX_PROTECTED + * @param chk_cb whether to handle cb registers + * @return err PX_OK | PX_NONFATAL | + * PX_FATAL_GOS | PX_FATAL_HW | PX_STUCK_FATAL */ int -px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block) +px_err_handle(px_t *px_p, ddi_fm_error_t *derr, int caller, + boolean_t chk_cb) { px_err_ss_t ss = {0}; - int err; + int err = PX_OK; ASSERT(MUTEX_HELD(&px_p->px_fm_mutex)); /* snap shot the current fire registers */ - px_err_snapshot(px_p, &ss, block); + px_err_snapshot(px_p, &ss, chk_cb); /* check for safe access */ px_err_safeacc_check(px_p, derr); @@ -835,7 +867,7 @@ err = px_err_check_severity(px_p, derr, err, caller); /* Mark the On Trap Handle if an error occured */ - if (err != PX_NO_ERROR) { + if (err != PX_OK) { px_pec_t *pec_p = px_p->px_pec_p; on_trap_data_t *otd = pec_p->pec_ontrap_data; @@ -853,19 +885,18 @@ /* * px_err_snapshot: * Take a current snap shot of all the fire error registers. This includes - * JBC/UBC, DMC, and PEC depending on the block flag + * JBC/UBC, DMC, and PEC, unless chk_cb == false; * * @param px_p leaf in which to take the snap shot. * @param ss pre-allocated memory to store the snap shot. * @param chk_cb boolean on whether to store jbc/ubc register. */ static void -px_err_snapshot(px_t *px_p, px_err_ss_t *ss_p, int block) +px_err_snapshot(px_t *px_p, px_err_ss_t *ss_p, boolean_t chk_cb) { pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; caddr_t xbc_csr_base = (caddr_t)pxu_p->px_address[PX_REG_XBC]; caddr_t pec_csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR]; - caddr_t csr_base; uint8_t chip_mask = 1 << PX_CHIP_TYPE(pxu_p); const px_err_reg_desc_t *reg_desc_p = px_err_reg_tbl; px_err_id_t reg_id; @@ -873,20 +904,11 @@ for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++, reg_desc_p++) { if (!(reg_desc_p->chip_mask & chip_mask)) continue; - - if ((block & PX_FM_BLOCK_HOST) && - (reg_desc_p->reg_bank == PX_REG_XBC)) - csr_base = xbc_csr_base; - else if ((block & PX_FM_BLOCK_PCIE) && - (reg_desc_p->reg_bank == PX_REG_CSR)) - csr_base = pec_csr_base; - else { - ss_p->err_status[reg_id] = 0; - continue; - } - - ss_p->err_status[reg_id] = CSR_XR(csr_base, - reg_desc_p->status_addr); + ss_p->err_status[reg_id] = + (reg_desc_p->reg_bank == PX_REG_CSR) ? + CSR_XR(pec_csr_base, reg_desc_p->status_addr) : + (chk_cb ? + CSR_XR(xbc_csr_base, reg_desc_p->status_addr) : 0); } } @@ -912,15 +934,15 @@ px_err_bit_desc_t *err_bit_tbl; px_err_bit_desc_t *err_bit_desc; - uint64_t *count_mask; - uint64_t clear_addr; + uint64_t *log_mask, *count_mask; + uint64_t status_addr, clear_addr; uint64_t ss_reg; int (*err_handler)(); int (*erpt_handler)(); - int reg_id, key; - int err = PX_NO_ERROR; - int biterr = 0; + px_err_id_t reg_id, key; + int err = PX_OK; + int biterr; ASSERT(MUTEX_HELD(&px_p->px_fm_mutex)); @@ -936,55 +958,60 @@ /* Get the correct CSR BASE */ csr_base = (caddr_t)pxu_p->px_address[err_reg_tbl->reg_bank]; - /* If there are no errors in this register, continue */ + /* Get pointers to masks and register addresses */ + log_mask = err_reg_tbl->log_mask_p; + count_mask = err_reg_tbl->count_mask_p; + status_addr = err_reg_tbl->status_addr; + clear_addr = err_reg_tbl->clear_addr; ss_reg = ss_p->err_status[reg_id]; - if (!ss_reg) - continue; - - /* Get pointers to masks and register addresses */ - count_mask = err_reg_tbl->count_mask_p; - clear_addr = err_reg_tbl->clear_addr; /* Get the register BIT description table */ err_bit_tbl = err_reg_tbl->err_bit_tbl; /* For each known bit in the register send erpt and handle */ for (key = 0; key < err_reg_tbl->err_bit_keys; key++) { + /* Get the bit description table for this register */ + err_bit_desc = &err_bit_tbl[key]; + /* * If the ss_reg is set for this bit, * send ereport and handle */ - err_bit_desc = &err_bit_tbl[key]; - if (!BIT_TST(ss_reg, err_bit_desc->bit)) - continue; + if (BIT_TST(ss_reg, err_bit_desc->bit)) { + /* Increment the counter if necessary */ + if (BIT_TST(*count_mask, err_bit_desc->bit)) { + err_bit_desc->counter++; + } - /* Increment the counter if necessary */ - if (BIT_TST(*count_mask, err_bit_desc->bit)) { - err_bit_desc->counter++; - } + /* Error Handle for this bit */ + err_handler = err_bit_desc->err_handler; + if (err_handler) { + biterr = err_handler(rpdip, + csr_base, + derr, + err_reg_tbl, + err_bit_desc); + err |= biterr; + } - /* Error Handle for this bit */ - err_handler = err_bit_desc->err_handler; - if (err_handler) { - biterr = err_handler(rpdip, csr_base, derr, - err_reg_tbl, err_bit_desc); - err |= biterr; + /* Send the ereport if it's an UNEXPECTED err */ + erpt_handler = err_bit_desc->erpt_handler; + if ((derr->fme_flag == DDI_FM_ERR_UNEXPECTED) && + (biterr != PX_OK)) { + if (erpt_handler) + (void) erpt_handler(rpdip, + csr_base, + ss_reg, + derr, + err_bit_desc->bit, + err_bit_desc->class_name); + } } - - /* - * Send the ereport if it's an UNEXPECTED err. - * This is the only place where PX_EXPECTED is utilized. - */ - erpt_handler = err_bit_desc->erpt_handler; - if ((derr->fme_flag != DDI_FM_ERR_UNEXPECTED) || - (biterr == PX_EXPECTED)) - continue; - - if (erpt_handler) - (void) erpt_handler(rpdip, csr_base, ss_reg, - derr, err_bit_desc->bit, - err_bit_desc->class_name); } + /* Print register status */ + if (ss_reg & *log_mask) + DBG(DBG_ERR_INTR, rpdip, "<%x>=%16llx %s\n", + status_addr, ss_reg, err_reg_tbl->msg); /* Clear the register and error */ CSR_XS(csr_base, clear_addr, ss_reg); @@ -1008,12 +1035,8 @@ px_pec_t *pec_p = px_p->px_pec_p; boolean_t is_safeacc = B_FALSE; - /* - * Nothing to do if called with no error. - * The err could have already been set to PX_NO_PANIC, which means the - * system doesn't need to panic, but PEEK/POKE still failed. - */ - if (err == PX_NO_ERROR) + /* nothing to do if called with no error */ + if (err == PX_OK) return (err); /* Cautious access error handling */ @@ -1048,103 +1071,84 @@ is_safeacc = B_FALSE; } - /* re-adjust error status from safe access, forgive all errors */ - if (is_safeacc) - return (PX_NO_PANIC); + /* + * The third argument "err" is passed in as error status from checking + * Fire register, re-adjust error status from safe access. + */ + if (is_safeacc && !(err & PX_FATAL_GOS)) + return (PX_NONFATAL); return (err); } /* predefined convenience functions */ /* ARGSUSED */ -void -px_err_log_handle(dev_info_t *rpdip, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr, char *msg) +int +px_err_fatal_hw_handle(dev_info_t *rpdip, caddr_t csr_base, + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr) { - DBG(DBG_ERR_INTR, rpdip, - "Bit %d, %s, at %s(0x%x) has occured %d times with a severity " - "of \"%s\"\n", - err_bit_descr->bit, err_bit_descr->class_name, - err_reg_descr->msg, err_reg_descr->status_addr, - err_bit_descr->counter, msg); + return (PX_FATAL_HW); } /* ARGSUSED */ int -px_err_hw_reset_handle(dev_info_t *rpdip, caddr_t csr_base, +px_err_fatal_gos_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - if (px_log & PX_HW_RESET) { - px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, - "HW RESET"); - } + return (PX_FATAL_GOS); +} - return (PX_HW_RESET); +/* ARGSUSED */ +int +px_err_fatal_stuck_handle(dev_info_t *rpdip, caddr_t csr_base, + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr) +{ + return (PX_STUCK_FATAL); } /* ARGSUSED */ int -px_err_panic_handle(dev_info_t *rpdip, caddr_t csr_base, +px_err_fatal_sw_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - if (px_log & PX_PANIC) { - px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, "PANIC"); - } - - return (PX_PANIC); + return (PX_FATAL_SW); } /* ARGSUSED */ int -px_err_protected_handle(dev_info_t *rpdip, caddr_t csr_base, +px_err_non_fatal_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - if (px_log & PX_PROTECTED) { - px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, - "PROTECTED"); - } - - return (PX_PROTECTED); + return (PX_NONFATAL); } /* ARGSUSED */ int -px_err_no_panic_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr) +px_err_ok_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, + px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - if (px_log & PX_NO_PANIC) { - px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, - "NO PANIC"); - } - - return (PX_NO_PANIC); + return (PX_OK); } /* ARGSUSED */ int -px_err_no_error_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr) +px_err_unknown_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, + px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - if (px_log & PX_NO_ERROR) { - px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, - "NO ERROR"); - } - - return (PX_NO_ERROR); + return (PX_ERR_UNKNOWN); } /* ARGSUSED */ PX_ERPT_SEND_DEC(do_not) { - return (PX_NO_ERROR); + return (PX_OK); } - /* UBC FATAL - see io erpt doc, section 1.1 */ /* ARGSUSED */ PX_ERPT_SEND_DEC(ubc_fatal) @@ -1242,10 +1246,10 @@ NULL); } - return (PX_NO_PANIC); + return (PX_OK); } -/* JBC FATAL */ +/* JBC FATAL - see io erpt doc, section 1.1 */ PX_ERPT_SEND_DEC(jbc_fatal) { char buf[FM_MAX_CLASS]; @@ -1269,10 +1273,10 @@ CSR_XR(csr_base, FATAL_ERROR_LOG_2), NULL); - return (PX_NO_PANIC); + return (PX_OK); } -/* JBC MERGE */ +/* JBC MERGE - see io erpt doc, section 1.2 */ PX_ERPT_SEND_DEC(jbc_merge) { char buf[FM_MAX_CLASS]; @@ -1294,38 +1298,37 @@ CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG), NULL); - return (PX_NO_PANIC); + return (PX_OK); } /* - * JBC Merge buffer retryable errors: - * Merge buffer parity error (rd_buf): PIO or DMA - * Merge buffer parity error (wr_buf): PIO or DMA + * JBC Merge buffer nonfatal errors: + * Merge buffer parity error (rd_buf): dma:read:M:nonfatal + * Merge buffer parity error (wr_buf): dma:write:M:nonfatal */ /* ARGSUSED */ int px_err_jbc_merge_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr) + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr) { - /* - * Holder function to attempt error recovery. When the features - * are in place, look up the address of the transaction in: - * - * paddr = CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG); - * paddr &= MERGE_TRANSACTION_ERROR_LOG_ADDRESS_MASK; - * - * If the error is a secondary error, there is no log information - * just panic as it is unknown which address has been affected. - * - * Remember the address is pretranslation and might be hard to look - * up the appropriate driver based on the PA. - */ - return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr, - err_bit_descr)); + boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); + uint64_t paddr; + int ret; + + if (!pri) + return (PX_FATAL_GOS); + + paddr = CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG); + paddr &= MERGE_TRANSACTION_ERROR_LOG_ADDRESS_MASK; + + ret = px_handle_lookup( + rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr); + + return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); } -/* JBC Jbusint IN */ +/* JBC Jbusint IN - see io erpt doc, section 1.3 */ PX_ERPT_SEND_DEC(jbc_in) { char buf[FM_MAX_CLASS]; @@ -1349,39 +1352,45 @@ CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG_2), NULL); - return (PX_NO_PANIC); + return (PX_OK); } /* - * JBC Jbusint IN retryable errors + * JBC Jbusint IN nonfatal errors: PA logged in Jbusint In Transaction Error * Log Reg[42:0]. - * Write Data Parity Error: PIO Writes - * Read Data Parity Error: DMA Reads + * CE async fault error: nonfatal + * Jbus bus error: dma::nonfatal + * Jbus unmapped error: pio|dma:rdwr:M:nonfatal + * Write data parity error: pio/write:M:nonfatal + * Read data parity error: pio/read:M:nonfatal + * Illegal NCWR bytemask: pio:write:M:nonfatal + * Illegal NCRD bytemask: pio:write:M:nonfatal + * Invalid jbus transaction: nonfatal */ +/* ARGSUSED */ int px_err_jbc_jbusint_in_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - /* - * Holder function to attempt error recovery. When the features - * are in place, look up the address of the transaction in: - * - * paddr = CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG); - * paddr &= JBCINT_IN_TRANSACTION_ERROR_LOG_ADDRESS_MASK; - * - * If the error is a secondary error, there is no log information - * just panic as it is unknown which address has been affected. - * - * Remember the address is pretranslation and might be hard to look - * up the appropriate driver based on the PA. - */ - return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr, - err_bit_descr)); + boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); + uint64_t paddr; + int ret; + + if (!pri) + return (PX_FATAL_GOS); + + paddr = CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG); + paddr &= JBCINT_IN_TRANSACTION_ERROR_LOG_ADDRESS_MASK; + + ret = px_handle_lookup( + rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr); + + return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); } -/* JBC Jbusint Out */ +/* JBC Jbusint Out - see io erpt doc, section 1.4 */ PX_ERPT_SEND_DEC(jbc_out) { char buf[FM_MAX_CLASS]; @@ -1405,10 +1414,10 @@ CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG_2), NULL); - return (PX_NO_PANIC); + return (PX_OK); } -/* JBC Dmcint ODCD */ +/* JBC Dmcint ODCD - see io erpt doc, section 1.5 */ PX_ERPT_SEND_DEC(jbc_odcd) { char buf[FM_MAX_CLASS]; @@ -1430,12 +1439,12 @@ CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG), NULL); - return (PX_NO_PANIC); + return (PX_OK); } /* * JBC Dmcint ODCO nonfatal errer handling - - * PIO data parity error: PIO + * PIO data parity error: pio:write:M:nonfatal */ /* ARGSUSED */ int @@ -1443,21 +1452,20 @@ ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - /* - * Holder function to attempt error recovery. When the features - * are in place, look up the address of the transaction in: - * - * paddr = CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG); - * paddr &= DMCINT_ODCD_ERROR_LOG_ADDRESS_MASK; - * - * If the error is a secondary error, there is no log information - * just panic as it is unknown which address has been affected. - * - * Remember the address is pretranslation and might be hard to look - * up the appropriate driver based on the PA. - */ - return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr, - err_bit_descr)); + boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); + uint64_t paddr; + int ret; + + if (!pri) + return (PX_FATAL_GOS); + + paddr = CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG); + paddr &= DMCINT_ODCD_ERROR_LOG_ADDRESS_MASK; + + ret = px_handle_lookup( + rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr); + + return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); } /* Does address in DMCINT error log register match address of pcitool access? */ @@ -1491,8 +1499,7 @@ boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); if (!pri) - return (px_err_panic_handle(rpdip, csr_base, derr, - err_reg_descr, err_bit_descr)); + return (PX_FATAL_GOS); /* * Got an error which is forgivable during a PCItool access. * @@ -1505,14 +1512,13 @@ */ if ((derr->fme_flag != DDI_FM_ERR_UNEXPECTED) && (px_jbc_pcitool_addr_match(rpdip, csr_base))) - return (px_err_protected_handle(rpdip, csr_base, derr, - err_reg_descr, err_bit_descr)); + return (PX_FATAL_SW); return (px_err_jbc_dmcint_odcd_handle(rpdip, csr_base, derr, err_reg_descr, err_bit_descr)); } -/* JBC Dmcint IDC */ +/* JBC Dmcint IDC - see io erpt doc, section 1.6 */ PX_ERPT_SEND_DEC(jbc_idc) { char buf[FM_MAX_CLASS]; @@ -1534,10 +1540,10 @@ CSR_XR(csr_base, DMCINT_IDC_ERROR_LOG), NULL); - return (PX_NO_PANIC); + return (PX_OK); } -/* JBC CSR */ +/* JBC CSR - see io erpt doc, section 1.7 */ PX_ERPT_SEND_DEC(jbc_csr) { char buf[FM_MAX_CLASS]; @@ -1559,10 +1565,38 @@ CSR_XR(csr_base, CSR_ERROR_LOG), NULL); - return (PX_NO_PANIC); + return (PX_OK); } -/* DMC IMU RDS */ +/* + * JBC CSR errer handling - + * Ebus ready timeout error: pio:rdwr:M:nonfatal + */ +/* ARGSUSED */ +int +px_err_jbc_csr_handle(dev_info_t *rpdip, caddr_t csr_base, + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr) +{ + boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); + uint64_t paddr; + int ret; + + if (!pri) + return (PX_FATAL_GOS); + + paddr = CSR_XR(csr_base, CSR_ERROR_LOG); + paddr &= CSR_ERROR_LOG_ADDRESS_MASK; + + ret = px_handle_lookup( + rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr); + + return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); +} + +/* JBC Dmcint IDC - see io erpt doc, section 1.6 */ + +/* DMC IMU RDS - see io erpt doc, section 2.1 */ PX_ERPT_SEND_DEC(imu_rds) { char buf[FM_MAX_CLASS]; @@ -1584,7 +1618,53 @@ CSR_XR(csr_base, IMU_RDS_ERROR_LOG), NULL); - return (PX_NO_PANIC); + return (PX_OK); +} + +/* imu function to handle all Received but Not Enabled errors */ +/* ARGSUSED */ +int +px_err_imu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base, + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr) +{ + uint64_t imu_log_enable, imu_intr_enable; + int mask = BITMASK(err_bit_descr->bit); + int err = PX_NONFATAL; + + imu_log_enable = CSR_XR(csr_base, err_reg_descr->log_addr); + imu_intr_enable = CSR_XR(csr_base, err_reg_descr->enable_addr); + + /* + * If matching bit is not set, meaning corresponding rbne not + * enabled, then receiving it indicates some sort of malfunction + * possibly in hardware. + * + * Other wise, software may have intentionally disabled certain + * errors for a period of time within which the occuring of the + * disabled errors become rbne, that is non fatal. + */ + + if (!(imu_log_enable & imu_intr_enable & mask)) + err = PX_FATAL_GOS; + + return (err); +} + +/* + * No platforms uses PME. Any PME received is simply logged + * for analysis. + */ +/* ARGSUSED */ +int +px_err_imu_pme_handle(dev_info_t *rpdip, caddr_t csr_base, + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr) +{ + px_t *px_p = DIP_TO_STATE(rpdip); + + px_p->px_pme_ignored++; + return (PX_NONFATAL); } /* handle EQ overflow */ @@ -1594,20 +1674,27 @@ ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - px_t *px_p = DIP_TO_STATE(rpdip); - pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; - int err = px_err_check_eq(rpdip); + px_t *px_p = DIP_TO_STATE(rpdip); + px_msiq_state_t *msiq_state_p = &px_p->px_ib_p->ib_msiq_state; + msiqid_t eqno; + pci_msiq_state_t msiq_state; + int err = PX_NONFATAL; + int i; - if ((err == PX_PANIC) && (pxu_p->cpr_flag == PX_NOT_CPR)) { - return (px_err_panic_handle(rpdip, csr_base, derr, - err_reg_descr, err_bit_descr)); - } else { - return (px_err_no_panic_handle(rpdip, csr_base, derr, - err_reg_descr, err_bit_descr)); + eqno = msiq_state_p->msiq_1st_msiq_id; + for (i = 0; i < msiq_state_p->msiq_cnt; i++) { + if (px_lib_msiq_getstate(rpdip, eqno, &msiq_state) == + DDI_SUCCESS) { + if (msiq_state == PCI_MSIQ_STATE_ERROR) { + err = PX_FATAL_GOS; + } + } } + + return (err); } -/* DMC IMU SCS */ +/* DMC IMU SCS - see io erpt doc, section 2.2 */ PX_ERPT_SEND_DEC(imu_scs) { char buf[FM_MAX_CLASS]; @@ -1629,10 +1716,10 @@ CSR_XR(csr_base, IMU_SCS_ERROR_LOG), NULL); - return (PX_NO_PANIC); + return (PX_OK); } -/* DMC IMU */ +/* DMC IMU - see io erpt doc, section 2.3 */ PX_ERPT_SEND_DEC(imu) { char buf[FM_MAX_CLASS]; @@ -1652,27 +1739,14 @@ CSR_XR(csr_base, IMU_ERROR_STATUS_SET), NULL); - return (PX_NO_PANIC); + return (PX_OK); } -/* DMC MMU TFAR/TFSR */ +/* DMC MMU TFAR/TFSR - see io erpt doc, section 2.4 */ PX_ERPT_SEND_DEC(mmu_tfar_tfsr) { char buf[FM_MAX_CLASS]; boolean_t pri = PX_ERR_IS_PRI(bit); - px_t *px_p = DIP_TO_STATE(rpdip); - pcie_req_id_t fault_bdf = 0; - uint16_t s_status = 0; - - if (pri) { - fault_bdf = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS) - & (MMU_TRANSLATION_FAULT_STATUS_ID_MASK << - MMU_TRANSLATION_FAULT_STATUS_ID); - s_status = PCI_STAT_S_TARG_AB; - - /* Only PIO Fault Addresses are valid, this is DMA */ - (void) px_rp_en_q(px_p, fault_bdf, NULL, s_status); - } (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); @@ -1693,10 +1767,10 @@ CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS), NULL); - return (PX_NO_PANIC); + return (PX_OK); } -/* DMC MMU */ +/* DMC MMU - see io erpt doc, section 2.5 */ PX_ERPT_SEND_DEC(mmu) { char buf[FM_MAX_CLASS]; @@ -1716,153 +1790,112 @@ CSR_XR(csr_base, MMU_ERROR_STATUS_SET), NULL); - return (PX_NO_PANIC); + return (PX_OK); } -/* - * IMU function to handle all Received but Not Enabled errors. - * - * These errors are due to transactions modes in which the PX driver was not - * setup to be able to do. If possible, inform the driver that their DMA has - * failed by marking their DMA handle as failed, but do not panic the system. - * Most likely the address is not valid, as Fire wasn't setup to handle them in - * the first place. - * - * These errors are not retryable, unless the PX mode has changed, otherwise the - * same error will occur again. - */ +/* imu function to handle all Received but Not Enabled errors */ int px_err_mmu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - pcie_req_id_t bdf; + boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); + uint64_t mmu_log_enable, mmu_intr_enable; + uint64_t mask = BITMASK(err_bit_descr->bit); + uint64_t mmu_tfa, mmu_ctrl; + uint64_t mmu_enable_bit = 0; + int err = PX_NONFATAL; + int ret; + + mmu_log_enable = CSR_XR(csr_base, err_reg_descr->log_addr); + mmu_intr_enable = CSR_XR(csr_base, err_reg_descr->enable_addr); - if (!PX_ERR_IS_PRI(err_bit_descr->bit)) - goto done; + mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS); + mmu_ctrl = CSR_XR(csr_base, MMU_CONTROL_AND_STATUS); + + switch (err_bit_descr->bit) { + case MMU_INTERRUPT_STATUS_BYP_ERR_P: + mmu_enable_bit = BITMASK(MMU_CONTROL_AND_STATUS_BE); + break; + case MMU_INTERRUPT_STATUS_TRN_ERR_P: + mmu_enable_bit = BITMASK(MMU_CONTROL_AND_STATUS_TE); + break; + default: + mmu_enable_bit = 0; + break; + } - bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); - (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_DMA_ADDR, NULL, - bdf); + /* + * If the interrupts are enabled and Translation/Bypass Enable bit + * was set, then panic. This error should not have occured. + */ + if (mmu_log_enable & mmu_intr_enable & + (mmu_ctrl & mmu_enable_bit)) { + err = PX_FATAL_GOS; + } else { + if (!pri) + return (PX_FATAL_GOS); + + ret = px_handle_lookup( + rpdip, DMA_HANDLE, derr->fme_ena, (void *)mmu_tfa); + err = (ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL; -done: - return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr, - err_bit_descr)); + /* + * S/W bug - this error should always be enabled + */ + + /* enable error & intr reporting for this bit */ + CSR_XS(csr_base, MMU_ERROR_LOG_ENABLE, mmu_log_enable | mask); + CSR_XS(csr_base, MMU_INTERRUPT_ENABLE, mmu_intr_enable | mask); + + /* enable translation access/bypass enable */ + CSR_XS(csr_base, MMU_CONTROL_AND_STATUS, + mmu_ctrl | mmu_enable_bit); + } + + return (err); } -/* - * IMU function to handle all invalid address errors. - * - * These errors are due to transactions in which the address is not recognized. - * If possible, inform the driver that all DMAs have failed by marking their DMA - * handles. Fire should not panic the system, it'll be up to the driver to - * panic. The address logged is invalid. - * - * These errors are not retryable since retrying the same transaction with the - * same invalid address will result in the same error. - */ +/* Generic error handling functions that involve MMU Translation Fault Addr */ /* ARGSUSED */ int px_err_mmu_tfa_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - pcie_req_id_t bdf; - - if (!PX_ERR_IS_PRI(err_bit_descr->bit)) - goto done; + boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); + uint64_t mmu_tfa; + uint_t ret; - bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); - (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_DMA_ADDR, NULL, - bdf); + if (!pri) + return (PX_FATAL_GOS); -done: - return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr, - err_bit_descr)); + mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS); + ret = px_handle_lookup( + rpdip, DMA_HANDLE, derr->fme_ena, (void *)mmu_tfa); + + return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); } -/* - * IMU function to handle normal transactions that encounter a parity error. - * - * These errors are due to transactions that enouter a parity error. If - * possible, inform the driver that their DMA have failed and that they should - * retry. If Fire is unable to contact the leaf driver, panic the system. - * Otherwise, it'll be up to the device to determine is this is a panicable - * error. - */ +/* MMU Table walk errors */ /* ARGSUSED */ int -px_err_mmu_parity_handle(dev_info_t *rpdip, caddr_t csr_base, +px_err_mmu_tblwlk_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - uint64_t mmu_tfa; - pcie_req_id_t bdf; - int status = DDI_FM_UNKNOWN; + boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); + uint64_t mmu_tfa; + uint_t ret; - if (!PX_ERR_IS_PRI(err_bit_descr->bit)) - goto done; + if (!pri) + return (PX_FATAL_GOS); mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS); - bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); - status = pf_hdl_lookup(rpdip, derr->fme_ena, PF_DMA_ADDR, - (uint32_t)mmu_tfa, bdf); - -done: - if (status == DDI_FM_UNKNOWN) - return (px_err_panic_handle(rpdip, csr_base, derr, - err_reg_descr, err_bit_descr)); - else - return (px_err_no_panic_handle(rpdip, csr_base, derr, - err_reg_descr, err_bit_descr)); -} - -/* - * wuc/ruc event - Mark the handle of the failed PIO access. Return "no_panic" - */ -/* ARGSUSED */ -int -px_err_wuc_ruc_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr) -{ - px_t *px_p = DIP_TO_STATE(rpdip); - pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; - uint64_t data; - uint32_t addr, hdr; - pcie_tlp_hdr_t *tlp; - int sts = PF_HDL_NOTFOUND; + ret = px_handle_lookup( + rpdip, DMA_HANDLE, derr->fme_ena, (void *)mmu_tfa); - if (!PX_ERR_IS_PRI(err_bit_descr->bit)) - goto done; - - data = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG); - hdr = (uint32_t)(data >> 32); - tlp = (pcie_tlp_hdr_t *)&hdr; - data = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG); - addr = (uint32_t)(data >> 32); - - switch (tlp->type) { - case PCIE_TLP_TYPE_IO: - case PCIE_TLP_TYPE_MEM: - case PCIE_TLP_TYPE_MEMLK: - sts = pf_hdl_lookup(rpdip, derr->fme_ena, PF_PIO_ADDR, - addr, NULL); - break; - case PCIE_TLP_TYPE_CFG0: - case PCIE_TLP_TYPE_CFG1: - sts = pf_hdl_lookup(rpdip, derr->fme_ena, PF_CFG_ADDR, - addr, (addr >> 16)); - break; - } - -done: - if ((sts == PF_HDL_NOTFOUND) && (pxu_p->cpr_flag == PX_NOT_CPR)) - return (px_err_protected_handle(rpdip, csr_base, derr, - err_reg_descr, err_bit_descr)); - - return (px_err_no_panic_handle(rpdip, csr_base, derr, - err_reg_descr, err_bit_descr)); + return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); } /* @@ -1884,7 +1917,7 @@ * error condition. */ return ((atomic_cas_32(&px_p->px_lup_pending, 1, 0) == 0) ? - PX_NO_PANIC : PX_EXPECTED); + PX_NONFATAL : PX_OK); } /* @@ -1898,8 +1931,7 @@ px_err_bit_desc_t *err_bit_descr) { px_t *px_p = DIP_TO_STATE(rpdip); - return ((px_p->px_pm_flags & PX_LDN_EXPECTED) ? PX_EXPECTED : - PX_NO_PANIC); + return ((px_p->px_pm_flags & PX_LDN_EXPECTED) ? PX_OK : PX_NONFATAL); } /* PEC ILU none - see io erpt doc, section 3.1 */ @@ -1922,7 +1954,7 @@ CSR_XR(csr_base, ILU_ERROR_STATUS_SET), NULL); - return (PX_NO_PANIC); + return (PX_OK); } /* PCIEX UE Errors */ @@ -1932,60 +1964,14 @@ ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - px_err_pcie_t regs = {0}; - uint32_t err_bit; - int err; - uint64_t log; - - if (err_bit_descr->bit < 32) { - err_bit = (uint32_t)BITMASK(err_bit_descr->bit); - regs.ue_reg = err_bit; - regs.primary_ue = err_bit; - - /* - * Log the Received Log for PTLP and UR. The PTLP most likely - * is a poisoned completion. The original transaction will be - * logged inthe Transmit Log. - */ - if (err_bit & (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_UR)) { - log = CSR_XR(csr_base, - TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG); - regs.rx_hdr1 = (uint32_t)(log >> 32); - regs.rx_hdr2 = (uint32_t)(log && 0xFFFFFFFF); + uint32_t mask = (uint32_t)BITMASK(err_bit_descr->bit); - log = CSR_XR(csr_base, - TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG); - regs.rx_hdr3 = (uint32_t)(log >> 32); - regs.rx_hdr4 = (uint32_t)(log && 0xFFFFFFFF); - } - - if (err_bit & (PCIE_AER_UCE_PTLP)) { - log = CSR_XR(csr_base, - TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG); - regs.tx_hdr1 = (uint32_t)(log >> 32); - regs.tx_hdr2 = (uint32_t)(log && 0xFFFFFFFF); - - log = CSR_XR(csr_base, - TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG); - regs.tx_hdr3 = (uint32_t)(log >> 32); - regs.tx_hdr4 = (uint32_t)(log && 0xFFFFFFFF); - } - } else { - regs.ue_reg = (uint32_t)BITMASK(err_bit_descr->bit - 32); - } - - err = px_err_check_pcie(rpdip, derr, ®s); - - if (err == PX_PANIC) { - return (px_err_panic_handle(rpdip, csr_base, derr, - err_reg_descr, err_bit_descr)); - } else { - return (px_err_no_panic_handle(rpdip, csr_base, derr, - err_reg_descr, err_bit_descr)); - } + return ((err_bit_descr->bit >= 32 && px_fabric_die_rc_ue_gos) ? + PX_FATAL_GOS : PX_FABRIC_ERR_SEV(mask, px_fabric_die_rc_ue, + px_fabric_die_rc_ue_gos)); } -/* PCI-E Uncorrectable Errors */ +/* PCI-E Uncorrectable Errors - see io erpt doc, section 3.2 */ PX_ERPT_SEND_DEC(pciex_rx_ue) { char buf[FM_MAX_CLASS]; @@ -2009,10 +1995,10 @@ CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG), NULL); - return (PX_NO_PANIC); + return (PX_OK); } -/* PCI-E Uncorrectable Errors */ +/* PCI-E Uncorrectable Errors - see io erpt doc, section 3.3 */ PX_ERPT_SEND_DEC(pciex_tx_ue) { char buf[FM_MAX_CLASS]; @@ -2036,10 +2022,10 @@ CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG), NULL); - return (PX_NO_PANIC); + return (PX_OK); } -/* PCI-E Uncorrectable Errors */ +/* PCI-E Uncorrectable Errors - see io erpt doc, section 3.4 */ PX_ERPT_SEND_DEC(pciex_rx_tx_ue) { char buf[FM_MAX_CLASS]; @@ -2067,10 +2053,10 @@ CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG), NULL); - return (PX_NO_PANIC); + return (PX_OK); } -/* PCI-E Uncorrectable Errors */ +/* PCI-E Uncorrectable Errors - see io erpt doc, section 3.5 */ PX_ERPT_SEND_DEC(pciex_ue) { char buf[FM_MAX_CLASS]; @@ -2090,7 +2076,7 @@ CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), NULL); - return (PX_NO_PANIC); + return (PX_OK); } /* PCIEX UE Errors */ @@ -2100,23 +2086,11 @@ ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - px_err_pcie_t regs = {0}; - int err; - - if (err_bit_descr->bit < 32) - regs.ce_reg = (uint32_t)BITMASK(err_bit_descr->bit); - else - regs.ce_reg = (uint32_t)BITMASK(err_bit_descr->bit - 32); + uint32_t mask = (uint32_t)BITMASK(err_bit_descr->bit); - err = px_err_check_pcie(rpdip, derr, ®s); - - if (err == PX_PANIC) { - return (px_err_panic_handle(rpdip, csr_base, derr, - err_reg_descr, err_bit_descr)); - } else { - return (px_err_no_panic_handle(rpdip, csr_base, derr, - err_reg_descr, err_bit_descr)); - } + return ((err_bit_descr->bit >= 32 && px_fabric_die_rc_ce_gos) ? + PX_FATAL_GOS : PX_FABRIC_ERR_SEV(mask, px_fabric_die_rc_ce, + px_fabric_die_rc_ce_gos)); } /* PCI-E Correctable Errors - see io erpt doc, section 3.6 */ @@ -2139,7 +2113,7 @@ CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_STATUS_SET), NULL); - return (PX_NO_PANIC); + return (PX_OK); } /* TLU Other Event Status (receive only) - see io erpt doc, section 3.7 */ @@ -2166,7 +2140,7 @@ CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG), NULL); - return (PX_NO_PANIC); + return (PX_OK); } /* TLU Other Event Status (rx + tx) - see io erpt doc, section 3.8 */ @@ -2174,51 +2148,6 @@ { char buf[FM_MAX_CLASS]; boolean_t pri = PX_ERR_IS_PRI(bit); - px_t *px_p = DIP_TO_STATE(rpdip); - uint32_t trans_type, fault_addr = 0; - uint64_t rx_h1, rx_h2, tx_h1, tx_h2; - uint16_t s_status; - int sts; - pcie_req_id_t fault_bdf = 0; - pcie_cpl_t *cpl; - pf_data_t pf_data = {0}; - - rx_h1 = CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG); - rx_h2 = CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG); - tx_h1 = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG); - tx_h2 = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG); - - if ((bit == TLU_OTHER_EVENT_STATUS_SET_RUC_P) || - (bit == TLU_OTHER_EVENT_STATUS_SET_WUC_P)) { - pf_data.aer_h0 = (uint32_t)(rx_h1 >> 32); - pf_data.aer_h1 = (uint32_t)rx_h1; - pf_data.aer_h2 = (uint32_t)(rx_h2 >> 32); - pf_data.aer_h3 = (uint32_t)rx_h2; - - /* get completer bdf (fault bdf) from rx logs */ - cpl = (pcie_cpl_t *)&pf_data.aer_h1; - fault_bdf = cpl->cid; - - /* Figure out if UR/CA from rx logs */ - if (cpl->status == PCIE_CPL_STS_UR) - s_status = PCI_STAT_R_MAST_AB; - else if (cpl->status == PCIE_CPL_STS_CA) - s_status = PCI_STAT_R_TARG_AB; - - - pf_data.aer_h0 = (uint32_t)(tx_h1 >> 32); - pf_data.aer_h1 = (uint32_t)tx_h1; - pf_data.aer_h2 = (uint32_t)(tx_h2 >> 32); - pf_data.aer_h3 = (uint32_t)tx_h2; - - /* get fault addr from tx logs */ - sts = pf_tlp_decode(rpdip, &pf_data, 0, &fault_addr, - &trans_type); - - if (sts == DDI_SUCCESS) - (void) px_rp_en_q(px_p, fault_bdf, fault_addr, - s_status); - } (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, @@ -2232,13 +2161,17 @@ ss_reg, FIRE_TLU_OEESS, DATA_TYPE_UINT64, CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), - FIRE_TLU_ROEEH1L, DATA_TYPE_UINT64, rx_h1, - FIRE_TLU_ROEEH2L, DATA_TYPE_UINT64, rx_h2, - FIRE_TLU_TOEEH1L, DATA_TYPE_UINT64, tx_h1, - FIRE_TLU_TOEEH2L, DATA_TYPE_UINT64, tx_h2, + FIRE_TLU_ROEEH1L, DATA_TYPE_UINT64, + CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG), + FIRE_TLU_ROEEH2L, DATA_TYPE_UINT64, + CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG), + FIRE_TLU_TOEEH1L, DATA_TYPE_UINT64, + CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG), + FIRE_TLU_TOEEH2L, DATA_TYPE_UINT64, + CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG), NULL); - return (PX_NO_PANIC); + return (PX_OK); } /* TLU Other Event - see io erpt doc, section 3.9 */ @@ -2261,5 +2194,5 @@ CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), NULL); - return (PX_NO_PANIC); + return (PX_OK); }
--- a/usr/src/uts/sun4u/io/px/px_err_impl.h Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4u/io/px/px_err_impl.h Mon Dec 18 11:06:59 2006 -0800 @@ -90,7 +90,7 @@ * Macro to create the error handling forward declaration * * The error handlers examines error, determine the nature of the error - * and return error status in terms of PX_HW_RESET | PX_PANIC | ... + * and return error status in terms of PX_FATAL_HW | PX_FATAL_GOS | ... * terminology. */ #define PX_ERR_BIT_HANDLE_DEC(n) int px_err_ ## n ## _handle\ @@ -114,21 +114,25 @@ /* * Predefined error handling functions. */ -void px_err_log_handle(dev_info_t *rpdip, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr, char *msg); -int px_err_hw_reset_handle(dev_info_t *rpdip, caddr_t csr_base, +int px_err_fatal_hw_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); -int px_err_panic_handle(dev_info_t *rpdip, caddr_t csr_base, +int px_err_fatal_gos_handle(dev_info_t *rpdip, caddr_t csr_base, + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr); +int px_err_fatal_stuck_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); -int px_err_protected_handle(dev_info_t *rpdip, caddr_t csr_base, +int px_err_fatal_sw_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); -int px_err_no_panic_handle(dev_info_t *rpdip, caddr_t csr_base, +int px_err_non_fatal_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); -int px_err_no_error_handle(dev_info_t *rpdip, caddr_t csr_base, +int px_err_ok_handle(dev_info_t *rpdip, caddr_t csr_base, + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr); +int px_err_unknown_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); @@ -147,8 +151,8 @@ /* * Fire JBC error Handling Forward Declarations - * the must-panic type errors such as PX_PANIC or - * post-reset-diagnosed type error such as PX_HW_RESET + * the must-panic type errors such as PX_FATAL_GOS or + * post-reset-diagnosed type error such as PX_FATAL_HW * are not furthur diagnosed here because there is no * justification to find out more as immediate error * handling. FMA DE will do the post analysis. @@ -165,6 +169,9 @@ int px_err_jbc_safe_acc_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); +int px_err_jbc_csr_handle(dev_info_t *rpdip, caddr_t csr_base, + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr); /* Fire JBC error ereport Forward Declarations */ PX_ERPT_SEND_DEC(jbc_fatal); @@ -186,6 +193,12 @@ #define PX_ERR_DMC_CLASS(n) PCIEX_FIRE "." FIRE_DMC_ ## n /* Fire Bit Error Handling Forward Declarations */ +int px_err_imu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base, + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr); +int px_err_imu_pme_handle(dev_info_t *rpdip, caddr_t csr_base, + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr); int px_err_imu_eq_ovfl_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); @@ -195,7 +208,10 @@ int px_err_mmu_tfa_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); -int px_err_mmu_parity_handle(dev_info_t *rpdip, caddr_t csr_base, +int px_err_mmu_tte_cae_handle(dev_info_t *rpdip, caddr_t csr_base, + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr); +int px_err_mmu_tblwlk_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); @@ -213,9 +229,6 @@ #define PX_ERR_PEC_CLASS(n) PCIEX_FIRE "." FIRE_PEC_ ## n #define PX_ERR_PEC_OB_CLASS(n) PCIEX_OBERON "." FIRE_PEC_ ## n -int px_err_wuc_ruc_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr); int px_err_tlu_lup_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr);
--- a/usr/src/uts/sun4u/io/px/px_lib4u.c Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4u/io/px/px_lib4u.c Mon Dec 18 11:06:59 2006 -0800 @@ -1260,7 +1260,6 @@ if (ret != H_EOK) cb_p->attachcnt++; } - pxu_p->cpr_flag = PX_ENTERED_CPR; fail: return ((ret != H_EOK) ? DDI_FAILURE: DDI_SUCCESS); @@ -1425,19 +1424,14 @@ hp->ah_acc.devacc_attr_dataorder = DDI_STRICTORDER_ACC; } -/* This function is called only by poke, caut put and pxtool poke. */ void -px_lib_clr_errs(px_t *px_p, dev_info_t *rdip, uint64_t addr) +px_lib_clr_errs(px_t *px_p) { px_pec_t *pec_p = px_p->px_pec_p; dev_info_t *rpdip = px_p->px_dip; - int rc_err, fab_err, i; + int err = PX_OK, ret; int acctype = pec_p->pec_safeacc_type; ddi_fm_error_t derr; - px_ranges_t *ranges_p; - int range_len; - uint32_t addr_high, addr_low; - pcie_req_id_t bdf = 0; /* Create the derr */ bzero(&derr, sizeof (ddi_fm_error_t)); @@ -1453,44 +1447,19 @@ mutex_enter(&px_p->px_fm_mutex); /* send ereport/handle/clear fire registers */ - rc_err = px_err_cmn_intr(px_p, &derr, PX_LIB_CALL, PX_FM_BLOCK_ALL); - - /* Figure out if this is a cfg or mem32 access */ - addr_high = (uint32_t)(addr >> 32); - addr_low = (uint32_t)addr; - range_len = px_p->px_ranges_length / sizeof (px_ranges_t); - i = 0; - for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) { - if (ranges_p->parent_high == addr_high) { - switch (ranges_p->child_high & PCI_ADDR_MASK) { - case PCI_ADDR_CONFIG: - bdf = (pcie_req_id_t)(addr_low >> 12); - addr_low = 0; - break; - case PCI_ADDR_MEM32: - if (rdip) - (void) pcie_get_bdf_from_dip(rdip, - &bdf); - else - bdf = NULL; - break; - } - break; - } - } - - px_rp_en_q(px_p, bdf, addr_low, NULL); + err = px_err_handle(px_p, &derr, PX_LIB_CALL, B_TRUE); + + /* Check all child devices for errors */ + ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); + + mutex_exit(&px_p->px_fm_mutex); /* - * XXX - Current code scans the fabric for all px_tool accesses. - * In future, do not scan fabric for px_tool access to IO Root Nexus + * PX_FATAL_HW indicates a condition recovered from Fatal-Reset, + * therefore it does not cause panic. */ - fab_err = pf_scan_fabric(rpdip, &derr, px_p->px_dq_p, - &px_p->px_dq_tail); - - mutex_exit(&px_p->px_fm_mutex); - - px_err_panic(rc_err, PX_RC, fab_err); + if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || (ret == DDI_FM_FATAL)) + PX_FM_PANIC("Fatal System Port Error has occurred\n"); } #ifdef DEBUG @@ -1523,7 +1492,7 @@ } else err = DDI_FAILURE; - px_lib_clr_errs(px_p, rdip, in_args->dev_addr); + px_lib_clr_errs(px_p); if (otd.ot_trap & OT_DATA_ACCESS) err = DDI_FAILURE; @@ -1598,7 +1567,7 @@ if (flags == DDI_DEV_AUTOINCR) dev_addr += size; - px_lib_clr_errs(px_p, rdip, dev_addr); + px_lib_clr_errs(px_p); if (pec_p->pec_ontrap_data->ot_trap & OT_DATA_ACCESS) { err = DDI_FAILURE; @@ -2336,7 +2305,6 @@ break; case CB_CODE_CPR_RESUME: - pxu_p->cpr_flag = PX_NOT_CPR; mutex_enter(&ib_p->ib_ino_lst_mutex); ce_ino_p = px_ib_locate_ino(ib_p, ce_ino);
--- a/usr/src/uts/sun4u/io/px/px_lib4u.h Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4u/io/px/px_lib4u.h Mon Dec 18 11:06:59 2006 -0800 @@ -107,7 +107,6 @@ uint64_t *ib_config_state; uint64_t *xcb_config_state; uint64_t *msiq_config_state; - uint_t cpr_flag; /* sun4u specific vars */ caddr_t px_address[4]; @@ -119,10 +118,6 @@ #define PX2CB(px_p) (((pxu_t *)px_p->px_plat_p)->px_cb_p) -/* cpr_flag */ -#define PX_NOT_CPR 0 -#define PX_ENTERED_CPR 1 - /* * Event Queue data structure. */ @@ -388,7 +383,7 @@ extern int px_link_retrain(caddr_t csr_base); extern void px_enable_detect_quiet(caddr_t csr_base); -extern void px_lib_clr_errs(px_t *px_p, dev_info_t *rdip, uint64_t addr); +extern void px_lib_clr_errs(px_t *px_p); /* * Hotplug functions:
--- a/usr/src/uts/sun4u/io/px/px_tools_4u.c Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4u/io/px/px_tools_4u.c Mon Dec 18 11:06:59 2006 -0800 @@ -207,7 +207,7 @@ } else err = DDI_FAILURE; - px_lib_clr_errs(px_p, 0, paddr); + px_lib_clr_errs(px_p); if (otd.ot_trap & OT_DATA_ACCESS) err = DDI_FAILURE;
--- a/usr/src/uts/sun4v/io/px/px_err.c Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4v/io/px/px_err.c Mon Dec 18 11:06:59 2006 -0800 @@ -38,37 +38,16 @@ #include "px_obj.h" #include "px_err.h" -static void px_err_fill_pf_data(dev_info_t *dip, px_t *px_p, px_rc_err_t *epkt); -static uint_t px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt); -static int px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr, +static uint_t px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt); +static int px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt, int caller); -static void px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt, - boolean_t is_block_pci, char *msg); -static int px_cb_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, - px_rc_err_t *epkt); -static int px_mmu_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, - px_rc_err_t *epkt); -static int px_intr_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, - px_rc_err_t *epkt); -static int px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, - px_rc_err_t *epkt); -static int px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, - px_rc_err_t *epkt); -static void px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr, - px_rc_err_t *epkt); -static int px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr, - px_rc_err_t *epkt); - -/* Include the code generated sun4v epkt checking code */ -#include "px_err_gen.c" - -/* - * This variable indicates if we have a hypervisor that could potentially send - * incorrect epkts. We always set this to TRUE for now until we find a way to - * tell if this HV bug has been fixed. - */ -boolean_t px_legacy_epkt = B_TRUE; +static int px_cb_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, + px_rc_err_t *epkt, int caller); +static int px_mmu_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, + px_rc_err_t *epkt, int caller); +static int px_pcie_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, + px_rc_err_t *epkt, int caller); /* * px_err_cb_intr: @@ -81,7 +60,7 @@ px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; if (epkt != NULL) { - return (px_err_intr(fault_p, epkt)); + return (px_err_common_intr(fault_p, epkt)); } return (DDI_INTR_UNCLAIMED); @@ -98,108 +77,55 @@ px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; if (epkt != NULL) { - return (px_err_intr(fault_p, epkt)); + return (px_err_common_intr(fault_p, epkt)); } return (DDI_INTR_UNCLAIMED); } /* - * px_err_cmn_intr: + * px_err_handle: * Common function called by trap, mondo and fabric intr. * This function is more meaningful in sun4u implementation. Kept * to mirror sun4u call stack. * o check for safe access - * o create and queue RC info for later use in fabric scan. - * o RUC/WUC, PTLP, MMU Errors(CA), UR * * @param px_p leaf in which to check access * @param derr fm err data structure to be updated * @param caller PX_TRAP_CALL | PX_INTR_CALL * @param chkjbc whether to handle hostbus registers (ignored) - * @return err PX_NO_PANIC | PX_PROTECTED | - * PX_PANIC | PX_HW_RESET | PX_EXPECTED + * @return err PX_OK | PX_NONFATAL | + * PX_FATAL_GOS | PX_FATAL_HW | PX_STUCK_FATAL */ /* ARGSUSED */ int -px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block) +px_err_handle(px_t *px_p, ddi_fm_error_t *derr, int caller, + boolean_t chkxbc) { + /* check for safe access */ px_err_safeacc_check(px_p, derr); + return (DDI_FM_OK); } /* - * fills RC specific fault data - */ -static void -px_err_fill_pfd(dev_info_t *dip, px_t *px_p, px_rc_err_t *epkt) { - pf_data_t pf_data = {0}; - int sts = DDI_SUCCESS; - pcie_req_id_t fault_bdf = 0; - uint32_t fault_addr = 0; - uint16_t s_status = 0; - - /* Add an PCIE PF_DATA Entry */ - if (epkt->rc_descr.block == BLOCK_MMU) { - /* Only PIO Fault Addresses are valid, this is DMA */ - s_status = PCI_STAT_S_TARG_AB; - fault_addr = NULL; - - if (epkt->rc_descr.H) - fault_bdf = (pcie_req_id_t)(epkt->hdr[0] >> 16); - else - sts = DDI_FAILURE; - } else { - px_pec_err_t *pec_p = (px_pec_err_t *)epkt; - uint32_t trans_type; - uint32_t dir = pec_p->pec_descr.dir; - - pf_data.aer_h0 = (uint32_t)(pec_p->hdr[0]); - pf_data.aer_h1 = (uint32_t)(pec_p->hdr[0] >> 32); - pf_data.aer_h2 = (uint32_t)(pec_p->hdr[1]); - pf_data.aer_h3 = (uint32_t)(pec_p->hdr[1] >> 32); - - /* translate RC UR/CA to legacy secondary errors */ - if ((dir == DIR_READ || dir == DIR_WRITE) && - pec_p->pec_descr.U) { - if (pec_p->ue_reg_status & PCIE_AER_UCE_UR) - s_status |= PCI_STAT_R_MAST_AB; - if (pec_p->ue_reg_status | PCIE_AER_UCE_CA) - s_status |= PCI_STAT_R_TARG_AB; - } - - if (pec_p->ue_reg_status & PCIE_AER_UCE_PTLP) - s_status |= PCI_STAT_PERROR; - - if (pec_p->ue_reg_status & PCIE_AER_UCE_CA) - s_status |= PCI_STAT_S_TARG_AB; - - sts = pf_tlp_decode(dip, &pf_data, &fault_bdf, &fault_addr, - &trans_type); - } - - if (sts == DDI_SUCCESS) - px_rp_en_q(px_p, fault_bdf, fault_addr, s_status); -} - -/* - * px_err_intr: + * px_err_common_intr: * Interrupt handler for the JBC/DMC/PEC block. * o lock * o create derr * o check safe access - * o px_err_check_severity(epkt) - * o pcie_scan_fabric + * o px_err_check_severiy(epkt) + * o dispatch * o Idle intr state * o unlock * o handle error: fatal? fm_panic() : return INTR_CLAIMED) */ static uint_t -px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt) +px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt) { px_t *px_p = DIP_TO_STATE(fault_p->px_fh_dip); dev_info_t *rpdip = px_p->px_dip; - int rc_err, fab_err = PF_NO_PANIC, msg; + int err, ret; ddi_fm_error_t derr; mutex_enter(&px_p->px_fm_mutex); @@ -211,15 +137,13 @@ derr.fme_flag = DDI_FM_ERR_UNEXPECTED; /* Basically check for safe access */ - (void) px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_ALL); + (void) px_err_handle(px_p, &derr, PX_INTR_CALL, B_FALSE); /* Check the severity of this error */ - rc_err = px_err_epkt_severity(px_p, &derr, epkt, PX_INTR_CALL); + err = px_err_check_severity(px_p, &derr, epkt, PX_INTR_CALL); - /* Scan the fabric if the root port is not in drain state. */ - if (!px_lib_is_in_drain_state(px_p)) - fab_err = pf_scan_fabric(rpdip, &derr, px_p->px_dq_p, - &px_p->px_dq_tail); + /* check for error severity */ + ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); /* Set the intr state to idle for the leaf that received the mondo */ if (px_lib_intr_setstate(rpdip, fault_p->px_fh_sysino, @@ -230,27 +154,14 @@ mutex_exit(&px_p->px_fm_mutex); - switch (epkt->rc_descr.block) { - case BLOCK_MMU: /* FALLTHROUGH */ - case BLOCK_INTR: - msg = PX_RC; - break; - case BLOCK_PCIE: - msg = PX_RP; - break; - case BLOCK_HOSTBUS: /* FALLTHROUGH */ - default: - msg = PX_HB; - break; - } - - px_err_panic(rc_err, msg, fab_err); + if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || (ret == DDI_FM_FATAL)) + PX_FM_PANIC("Fatal System Bus Error has occurred\n"); return (DDI_INTR_CLAIMED); } /* - * px_err_epkt_severity: + * px_err_check_severity: * Check the severity of the fire error based the epkt received * * @param px_p leaf in which to take the snap shot. @@ -258,19 +169,15 @@ * @param epkt epkt recevied from HV */ static int -px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt, +px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt, int caller) { px_pec_t *pec_p = px_p->px_pec_p; dev_info_t *dip = px_p->px_dip; - boolean_t is_safeacc = B_FALSE; - boolean_t is_block_pci = B_FALSE; - char buf[FM_MAX_CLASS], descr_buf[1024]; int err = 0; /* Cautious access error handling */ - switch (derr->fme_flag) { - case DDI_FM_ERR_EXPECTED: + if (derr->fme_flag == DDI_FM_ERR_EXPECTED) { if (caller == PX_TRAP_CALL) { /* * for ddi_caut_get treat all events as nonfatal @@ -278,7 +185,6 @@ * err_status = NONFATAL. */ derr->fme_status = DDI_FM_NONFATAL; - is_safeacc = B_TRUE; } else { /* * For ddi_caut_put treat all events as nonfatal. Here @@ -286,358 +192,175 @@ */ derr->fme_status = DDI_FM_NONFATAL; ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr); - is_safeacc = B_TRUE; } - break; - case DDI_FM_ERR_PEEK: - case DDI_FM_ERR_POKE: - /* - * For ddi_peek/poke treat all events as nonfatal. - */ - is_safeacc = B_TRUE; - break; - default: - is_safeacc = B_FALSE; } - /* - * Older hypervisors in some cases send epkts with incorrect fields. - * We have to handle these "special" epkts correctly. - */ - if (px_legacy_epkt) - px_fix_legacy_epkt(dip, derr, epkt); - switch (epkt->rc_descr.block) { case BLOCK_HOSTBUS: - err = px_cb_epkt_severity(dip, derr, epkt); + err = px_cb_check_errors(dip, derr, epkt, caller); break; case BLOCK_MMU: - err = px_mmu_epkt_severity(dip, derr, epkt); - px_err_fill_pfd(dip, px_p, epkt); + err = px_mmu_check_errors(dip, derr, epkt, caller); break; case BLOCK_INTR: - err = px_intr_epkt_severity(dip, derr, epkt); + err = PX_NONFATAL; break; case BLOCK_PCIE: - is_block_pci = B_TRUE; - err = px_pcie_epkt_severity(dip, derr, epkt); - px_err_fill_pfd(dip, px_p, epkt); + err = px_pcie_check_errors(dip, derr, epkt, caller); break; default: - err = 0; + err = PX_ERR_UNKNOWN; } - if ((err & PX_HW_RESET) || (err & PX_PANIC)) { - if (px_log & PX_PANIC) - px_err_log_handle(dip, epkt, is_block_pci, "PANIC"); - } else if (err & PX_PROTECTED) { - if (px_log & PX_PROTECTED) - px_err_log_handle(dip, epkt, is_block_pci, "PROTECTED"); - } else if (err & PX_NO_PANIC) { - if (px_log & PX_NO_PANIC) - px_err_log_handle(dip, epkt, is_block_pci, "NO PANIC"); - } else if (err & PX_NO_ERROR) { - if (px_log & PX_NO_ERROR) - px_err_log_handle(dip, epkt, is_block_pci, "NO ERROR"); - } else if (err == 0) { - px_err_log_handle(dip, epkt, is_block_pci, "UNRECOGNIZED"); - - /* Unrecognized epkt. send ereport */ - (void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_RC_UNRECOG); - - if (is_block_pci) { - px_pec_err_t *pec = (px_pec_err_t *)epkt; - - (void) snprintf(descr_buf, sizeof (descr_buf), - "Epkt contents:\n" - "Block: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d\n" - "I=%d, H=%d, C=%d, U=%d, E=%d, P=%d\n" - "PCI Err Status: 0x%x, PCIe Err Status: 0x%x\n" - "CE Status Reg: 0x%x, UE Status Reg: 0x%x\n" - "HDR1: 0x%lx, HDR2: 0x%lx\n" - "Err Src Reg: 0x%x, Root Err Status: 0x%x\n", - pec->pec_descr.block, pec->pec_descr.dir, - pec->pec_descr.Z, pec->pec_descr.S, - pec->pec_descr.R, pec->pec_descr.I, - pec->pec_descr.H, pec->pec_descr.C, - pec->pec_descr.U, pec->pec_descr.E, - pec->pec_descr.P, pec->pci_err_status, - pec->pcie_err_status, pec->ce_reg_status, - pec->ue_reg_status, pec->hdr[0], - pec->hdr[1], pec->err_src_reg, - pec->root_err_status); - - ddi_fm_ereport_post(dip, buf, derr->fme_ena, - DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, - EPKT_SYSINO, DATA_TYPE_UINT64, pec->sysino, - EPKT_EHDL, DATA_TYPE_UINT64, pec->ehdl, - EPKT_STICK, DATA_TYPE_UINT64, pec->stick, - EPKT_PEC_DESCR, DATA_TYPE_STRING, descr_buf); - } else { - (void) snprintf(descr_buf, sizeof (descr_buf), - "Epkt contents:\n" - "Block: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n" - "Dir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d\n" - "M=%d, S=%d, Size: 0x%x, Addr: 0x%lx\n" - "Hdr1: 0x%lx, Hdr2: 0x%lx, Res: 0x%lx\n", - epkt->rc_descr.block, epkt->rc_descr.op, - epkt->rc_descr.phase, epkt->rc_descr.cond, - epkt->rc_descr.dir, epkt->rc_descr.STOP, - epkt->rc_descr.H, epkt->rc_descr.R, - epkt->rc_descr.D, epkt->rc_descr.M, - epkt->rc_descr.S, epkt->size, epkt->addr, - epkt->hdr[0], epkt->hdr[1], epkt->reserved); - - ddi_fm_ereport_post(dip, buf, derr->fme_ena, - DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, - EPKT_SYSINO, DATA_TYPE_UINT64, epkt->sysino, - EPKT_EHDL, DATA_TYPE_UINT64, epkt->ehdl, - EPKT_STICK, DATA_TYPE_UINT64, epkt->stick, - EPKT_RC_DESCR, DATA_TYPE_STRING, descr_buf); - } - - err = PX_PANIC; - } - - /* Readjust the severity as a result of safe access */ - if (is_safeacc && !(err & PX_PANIC) && !(px_die & PX_PROTECTED)) - err = PX_NO_PANIC; - return (err); } -static void -px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt, boolean_t is_block_pci, - char *msg) +/* ARGSUSED */ +static int +px_cb_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, + px_rc_err_t *epkt, int caller) { - if (is_block_pci) { - px_pec_err_t *pec = (px_pec_err_t *)epkt; - DBG(DBG_ERR_INTR, dip, - "A PCIe root port error has occured with a severity" - " \"%s\"\n" - "\tBlock: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d, I=%d\n" - "\tH=%d, C=%d, U=%d, E=%d, P=%d\n" - "\tpci_err: 0x%x, pcie_err=0x%x, ce_reg: 0x%x\n" - "\tue_reg: 0x%x, Hdr1: 0x%p, Hdr2: 0x%p\n" - "\terr_src: 0x%x, root_err: 0x%x\n", - msg, pec->pec_descr.block, pec->pec_descr.dir, - pec->pec_descr.Z, pec->pec_descr.S, pec->pec_descr.R, - pec->pec_descr.I, pec->pec_descr.H, pec->pec_descr.C, - pec->pec_descr.U, pec->pec_descr.E, pec->pec_descr.P, - pec->pci_err_status, pec->pcie_err_status, - pec->ce_reg_status, pec->ue_reg_status, pec->hdr[0], - pec->hdr[1], pec->err_src_reg, pec->root_err_status); - } else { - DBG(DBG_ERR_INTR, dip, - "A PCIe root complex error has occured with a severity" - " \"%s\"\n" - "\tBlock: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n" - "\tDir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d, M=%d\n" - "\tS=%d, Size: 0x%x, Addr: 0x%p\n" - "\tHdr1: 0x%p, Hdr2: 0x%p, Res: 0x%p\n", - msg, epkt->rc_descr.block, epkt->rc_descr.op, - epkt->rc_descr.phase, epkt->rc_descr.cond, - epkt->rc_descr.dir, epkt->rc_descr.STOP, epkt->rc_descr.H, - epkt->rc_descr.R, epkt->rc_descr.D, epkt->rc_descr.M, - epkt->rc_descr.S, epkt->size, epkt->addr, epkt->hdr[0], - epkt->hdr[1], epkt->reserved); - } -} + int fme_flag = derr->fme_flag; + boolean_t is_safeacc; + int ret, err = 0; + + is_safeacc = (fme_flag == DDI_FM_ERR_EXPECTED) || + (fme_flag == DDI_FM_ERR_PEEK) || + (fme_flag == DDI_FM_ERR_POKE); -/* ARGSUSED */ -static void -px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) -{ - /* - * We don't have a default case for any of the below switch statements - * since we are ok with the code falling through. - */ - switch (epkt->rc_descr.block) { - case BLOCK_HOSTBUS: - switch (epkt->rc_descr.op) { - case OP_DMA: - switch (epkt->rc_descr.phase) { - case PH_UNKNOWN: - switch (epkt->rc_descr.cond) { - case CND_UNKNOWN: - switch (epkt->rc_descr.dir) { - case DIR_RESERVED: - epkt->rc_descr.dir = DIR_READ; - break; - } /* DIR */ - } /* CND */ - } /* PH */ - } /* OP */ + /* block/op/phase/cond/dir/flag... */ + switch (epkt->rc_descr.op) { + case OP_PIO: + err = PX_NONFATAL; + /* check handle if affected memory address is captured */ + if (epkt->rc_descr.M != 0) { + ret = px_handle_lookup(dip, ACC_HANDLE, + derr->fme_ena, (void *)epkt->addr); + } + if (ret == DDI_FM_FATAL) + err |= PX_FATAL_GOS; break; - case BLOCK_MMU: - switch (epkt->rc_descr.op) { - case OP_XLAT: - switch (epkt->rc_descr.phase) { - case PH_DATA: - switch (epkt->rc_descr.cond) { - case CND_PROT: - switch (epkt->rc_descr.dir) { - case DIR_UNKNOWN: - epkt->rc_descr.dir = DIR_WRITE; - break; - } /* DIR */ - } /* CND */ + + case OP_DMA: + switch (epkt->rc_descr.phase) { + case PH_ADDR: + err = PX_FATAL_GOS; + break; + case PH_DATA: + if (epkt->rc_descr.cond == CND_UE) { + err = PX_FATAL_GOS; break; - case PH_IRR: - switch (epkt->rc_descr.cond) { - case CND_RESERVED: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - epkt->rc_descr.phase = PH_ADDR; - epkt->rc_descr.cond = CND_IRR; - } /* DIR */ - } /* CND */ - } /* PH */ - } /* OP */ + } + + err = PX_NONFATAL; + if (epkt->rc_descr.M == 1) { + ret = px_handle_lookup(dip, DMA_HANDLE, + derr->fme_ena, (void *)epkt->addr); + if (ret == DDI_FM_FATAL) + err |= PX_FATAL_GOS; + } + break; + default: + DBG(DBG_ERR_INTR, dip, "Unexpected epkt"); + err = PX_FATAL_GOS; + break; + } break; - case BLOCK_INTR: - switch (epkt->rc_descr.op) { - case OP_MSIQ: - switch (epkt->rc_descr.phase) { - case PH_UNKNOWN: - switch (epkt->rc_descr.cond) { - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_RESERVED: - epkt->rc_descr.dir = DIR_IRR; - break; - } /* DIR */ - break; - case CND_IRR: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - epkt->rc_descr.cond = CND_OV; - break; - } /* DIR */ - } /* CND */ - } /* PH */ - break; - case OP_RESERVED: - switch (epkt->rc_descr.phase) { - case PH_UNKNOWN: - switch (epkt->rc_descr.cond) { - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - epkt->rc_descr.op = OP_MSI32; - epkt->rc_descr.phase = PH_DATA; - break; - } /* DIR */ - } /* CND */ - break; - case PH_DATA: - switch (epkt->rc_descr.cond) { - case CND_INT: - switch (epkt->rc_descr.dir) { - case DIR_UNKNOWN: - epkt->rc_descr.op = OP_MSI32; - break; - } /* DIR */ - } /* CND */ - } /* PH */ - } /* OP */ - } /* BLOCK */ + case OP_UNKNOWN: + err = PX_NONFATAL; + if ((epkt->rc_descr.cond == CND_UNMAP) || + (epkt->rc_descr.cond == CND_UE) || + (epkt->rc_descr.cond == CND_INT) || + (epkt->rc_descr.cond == CND_ILL)) + err |= PX_FATAL_GOS; + + if (epkt->rc_descr.M == 1) { + int ret1, ret2; + + ret1 = px_handle_lookup(dip, DMA_HANDLE, derr->fme_ena, + (void *)epkt->addr); + ret2 = px_handle_lookup(dip, ACC_HANDLE, derr->fme_ena, + (void *)epkt->addr); + + if (ret1 == DDI_FM_FATAL || ret2 == DDI_FM_FATAL) + err |= PX_FATAL_GOS; + } + break; + + case OP_RESERVED: + default: + DBG(DBG_ERR_INTR, NULL, "Unrecognized JBC error."); + err = PX_FATAL_GOS; + break; + } + + /* + * For protected safe access, consider PX_FATAL_GOS as the only + * exception for px to take immediate panic, else, treat errors + * as nonfatal. + */ + if (is_safeacc) { + if (err & PX_FATAL_GOS) + err = PX_FATAL_GOS; + else + err = PX_NONFATAL; + } + + return (err); } /* ARGSUSED */ static int -px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) +px_mmu_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, + px_rc_err_t *epkt, int caller) { - return (px_err_check_eq(dip)); + int ret, err = 0; + + switch (epkt->rc_descr.op) { + case OP_BYPASS: /* nonfatal */ + case OP_XLAT: /* nonfatal, stuck-fatal, fatal-reset */ + case OP_TBW: /* nonfatal, stuck-fatal */ + err = PX_NONFATAL; + break; + default: + err = PX_ERR_UNKNOWN; + break; + } + + if ((epkt->rc_descr.D != 0) || (epkt->rc_descr.M != 0)) { + ret = px_handle_lookup(dip, DMA_HANDLE, derr->fme_ena, + (void *)epkt->addr); + if (ret == DDI_FM_FATAL) + err |= PX_FATAL_GOS; + else + err |= PX_NONFATAL; + } else + err |= PX_NONFATAL; + + return (err); } /* ARGSUSED */ static int -px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) +px_pcie_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, + px_rc_err_t *epkt, int caller) { + int ret = PX_NONFATAL; px_pec_err_t *pec = (px_pec_err_t *)epkt; - px_err_pcie_t *pcie = (px_err_pcie_t *)epkt; - pf_data_t pf_data; - int x; - uint32_t temp; - /* - * Check for failed PIO Read/Writes, which are errors that are not - * defined in the PCIe spec. - */ - temp = PCIE_AER_UCE_UR | PCIE_AER_UCE_CA; - if (((pec->pec_descr.dir == DIR_READ) || (pec->pec_descr.dir == - DIR_WRITE)) && pec->pec_descr.U && (pec->ue_reg_status == temp)) { - pf_data.aer_h0 = (uint32_t)(pec->hdr[0]); - pf_data.aer_h1 = (uint32_t)(pec->hdr[0] >> 32); - pf_data.aer_h2 = (uint32_t)(pec->hdr[1]); - pf_data.aer_h3 = (uint32_t)(pec->hdr[1] >> 32); - - if (pf_tlp_hdl_lookup(dip, derr, &pf_data) != DDI_FM_UNKNOWN) - return (PX_NO_PANIC); - else - return (PX_PANIC); + switch (pec->pec_descr.dir) { + case DIR_INGRESS: + case DIR_EGRESS: + case DIR_LINK: + ret |= PX_FABRIC_ERR_SEV(pec->ue_reg_status, + px_fabric_die_rc_ue, px_fabric_die_rc_ue_gos); + ret |= PX_FABRIC_ERR_SEV(pec->ue_reg_status, + px_fabric_die_rc_ce, px_fabric_die_rc_ce_gos); + break; + default: + ret = PX_ERR_UNKNOWN; + break; } - if (!pec->pec_descr.C) - pec->ce_reg_status = 0; - if (!pec->pec_descr.U) - pec->ue_reg_status = 0; - if (!pec->pec_descr.H) - pec->hdr[0] = 0; - if (!pec->pec_descr.I) - pec->hdr[1] = 0; - - /* - * According to the PCIe spec, there is a first error pointer. If there - * are header logs recorded and there are more than one error, the log - * will belong to the error that the first error pointer points to. - * - * The regs.primary_ue expects a bit number, go through the ue register - * and find the first error that occured. Because the sun4v epkt spec - * does not define this value, the algorithm below gives the lower bit - * priority. - */ - temp = pcie->ue_reg; - if (temp) { - for (x = 0; !(temp & 0x1); x++) { - temp = temp >> 1; - } - pcie->primary_ue = 1 << x; - } else { - pcie->primary_ue = 0; - } - - /* Sun4v doesn't log the TX hdr except for CTOs */ - if (pcie->primary_ue == PCIE_AER_UCE_TO) { - pcie->tx_hdr1 = pcie->rx_hdr1; - pcie->tx_hdr2 = pcie->rx_hdr2; - pcie->tx_hdr3 = pcie->rx_hdr3; - pcie->tx_hdr4 = pcie->rx_hdr4; - pcie->rx_hdr1 = 0; - pcie->rx_hdr2 = 0; - pcie->rx_hdr3 = 0; - pcie->rx_hdr4 = 0; - } else { - pcie->tx_hdr1 = 0; - pcie->tx_hdr2 = 0; - pcie->tx_hdr3 = 0; - pcie->tx_hdr4 = 0; - } - - return (px_err_check_pcie(dip, derr, pcie)); + return (ret); } - -static int -px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) -{ - uint32_t addr = (uint32_t)epkt->addr; - pcie_req_id_t bdf = NULL; - - if (epkt->rc_descr.H) { - bdf = (uint32_t)((epkt->hdr[0] >> 16) && 0xFFFF); - } - - return (pf_hdl_lookup(dip, derr->fme_ena, PF_DMA_ADDR, addr, - bdf)); -}
--- a/usr/src/uts/sun4v/io/px/px_err.h Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4v/io/px/px_err.h Mon Dec 18 11:06:59 2006 -0800 @@ -2,8 +2,9 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -111,13 +112,6 @@ #define DIR_UNKNOWN 0xe #define DIR_IRR 0xf -#define PX_FM_RC_UNRECOG "fire.epkt" -#define EPKT_SYSINO "sysino" -#define EPKT_EHDL "ehdl" -#define EPKT_STICK "stick" -#define EPKT_RC_DESCR "rc_descr" -#define EPKT_PEC_DESCR "pec_descr" - typedef struct root_complex { uint64_t sysino; uint64_t ehdl; @@ -128,8 +122,7 @@ phase : 4, cond : 4, dir : 4, - STOP : 1, - : 6, + : 7, H : 1, R : 1, D : 1,
--- a/usr/src/uts/sun4v/io/px/px_err_gen.c Mon Dec 18 10:59:02 2006 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,475 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * The file has been code generated. Do NOT modify this file directly. Please - * use the sun4v PCIe FMA code generation tool. - * - * This file was generated for the following platforms: - * - Fire - * - N2PIU - */ - -/* ARGSUSED */ -static int -px_cb_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) -{ - int err = 0; - - /* STOP bit indicates a secondary error. Panic if it is set */ - if (epkt->rc_descr.STOP == 1) - return (PX_PANIC); - - switch (epkt->rc_descr.op) { - case OP_DMA: - switch (epkt->rc_descr.phase) { - case PH_ADDR: - switch (epkt->rc_descr.cond) { - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_WRITE: - err = PX_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - case PH_DATA: - switch (epkt->rc_descr.cond) { - case CND_INT: - switch (epkt->rc_descr.dir) { - case DIR_READ: - err = PX_PANIC; - break; - case DIR_RDWR: - err = PX_PANIC; - break; - case DIR_WRITE: - err = PX_PANIC; - break; - } /* DIR */ - break; - case CND_UE: - switch (epkt->rc_descr.dir) { - case DIR_READ: - err = PX_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - case PH_UNKNOWN: - switch (epkt->rc_descr.cond) { - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_READ: - err = PX_PANIC; - break; - } /* DIR */ - break; - case CND_UNKNOWN: - switch (epkt->rc_descr.dir) { - case DIR_READ: - err = PX_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - } /* PH */ - break; - case OP_PIO: - switch (epkt->rc_descr.phase) { - case PH_ADDR: - switch (epkt->rc_descr.cond) { - case CND_UNMAP: - switch (epkt->rc_descr.dir) { - case DIR_READ: - err = PX_PANIC; - break; - case DIR_WRITE: - err = PX_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - case PH_DATA: - switch (epkt->rc_descr.cond) { - case CND_INT: - switch (epkt->rc_descr.dir) { - case DIR_READ: - err = PX_PANIC; - break; - case DIR_RDWR: - err = PX_PANIC; - break; - case DIR_WRITE: - err = PX_PANIC; - break; - } /* DIR */ - break; - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_WRITE: - err = PX_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - case PH_UNKNOWN: - switch (epkt->rc_descr.cond) { - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_READ: - err = PX_PANIC; - break; - case DIR_WRITE: - err = PX_PANIC; - break; - } /* DIR */ - break; - case CND_TO: - switch (epkt->rc_descr.dir) { - case DIR_RDWR: - err = PX_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - } /* PH */ - break; - case OP_UNKNOWN: - switch (epkt->rc_descr.phase) { - case PH_ADDR: - switch (epkt->rc_descr.cond) { - case CND_UNMAP: - switch (epkt->rc_descr.dir) { - case DIR_RDWR: - err = PX_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - case PH_DATA: - switch (epkt->rc_descr.cond) { - case CND_UE: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - err = PX_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - case PH_UNKNOWN: - switch (epkt->rc_descr.cond) { - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - err = PX_PANIC; - break; - } /* DIR */ - } /* CND */ - } /* PH */ - } /* OP */ - - return (err); -} - - -/* ARGSUSED */ -static int -px_mmu_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) -{ - int err = 0; - - /* STOP bit indicates a secondary error. Panic if it is set */ - if (epkt->rc_descr.STOP == 1) - return (PX_PANIC); - - switch (epkt->rc_descr.op) { - case OP_BYPASS: - switch (epkt->rc_descr.phase) { - case PH_ADDR: - switch (epkt->rc_descr.cond) { - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_RDWR: - err = PX_NO_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - case PH_UNKNOWN: - switch (epkt->rc_descr.cond) { - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_UNKNOWN: - err = PX_NO_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - } /* PH */ - break; - case OP_TBW: - switch (epkt->rc_descr.phase) { - case PH_DATA: - switch (epkt->rc_descr.cond) { - case CND_INT: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - err = PX_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - case PH_UNKNOWN: - switch (epkt->rc_descr.cond) { - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - err = PX_PANIC; - break; - } /* DIR */ - break; - case CND_UNKNOWN: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - err = PX_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - } /* PH */ - break; - case OP_XLAT: - switch (epkt->rc_descr.phase) { - case PH_ADDR: - switch (epkt->rc_descr.cond) { - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_RDWR: - err = PX_NO_PANIC; - break; - } /* DIR */ - break; - case CND_IRR: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - err = PX_PANIC; - break; - } /* DIR */ - break; - case CND_PROT: - switch (epkt->rc_descr.dir) { - case DIR_RDWR: - err = PX_NO_PANIC; - break; - } /* DIR */ - break; - case CND_UNMAP: - switch (epkt->rc_descr.dir) { - case DIR_RDWR: - err = PX_NO_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - case PH_DATA: - switch (epkt->rc_descr.cond) { - case CND_INV: - switch (epkt->rc_descr.dir) { - case DIR_RDWR: - err = PX_NO_PANIC; - break; - case DIR_UNKNOWN: - err = PX_NO_PANIC; - break; - } /* DIR */ - break; - case CND_IRR: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - err = PX_PANIC; - break; - } /* DIR */ - break; - case CND_PROT: - switch (epkt->rc_descr.dir) { - case DIR_WRITE: - err = PX_NO_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - case PH_UNKNOWN: - switch (epkt->rc_descr.cond) { - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - err = PX_PANIC; - break; - } /* DIR */ - } /* CND */ - } /* PH */ - } /* OP */ - - if (epkt->rc_descr.D && (err & (PX_PANIC | PX_PROTECTED)) && - px_mmu_handle_lookup(dip, derr, epkt) == PF_HDL_FOUND) - err = PX_NO_PANIC; - - return (err); -} - - -/* ARGSUSED */ -static int -px_intr_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) -{ - int err = 0; - - /* STOP bit indicates a secondary error. Panic if it is set */ - if (epkt->rc_descr.STOP == 1) - return (PX_PANIC); - - switch (epkt->rc_descr.op) { - case OP_MSI32: - switch (epkt->rc_descr.phase) { - case PH_DATA: - switch (epkt->rc_descr.cond) { - case CND_INT: - switch (epkt->rc_descr.dir) { - case DIR_UNKNOWN: - err = PX_PANIC; - break; - } /* DIR */ - break; - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - err = PX_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - case PH_UNKNOWN: - switch (epkt->rc_descr.cond) { - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - err = PX_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - } /* PH */ - break; - case OP_MSI64: - switch (epkt->rc_descr.phase) { - case PH_DATA: - switch (epkt->rc_descr.cond) { - case CND_INT: - switch (epkt->rc_descr.dir) { - case DIR_UNKNOWN: - err = PX_PANIC; - break; - } /* DIR */ - break; - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - err = PX_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - case PH_UNKNOWN: - switch (epkt->rc_descr.cond) { - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - err = PX_PANIC; - break; - } /* DIR */ - break; - } /* CND */ - break; - } /* PH */ - break; - case OP_MSIQ: - switch (epkt->rc_descr.phase) { - case PH_UNKNOWN: - switch (epkt->rc_descr.cond) { - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - err = PX_PANIC; - break; - } /* DIR */ - break; - case CND_OV: - switch (epkt->rc_descr.dir) { - case DIR_IRR: - err = px_intr_handle_errors(dip, derr, - epkt); - break; - } /* DIR */ - break; - } /* CND */ - break; - } /* PH */ - break; - case OP_PCIEMSG: - switch (epkt->rc_descr.phase) { - case PH_UNKNOWN: - switch (epkt->rc_descr.cond) { - case CND_ILL: - switch (epkt->rc_descr.dir) { - case DIR_INGRESS: - err = PX_PANIC; - break; - } /* DIR */ - } /* CND */ - } /* PH */ - } /* OP */ - - return (err); -}
--- a/usr/src/uts/sun4v/io/px/px_lib4v.c Mon Dec 18 10:59:02 2006 -0800 +++ b/usr/src/uts/sun4v/io/px/px_lib4v.c Mon Dec 18 11:06:59 2006 -0800 @@ -1436,13 +1436,8 @@ * This will initiate something similar to px_fm_callback. */ static void -px_lib_log_safeacc_err(px_t *px_p, ddi_acc_handle_t handle, int fme_flag, - r_addr_t addr) +px_lib_log_safeacc_err(px_t *px_p, ddi_acc_handle_t handle, int fme_flag) { - uint32_t addr_high, addr_low; - pcie_req_id_t bdf; - px_ranges_t *ranges_p; - int range_len, i; ddi_acc_impl_t *hp = (ddi_acc_impl_t *)handle; ddi_fm_error_t derr; @@ -1454,41 +1449,9 @@ if (hp) hp->ahi_err->err_expected = DDI_FM_ERR_EXPECTED; - addr_high = (uint32_t)(addr >> 32); - addr_low = (uint32_t)addr; - - /* - * Make sure this failed load came from this PCIe port. Check by - * matching the upper 32 bits of the address with the ranges property. - */ - range_len = px_p->px_ranges_length / sizeof (px_ranges_t); - i = 0; - for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) { - if (ranges_p->parent_high == addr_high) { - switch (ranges_p->child_high & PCI_ADDR_MASK) { - case PCI_ADDR_CONFIG: - bdf = (pcie_req_id_t)(addr_low >> 12); - break; - default: - bdf = NULL; - break; - } - break; - } - } - mutex_enter(&px_p->px_fm_mutex); - if (!px_lib_is_in_drain_state(px_p)) { - /* - * This is to ensure that device corresponding to the addr of - * the failed PIO/CFG load gets scanned. - */ - px_rp_en_q(px_p, bdf, addr, - (PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB)); - (void) pf_scan_fabric(px_p->px_dip, &derr, - px_p->px_dq_p, &px_p->px_dq_tail); - } + (void) ndi_fm_handler_dispatch(px_p->px_dip, NULL, &derr); mutex_exit(&px_p->px_fm_mutex); } @@ -1593,7 +1556,7 @@ */ px_lib_log_safeacc_err(px_p, (ddi_acc_handle_t)hp, (hp ? DDI_FM_ERR_EXPECTED : - DDI_FM_ERR_POKE), ra); + DDI_FM_ERR_POKE)); pec_p->pec_ontrap_data = NULL; pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED; @@ -1683,7 +1646,7 @@ */ px_lib_log_safeacc_err(px_p, (ddi_acc_handle_t)hp, (hp ? DDI_FM_ERR_EXPECTED : - DDI_FM_ERR_PEEK), ra); + DDI_FM_ERR_PEEK)); /* Stuff FFs in host addr if peek. */ if (hp == NULL) {