Mercurial > illumos > illumos-gate
view usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_memerr_arch.c @ 3730:caeeab9fbf5a
6521864 Niagara-2 page retirement leaks memory
author | td122701 |
---|---|
date | Wed, 28 Feb 2007 16:03:37 -0800 |
parents | 52fa1667b7c1 |
children | f3c37b601acc |
line wrap: on
line source
/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" /* * Ereport-handling routines for memory errors */ #include <cmd_mem.h> #include <cmd_dimm.h> #include <cmd_bank.h> #include <cmd_page.h> #include <cmd_cpu.h> #include <cmd.h> #include <assert.h> #include <strings.h> #include <string.h> #include <errno.h> #include <unistd.h> #include <fm/fmd_api.h> #include <sys/fm/ldom.h> #include <sys/fm/protocol.h> #include <sys/fm/cpu/UltraSPARC-T1.h> #include <sys/mdesc.h> #include <sys/async.h> #include <sys/errclassify.h> #include <sys/niagararegs.h> #include <sys/fm/ldom.h> #include <ctype.h> extern ldom_hdl_t *cpumem_diagnosis_lhp; static fmd_hdl_t *cpumem_hdl = NULL; static void * cpumem_alloc(size_t size) { assert(cpumem_hdl != NULL); return (fmd_hdl_alloc(cpumem_hdl, size, FMD_SLEEP)); } static void cpumem_free(void *addr, size_t size) { assert(cpumem_hdl != NULL); fmd_hdl_free(cpumem_hdl, addr, size); } /*ARGSUSED*/ cmd_evdisp_t cmd_mem_synd_check(fmd_hdl_t *hdl, uint64_t afar, uint8_t afar_status, uint16_t synd, uint8_t synd_status, cmd_cpu_t *cpu) { /* * Niagara writebacks from L2 containing UEs are placed in memory * with the poison syndrome NI_DRAM_POISON_SYND_FROM_LDWU. * Memory UE ereports showing this syndrome are dropped because they * indicate an L2 problem, which should be diagnosed from the * corresponding L2 cache ereport. */ if (cpu->cpu_type == CPU_ULTRASPARC_T1) { if (synd == NI_DRAM_POISON_SYND_FROM_LDWU) { fmd_hdl_debug(hdl, "discarding UE due to magic syndrome %x\n", synd); return (CMD_EVD_UNUSED); } } return (CMD_EVD_OK); } /* * sun4v's xe_common routine has an extra argument, clcode, compared * to routine of same name in sun4u. */ static cmd_evdisp_t xe_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, cmd_errcl_t clcode, cmd_xe_handler_f *hdlr) { uint64_t afar, l2_afar, dram_afar; uint64_t l2_afsr, dram_afsr; uint16_t synd; uint8_t afar_status, synd_status; nvlist_t *rsrc; char *typenm; uint64_t disp = 0; int minorvers = 1; if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_AFSR, &l2_afsr) != 0 && nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_ESR, &l2_afsr) != 0) return (CMD_EVD_BAD); if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_DRAM_AFSR, &dram_afsr) != 0 && nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_DRAM_ESR, &dram_afsr) != 0) return (CMD_EVD_BAD); if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_AFAR, &l2_afar) != 0 && nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_EAR, &l2_afar) != 0) return (CMD_EVD_BAD); if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_DRAM_AFAR, &dram_afar) != 0 && nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_DRAM_EAR, &dram_afar) != 0) return (CMD_EVD_BAD); if (nvlist_lookup_pairs(nvl, 0, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, DATA_TYPE_STRING, &typenm, FM_EREPORT_PAYLOAD_NAME_RESOURCE, DATA_TYPE_NVLIST, &rsrc, NULL) != 0) return (CMD_EVD_BAD); synd = dram_afsr; /* * Niagara afar and synd validity. * For a given set of error registers, the payload value is valid if * no higher priority error status bit is set. See UltraSPARC-T1.h for * error status bit values and priority settings. Note that for DAC * and DAU, afar value is taken from l2 error registers, syndrome * from dram error * registers; for DSC and DSU, both afar and * syndrome are taken from dram * error registers. DSU afar and * syndrome are always valid because no * higher priority error will override. */ switch (clcode) { case CMD_ERRCL_DAC: afar = l2_afar; afar_status = ((l2_afsr & NI_L2AFSR_P10) == 0) ? AFLT_STAT_VALID : AFLT_STAT_INVALID; synd_status = ((dram_afsr & NI_DMAFSR_P01) == 0) ? AFLT_STAT_VALID : AFLT_STAT_INVALID; break; case CMD_ERRCL_DSC: afar = dram_afar; afar_status = ((dram_afsr & NI_DMAFSR_P01) == 0) ? AFLT_STAT_VALID : AFLT_STAT_INVALID; synd_status = afar_status; break; case CMD_ERRCL_DAU: afar = l2_afar; afar_status = ((l2_afsr & NI_L2AFSR_P05) == 0) ? AFLT_STAT_VALID : AFLT_STAT_INVALID; synd_status = AFLT_STAT_VALID; break; case CMD_ERRCL_DSU: afar = dram_afar; afar_status = synd_status = AFLT_STAT_VALID; break; default: fmd_hdl_debug(hdl, "Niagara unrecognized mem error %llx\n", clcode); return (CMD_EVD_UNUSED); } return (hdlr(hdl, ep, nvl, class, afar, afar_status, synd, synd_status, cmd_mem_name2type(typenm, minorvers), disp, rsrc)); } /*ARGSUSED*/ cmd_evdisp_t cmd_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, cmd_errcl_t clcode) { return (xe_common(hdl, ep, nvl, class, clcode, cmd_ce_common)); } /*ARGSUSED*/ cmd_evdisp_t cmd_ue(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, cmd_errcl_t clcode) { return (xe_common(hdl, ep, nvl, class, clcode, cmd_ue_common)); } /*ARGSUSED*/ cmd_evdisp_t cmd_frx(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, cmd_errcl_t clcode) { return (CMD_EVD_UNUSED); } /*ARGSUSED*/ ulong_t cmd_mem_get_phys_pages(fmd_hdl_t *hdl) { /* * Compute and return the total physical memory in pages from the * MD/PRI. * Cache its value. */ static ulong_t npage = 0; md_t *mdp; mde_cookie_t *listp; uint64_t bmem, physmem = 0; ssize_t bufsiz = 0; uint64_t *bufp; int num_nodes, nmblocks, i; if (npage > 0) { return (npage); } if (cpumem_hdl == NULL) { cpumem_hdl = hdl; } if ((bufsiz = ldom_get_core_md(cpumem_diagnosis_lhp, &bufp)) <= 0) { return (0); } if ((mdp = md_init_intern(bufp, cpumem_alloc, cpumem_free)) == NULL || (num_nodes = md_node_count(mdp)) <= 0) { cpumem_free(bufp, (size_t)bufsiz); return (0); } listp = (mde_cookie_t *)cpumem_alloc(sizeof (mde_cookie_t) * num_nodes); nmblocks = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE, md_find_name(mdp, "mblock"), md_find_name(mdp, "fwd"), listp); for (i = 0; i < nmblocks; i++) { if (md_get_prop_val(mdp, listp[i], "size", &bmem) < 0) { physmem = 0; break; } physmem += bmem; } npage = (ulong_t)(physmem / cmd.cmd_pagesize); cpumem_free(listp, sizeof (mde_cookie_t) * num_nodes); cpumem_free(bufp, (size_t)bufsiz); (void) md_fini(mdp); return (npage); } static int galois_mul[16][16] = { /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* 0 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, /* 1 */ { 0, 2, 4, 6, 8, 10, 12, 14, 3, 1, 7, 5, 11, 9, 15, 13}, /* 2 */ { 0, 3, 6, 5, 12, 15, 10, 9, 11, 8, 13, 14, 7, 4, 1, 2}, /* 3 */ { 0, 4, 8, 12, 3, 7, 11, 15, 6, 2, 14, 10, 5, 1, 13, 9}, /* 4 */ { 0, 5, 10, 15, 7, 2, 13, 8, 14, 11, 4, 1, 9, 12, 3, 6}, /* 5 */ { 0, 6, 12, 10, 11, 13, 7, 1, 5, 3, 9, 15, 14, 8, 2, 4}, /* 6 */ { 0, 7, 14, 9, 15, 8, 1, 6, 13, 10, 3, 4, 2, 5, 12, 11}, /* 7 */ { 0, 8, 3, 11, 6, 14, 5, 13, 12, 4, 15, 7, 10, 2, 9, 1}, /* 8 */ { 0, 9, 1, 8, 2, 11, 3, 10, 4, 13, 5, 12, 6, 15, 7, 14}, /* 9 */ { 0, 10, 7, 13, 14, 4, 9, 3, 15, 5, 8, 2, 1, 11, 6, 12}, /* A */ { 0, 11, 5, 14, 10, 1, 15, 4, 7, 12, 2, 9, 13, 6, 8, 3}, /* B */ { 0, 12, 11, 7, 5, 9, 14, 2, 10, 6, 1, 13, 15, 3, 4, 8}, /* C */ { 0, 13, 9, 4, 1, 12, 8, 5, 2, 15, 11, 6, 3, 14, 10, 7}, /* D */ { 0, 14, 15, 1, 13, 3, 2, 12, 9, 7, 6, 8, 4, 10, 11, 5}, /* E */ { 0, 15, 13, 2, 9, 6, 4, 11, 1, 14, 12, 3, 8, 7, 5, 10} /* F */ }; static int galois_div(int num, int denom) { int i; for (i = 0; i < 16; i++) { if (galois_mul[denom][i] == num) return (i); } return (-1); } /* * Data nibbles N0-N31 => 0-31 * check nibbles C0-3 => 32-35 */ int cmd_synd2upos(uint16_t syndrome) { uint16_t s0, s1, s2, s3; if (syndrome == 0) return (-1); /* clean syndrome, not a CE */ s0 = syndrome & 0xF; s1 = (syndrome >> 4) & 0xF; s2 = (syndrome >> 8) & 0xF; s3 = (syndrome >> 12) & 0xF; if (s3 == 0) { if (s2 == 0 && s1 == 0) return (32); /* 0 0 0 e => C0 */ if (s2 == 0 && s0 == 0) return (33); /* 0 0 e 0 => C1 */ if (s1 == 0 && s0 == 0) return (34); /* 0 e 0 0 => C2 */ if (s2 == s1 && s1 == s0) return (31); /* 0 d d d => N31 */ return (-1); /* multibit error */ } else if (s2 == 0) { if (s1 == 0 && s0 == 0) return (35); /* e 0 0 0 => C4 */ if (s1 == 0 || s0 == 0) return (-1); /* not a 0 b c */ if (s3 != galois_div(galois_mul[s1][s1], s0)) return (-1); /* check nibble not valid */ return (galois_div(s0, s1) - 1); /* N0 - N14 */ } else if (s1 == 0) { if (s2 == 0 || s0 == 0) return (-1); /* not a b 0 c */ if (s3 != galois_div(galois_mul[s2][s2], s0)) return (-1); /* check nibble not valid */ return (galois_div(s0, s2) + 14); /* N15 - N29 */ } else if (s0 == 0) { if (s3 == s2 && s2 == s1) return (30); /* d d d 0 => N30 */ return (-1); } else return (-1); } int cmd_upos2dram(uint16_t upos) { /* * If and/or when x8 DIMMs are used on sun4v systems, this * function will become more complicated. */ return ((int)upos); } typedef struct tr_ent { const char *nac_component; const char *hc_component; } tr_ent_t; static tr_ent_t tr_tbl[] = { { "MB", "motherboard" }, { "CMP", "chip" }, { "BR", "branch" }, { "CH", "dram-channel" }, { "R", "rank" }, { "D", "dimm" } }; #define tr_tbl_n sizeof (tr_tbl) / sizeof (tr_ent_t) static int map_name(const char *p) { int i; for (i = 0; i < tr_tbl_n; i++) { if (strncmp(p, tr_tbl[i].nac_component, strlen(tr_tbl[i].nac_component)) == 0) return (i); } return (-1); } static int count_components(const char *str, char sep) { int num = 0; const char *cptr = str; if (*cptr == sep) cptr++; /* skip initial sep */ if (strlen(cptr) > 0) num = 1; while ((cptr = strchr(cptr, sep)) != NULL) { cptr++; if (cptr == NULL || strcmp(cptr, "") == 0) break; if (map_name(cptr) >= 0) num++; } return (num); } /* * This version of breakup_components assumes that all component names which * it sees are of the form: <nonnumeric piece><numeric piece> * i.e. no embedded numerals in component name which have to be spelled out. */ static int breakup_components(char *str, char *sep, nvlist_t **hc_nvl) { char namebuf[64], instbuf[64]; char *token, *tokbuf; int i, j, namelen, instlen; i = 0; for (token = strtok_r(str, sep, &tokbuf); token != NULL; token = strtok_r(NULL, sep, &tokbuf)) { namelen = strcspn(token, "0123456789"); instlen = strspn(token+namelen, "0123456789"); (void) strncpy(namebuf, token, namelen); namebuf[namelen] = '\0'; if ((j = map_name(namebuf)) < 0) continue; /* skip names that don't map */ if (instlen == 0) { (void) strncpy(instbuf, "0", 2); } else { (void) strncpy(instbuf, token+namelen, instlen); instbuf[instlen] = '\0'; } if (nvlist_add_string(hc_nvl[i], FM_FMRI_HC_NAME, tr_tbl[j].hc_component) != 0 || nvlist_add_string(hc_nvl[i], FM_FMRI_HC_ID, instbuf) != 0) return (-1); i++; } return (1); } nvlist_t * cmd_mem2hc(fmd_hdl_t *hdl, nvlist_t *mem_fmri) { char *nac_name, *s, *p, **sa; const char *unum = cmd_fmri_get_unum(mem_fmri); nvlist_t *fp, **hc_list; int i, n; unsigned int usi; nac_name = fmd_hdl_zalloc(hdl, strlen(unum)+1, FMD_SLEEP); if ((s = strstr(unum, ": ")) != NULL) { (void) strncpy(nac_name, unum, s-unum); /* up to ": " */ (void) strncpy(nac_name+(s-unum), "/", 2); /* add "/" and \0 */ (void) strncat(nac_name, s+2, strlen(unum)-(s+2-unum)+1); } else { (void) strcpy(nac_name, unum); } n = count_components(nac_name, '/'); hc_list = fmd_hdl_zalloc(hdl, sizeof (nvlist_t *)*n, FMD_SLEEP); for (i = 0; i < n; i++) { (void) nvlist_alloc(&hc_list[i], NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0); } if (breakup_components(nac_name, "/", hc_list) < 0) { fmd_hdl_error(hdl, "cannot allocate components for hc-list\n"); for (i = 0; i < n; i++) { if (hc_list[i] != NULL) nvlist_free(hc_list[i]); } fmd_hdl_free(hdl, hc_list, sizeof (nvlist_t *)*n); fmd_hdl_free(hdl, nac_name, strlen(unum)+1); return (NULL); } (void) nvlist_alloc(&fp, NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0); if ((nvlist_add_uint8(fp, FM_VERSION, FM_HC_VERS0) != 0) || (nvlist_add_string(fp, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) || (nvlist_add_string(fp, FM_FMRI_HC_ROOT, "/") != 0) || (nvlist_add_uint32(fp, FM_FMRI_HC_LIST_SZ, n) != 0) || (nvlist_add_nvlist_array(fp, FM_FMRI_HC_LIST, hc_list, n) != 0)) { for (i = 0; i < n; i++) { if (hc_list[i] != NULL) nvlist_free(hc_list[i]); } fmd_hdl_free(hdl, hc_list, sizeof (nvlist_t *)*n); fmd_hdl_free(hdl, nac_name, strlen(unum)+1); nvlist_free(fp); return (NULL); } for (i = 0; i < n; i++) { if (hc_list[i] != NULL) nvlist_free(hc_list[i]); } fmd_hdl_free(hdl, hc_list, sizeof (nvlist_t *)*n); fmd_hdl_free(hdl, nac_name, strlen(unum)+1); if (nvlist_lookup_string_array(mem_fmri, FM_FMRI_HC_SERIAL_ID, &sa, &usi) == 0) { if (nvlist_add_string(fp, FM_FMRI_HC_SERIAL_ID, *sa) != 0) { nvlist_free(fp); return (NULL); } } if (nvlist_lookup_string(mem_fmri, FM_FMRI_HC_PART, &p) == 0) { if (nvlist_add_string(fp, FM_FMRI_HC_PART, p) != 0) { nvlist_free(fp); return (NULL); } } return (fp); }