Mercurial > illumos > illumos-gate
changeset 7275:1157db66a604
PSARC/2008/487 Repair Observability changes
6534561 need means of finding existing fault state of a resource
6637804 fmd should distinguish between "repaired" and "acquitted" resources
6637812 fmd_nvl_fmri_has_fault() required to report if a given fault has been diagnosed on a resource/fru
6663744 send list.updated events when an individual suspect in a suspect list is repaired
6682295 need fmd_nvl_fmri_replaced() functionality
6686317 extend fmd case state to include final "repair responses complete" state
6712074 need scheme/topo interface to report full service state of a resource
6720169 add location member to the sun-fm-mib
line wrap: on
line diff
--- a/usr/src/cmd/fm/dicts/FMD.dict Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/dicts/FMD.dict Sat Aug 02 03:26:27 2008 -0700 @@ -31,3 +31,5 @@ defect.sunos.fmd.module=2 defect.sunos.fmd.config=3 list.repaired=4 +list.updated=5 +list.resolved=6
--- a/usr/src/cmd/fm/dicts/FMD.po Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/dicts/FMD.po Sat Aug 02 03:26:27 2008 -0700 @@ -110,10 +110,42 @@ msgid "FMD-8000-4M.severity" msgstr "Minor" msgid "FMD-8000-4M.description" -msgstr "All faults associated with an event id have been addressed. Refer to %s for more information." +msgstr "All faults associated with an event id have been addressed.\n Refer to %s for more information." msgid "FMD-8000-4M.response" -msgstr "Any system components offlined becase of the original fault have been brought back online." +msgstr "Some system components offlined because of the original fault may have been brought back online.\n" msgid "FMD-8000-4M.impact" -msgstr "Performance degradation of the system due to the original fault has been recovered." +msgstr "Performance degradation of the system due to the original fault may have been recovered.\n" msgid "FMD-8000-4M.action" msgstr "Use fmdump -v -u <EVENT-ID> to identify the repaired components." +# +# code: FMD-8000-58 +# keys: list.updated +# +msgid "FMD-8000-58.type" +msgstr "Update" +msgid "FMD-8000-58.severity" +msgstr "Minor" +msgid "FMD-8000-58.description" +msgstr "Some faults associated with an event id have been addressed.\n Refer to %s for more information." +msgid "FMD-8000-58.response" +msgstr "Some system components offlined because of the original fault may have been brought back online.\n" +msgid "FMD-8000-58.impact" +msgstr "Performance degradation of the system due to the original fault may have been recovered.\n" +msgid "FMD-8000-58.action" +msgstr "Use fmadm faulty to identify the repaired components, and any suspects that still need to be repaired.\n" +# +# code: FMD-8000-6U +# keys: list.resolved +# +msgid "FMD-8000-6U.type" +msgstr "Resolved" +msgid "FMD-8000-6U.severity" +msgstr "Minor" +msgid "FMD-8000-6U.description" +msgstr "All faults associated with an event id have been addressed.\n Refer to %s for more information." 
+msgid "FMD-8000-6U.response" +msgstr "All system components offlined because of the original fault have been brought back online.\n" +msgid "FMD-8000-6U.impact" +msgstr "Performance degradation of the system due to the original fault has been recovered.\n" +msgid "FMD-8000-6U.action" +msgstr "Use fmdump -v -u <EVENT-ID> to identify the repaired components.\n"
--- a/usr/src/cmd/fm/eversholt/common/check.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/eversholt/common/check.c Sat Aug 02 03:26:27 2008 -0700 @@ -1119,6 +1119,21 @@ "argument to is_present() must be a path or a call " "to fru() or asru()"); } + } else if (np->u.func.s == L_has_fault) { + if (arglist->t == T_LIST && + (arglist->u.expr.left->t == T_NAME || + (arglist->u.expr.left->t == T_FUNC && + (arglist->u.expr.left->u.func.s == L_fru || + arglist->u.expr.left->u.func.s == L_asru))) && + arglist->u.expr.right->t == T_QUOTE) { + if (arglist->u.expr.left->t == T_FUNC) + check_func(arglist->u.expr.left); + } else { + outfl(O_ERR, arglist->file, arglist->line, + "%s() must have path or call to " + "fru() and/or asru() as first argument; " + "second argument must be a string", np->u.func.s); + } } else if (np->u.func.s == L_is_type) { if (arglist->t == T_NAME || (arglist->t == T_FUNC &&
--- a/usr/src/cmd/fm/eversholt/common/literals.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/eversholt/common/literals.h Sat Aug 02 03:26:27 2008 -0700 @@ -169,6 +169,7 @@ L_DECL(is_under); L_DECL(is_on); L_DECL(is_present); +L_DECL(has_fault); L_DECL(is_type); L_DECL(count);
--- a/usr/src/cmd/fm/fmadm/common/faulty.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmadm/common/faulty.c Sat Aug 02 03:26:27 2008 -0700 @@ -97,7 +97,7 @@ * * Fault class : fault.memory.dimm_sb * Affects : mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0 - * degraded but still in service + * faulted but still in service * FRU : "CPU 0 DIMM 0" (hc://.../memory-controller=0/dimm=0) * faulty * @@ -1062,7 +1062,8 @@ name = get_nvl2str_topo(lfru); if (name != NULL) { nlp = alloc_name_list(name, lpct); - nlp->status = status & ~FM_SUSPECT_UNUSABLE; + nlp->status = status & ~(FM_SUSPECT_UNUSABLE | + FM_SUSPECT_DEGRADED); free(name); if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, &label) == 0) @@ -1075,7 +1076,9 @@ name = get_nvl2str_topo(lasru); if (name != NULL) { nlp = alloc_name_list(name, lpct); - nlp->status = status & ~FM_SUSPECT_NOT_PRESENT; + nlp->status = status & ~(FM_SUSPECT_NOT_PRESENT | + FM_SUSPECT_REPAIRED | FM_SUSPECT_REPLACED | + FM_SUSPECT_ACQUITTED); free(name); (void) merge_name_list(asru_p, nlp, 1); } @@ -1315,11 +1318,20 @@ case 0: msg = dgettext("FMD", "ok and in service"); break; + case FM_SUSPECT_DEGRADED: + msg = dgettext("FMD", "service degraded, " + "but associated components no longer faulty"); + break; + case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED: + msg = dgettext("FMD", "faulted but still " + "providing degraded service"); + break; case FM_SUSPECT_FAULTY: - msg = dgettext("FMD", "degraded but still in service"); + msg = dgettext("FMD", "faulted but still in service"); break; case FM_SUSPECT_UNUSABLE: - msg = dgettext("FMD", "unknown, not present or disabled"); + msg = dgettext("FMD", "out of service, " + "but associated components no longer faulty"); break; case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE: msg = dgettext("FMD", "faulted and taken out of service"); @@ -1341,8 +1353,14 @@ msg = dgettext("FMD", "not present"); else if (status & FM_SUSPECT_FAULTY) msg = dgettext("FMD", "faulty"); + else if (status & 
FM_SUSPECT_REPLACED) + msg = dgettext("FMD", "replaced"); + else if (status & FM_SUSPECT_REPAIRED) + msg = dgettext("FMD", "repair attempted"); + else if (status & FM_SUSPECT_ACQUITTED) + msg = dgettext("FMD", "acquitted"); else - msg = dgettext("FMD", "repaired"); + msg = dgettext("FMD", "removed"); (void) printf("%s %s\n", label, msg); } @@ -1727,8 +1745,15 @@ (void) printf(dgettext("FMD", "not present\n")); else if (status & FM_SUSPECT_FAULTY) (void) printf(dgettext("FMD", "faulty\n")); + else if (status & FM_SUSPECT_REPLACED) + (void) printf(dgettext("FMD", "replaced\n")); + else if (status & FM_SUSPECT_REPAIRED) + (void) printf(dgettext("FMD", + "repair attempted\n")); + else if (status & FM_SUSPECT_ACQUITTED) + (void) printf(dgettext("FMD", "acquitted\n")); else - (void) printf(dgettext("FMD", "repaired\n")); + (void) printf(dgettext("FMD", "removed\n")); slp = tp->status_rec_list; end = slp; @@ -1811,6 +1836,12 @@ case 0: msg = dgettext("FMD", "ok"); break; + case FM_SUSPECT_DEGRADED: + msg = dgettext("FMD", "degraded"); + break; + case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED: + msg = dgettext("FMD", "degraded"); + break; case FM_SUSPECT_FAULTY: msg = dgettext("FMD", "degraded"); break; @@ -2017,12 +2048,12 @@ return (FMADM_EXIT_USAGE); /* - * argument could be a uuid, and fmri (asru, fru or resource) + * argument could be a uuid, an fmri (asru, fru or resource) * or a label. Try uuid first, If that fails try the others. 
*/ err = fmd_adm_case_repair(adm, argv[optind]); if (err != 0) - err = fmd_adm_rsrc_repair(adm, argv[optind]); + err = fmd_adm_rsrc_repaired(adm, argv[optind]); if (err != 0) die("failed to record repair to %s", argv[optind]); @@ -2030,3 +2061,80 @@ note("recorded repair to %s\n", argv[optind]); return (FMADM_EXIT_SUCCESS); } + +int +cmd_repaired(fmd_adm_t *adm, int argc, char *argv[]) +{ + int err; + + if (getopt(argc, argv, "") != EOF) + return (FMADM_EXIT_USAGE); + + if (argc - optind != 1) + return (FMADM_EXIT_USAGE); + + /* + * argument could be an fmri (asru, fru or resource) or a label. + */ + err = fmd_adm_rsrc_repaired(adm, argv[optind]); + if (err != 0) + die("failed to record repair to %s", argv[optind]); + + note("recorded repair to of %s\n", argv[optind]); + return (FMADM_EXIT_SUCCESS); +} + +int +cmd_replaced(fmd_adm_t *adm, int argc, char *argv[]) +{ + int err; + + if (getopt(argc, argv, "") != EOF) + return (FMADM_EXIT_USAGE); + + if (argc - optind != 1) + return (FMADM_EXIT_USAGE); + + /* + * argument could be an fmri (asru, fru or resource) or a label. + */ + err = fmd_adm_rsrc_replaced(adm, argv[optind]); + if (err != 0) + die("failed to record replacement of %s", argv[optind]); + + note("recorded replacement of %s\n", argv[optind]); + return (FMADM_EXIT_SUCCESS); +} + +int +cmd_acquit(fmd_adm_t *adm, int argc, char *argv[]) +{ + int err; + + if (getopt(argc, argv, "") != EOF) + return (FMADM_EXIT_USAGE); + + if (argc - optind != 1 && argc - optind != 2) + return (FMADM_EXIT_USAGE); + + /* + * argument could be a uuid, an fmri (asru, fru or resource) + * or a label. Or it could be a uuid and an fmri or label. 
+ */ + if (argc - optind == 2) { + err = fmd_adm_rsrc_acquit(adm, argv[optind], argv[optind + 1]); + if (err != 0) + err = fmd_adm_rsrc_acquit(adm, argv[optind + 1], + argv[optind]); + } else { + err = fmd_adm_case_acquit(adm, argv[optind]); + if (err != 0) + err = fmd_adm_rsrc_acquit(adm, argv[optind], ""); + } + + if (err != 0) + die("failed to record acquital of %s", argv[optind]); + + note("recorded acquital of %s\n", argv[optind]); + return (FMADM_EXIT_SUCCESS); +}
--- a/usr/src/cmd/fm/fmadm/common/fmadm.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmadm/common/fmadm.c Sat Aug 02 03:26:27 2008 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -108,7 +108,13 @@ { cmd_flush, "flush", "<fmri> ...", "flush cached state for resource" }, { cmd_gc, "gc", "<module>", NULL }, { cmd_load, "load", "<path>", "load specified fault manager module" }, -{ cmd_repair, "repair", "<fmri>|<uuid>", "record repair to resource(s)" }, +{ cmd_repair, "repair", "<fmri>|label|<uuid>", NULL }, +{ cmd_repaired, "repaired", "<fmri>|label>", + "notify fault manager that resource has been repaired" }, +{ cmd_acquit, "acquit", "<fmri> [<uuid>] | label [<uuid>] | <uuid>", + "acquit resource or acquit case" }, +{ cmd_replaced, "replaced", "<fmri>|label", + "notify fault manager that resource has been replaced" }, { cmd_reset, "reset", "[-s serd] <module>", "reset module or sub-component" }, { cmd_rotate, "rotate", "<logname>", "rotate log file" }, { cmd_unload, "unload", "<module>", "unload specified fault manager module" },
--- a/usr/src/cmd/fm/fmadm/common/fmadm.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmadm/common/fmadm.h Sat Aug 02 03:26:27 2008 -0700 @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -50,6 +49,9 @@ extern int cmd_gc(fmd_adm_t *, int, char *[]); extern int cmd_load(fmd_adm_t *, int, char *[]); extern int cmd_repair(fmd_adm_t *, int, char *[]); +extern int cmd_repaired(fmd_adm_t *, int, char *[]); +extern int cmd_replaced(fmd_adm_t *, int, char *[]); +extern int cmd_acquit(fmd_adm_t *, int, char *[]); extern int cmd_reset(fmd_adm_t *, int, char *[]); extern int cmd_rotate(fmd_adm_t *, int, char *[]); extern int cmd_unload(fmd_adm_t *, int, char *[]);
--- a/usr/src/cmd/fm/fmd/common/fmd.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd.c Sat Aug 02 03:26:27 2008 -0700 @@ -255,7 +255,7 @@ { "debug", &fmd_debug_ops, NULL }, /* daemon debugging flags */ { "dictdir", &fmd_conf_string, "usr/lib/fm/dict" }, /* default diagcode dir */ { "domain", &fmd_conf_string, NULL }, /* domain id for de auth */ -{ "fakenotpresent", &fmd_conf_bool, "false" }, /* simulate rsrc not present */ +{ "fakenotpresent", &fmd_conf_uint32, "0" }, /* simulate rsrc not present */ { "fg", &fmd_conf_bool, "false" }, /* run daemon in foreground */ { "gc_interval", &fmd_conf_time, "1d" }, /* garbage collection intvl */ { "ids.avg", &fmd_conf_uint32, "4" }, /* desired idspace chain len */ @@ -272,6 +272,8 @@ { "machine", &fmd_conf_string, _fmd_uts.machine }, /* machine name (uname -m) */ { "nodiagcode", &fmd_conf_string, "-" }, /* diagcode to use if error */ { "repaircode", &fmd_conf_string, "-" }, /* diagcode for list.repaired */ +{ "resolvecode", &fmd_conf_string, "-" }, /* diagcode for list.resolved */ +{ "updatecode", &fmd_conf_string, "-" }, /* diagcode for list.updated */ { "osrelease", &fmd_conf_string, _fmd_uts.release }, /* release (uname -r) */ { "osversion", &fmd_conf_string, _fmd_uts.version }, /* version (uname -v) */ { "platform", &fmd_conf_string, _fmd_plat }, /* platform string (uname -i) */ @@ -747,6 +749,8 @@ { char *nodc_key[] = { FMD_FLT_NODC, NULL }; char *repair_key[] = { FM_LIST_REPAIRED_CLASS, NULL }; + char *resolve_key[] = { FM_LIST_RESOLVED_CLASS, NULL }; + char *update_key[] = { FM_LIST_UPDATED_CLASS, NULL }; char code_str[128]; struct sigaction act; @@ -896,6 +900,14 @@ sizeof (code_str)) == 0) (void) fmd_conf_setprop(dp->d_conf, "repaircode", code_str); + if (fmd_module_dc_key2code(dp->d_self, resolve_key, code_str, + sizeof (code_str)) == 0) + (void) fmd_conf_setprop(dp->d_conf, "resolvecode", + code_str); + if (fmd_module_dc_key2code(dp->d_self, update_key, code_str, + sizeof (code_str)) == 0) + 
(void) fmd_conf_setprop(dp->d_conf, "updatecode", + code_str); } fmd_rpc_init(); @@ -939,6 +951,11 @@ fmd_event_rele(e); /* + * Now replay list.updated and list.repaired events + */ + fmd_case_repair_replay(); + + /* * Finally, awaken any threads associated with receiving events from * open transports and tell them to proceed with fmd_xprt_recv(). */
--- a/usr/src/cmd/fm/fmd/common/fmd_api.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_api.c Sat Aug 02 03:26:27 2008 -0700 @@ -1137,6 +1137,20 @@ return (rv); } +void +fmd_case_uuresolved(fmd_hdl_t *hdl, const char *uuid) +{ + fmd_module_t *mp = fmd_api_module_lock(hdl); + fmd_case_t *cp = fmd_case_hash_lookup(fmd.d_cases, uuid); + + if (cp != NULL) { + fmd_case_transition(cp, FMD_CASE_RESOLVED, 0); + fmd_case_rele(cp); + } + + fmd_module_unlock(mp); +} + static int fmd_case_instate(fmd_hdl_t *hdl, fmd_case_t *cp, uint_t state) { @@ -1846,6 +1860,23 @@ } int +fmd_nvl_fmri_replaced(fmd_hdl_t *hdl, nvlist_t *nvl) +{ + fmd_module_t *mp = fmd_api_module_lock(hdl); + int rv; + + if (nvl == NULL) { + fmd_api_error(mp, EFMD_NVL_INVAL, + "invalid nvlist %p\n", (void *)nvl); + } + + rv = fmd_fmri_replaced(nvl); + fmd_module_unlock(mp); + + return (rv); +} + +int fmd_nvl_fmri_unusable(fmd_hdl_t *hdl, nvlist_t *nvl) { fmd_module_t *mp = fmd_api_module_lock(hdl); @@ -1868,23 +1899,91 @@ } int -fmd_nvl_fmri_faulty(fmd_hdl_t *hdl, nvlist_t *nvl) +fmd_nvl_fmri_service_state(fmd_hdl_t *hdl, nvlist_t *nvl) { fmd_module_t *mp = fmd_api_module_lock(hdl); - fmd_asru_hash_t *ahp = fmd.d_asrus; - fmd_asru_t *ap; - int rv = 0; + int rv; if (nvl == NULL) { fmd_api_error(mp, EFMD_NVL_INVAL, "invalid nvlist %p\n", (void *)nvl); } - if ((ap = fmd_asru_hash_lookup_nvl(ahp, nvl)) != NULL) { - rv = (ap->asru_flags & FMD_ASRU_FAULTY) != 0; - fmd_asru_hash_release(ahp, ap); + rv = fmd_fmri_service_state(nvl); + if (rv < 0) + rv = fmd_fmri_unusable(nvl) ? 
FMD_SERVICE_STATE_UNUSABLE : + FMD_SERVICE_STATE_OK; + fmd_module_unlock(mp); + + if (rv < 0) { + fmd_api_error(mp, EFMD_FMRI_OP, "invalid fmri for " + "fmd_nvl_fmri_service_state\n"); + } + + return (rv); +} + +typedef struct { + const char *class; + int *rvp; +} fmd_has_fault_arg_t; + +static void +fmd_rsrc_has_fault(fmd_asru_link_t *alp, void *arg) +{ + fmd_has_fault_arg_t *fhfp = (fmd_has_fault_arg_t *)arg; + char *class; + + if (fhfp->class == NULL) { + if (alp->al_flags & FMD_ASRU_FAULTY) + *fhfp->rvp = 1; + } else { + if ((alp->al_flags & FMD_ASRU_FAULTY) && + alp->al_event != NULL && nvlist_lookup_string(alp->al_event, + FM_CLASS, &class) == 0 && fmd_strmatch(class, fhfp->class)) + *fhfp->rvp = 1; } - +} + +int +fmd_nvl_fmri_has_fault(fmd_hdl_t *hdl, nvlist_t *nvl, int type, char *class) +{ + fmd_module_t *mp = fmd_api_module_lock(hdl); + fmd_asru_hash_t *ahp = fmd.d_asrus; + int rv = 0; + char *name; + int namelen; + fmd_has_fault_arg_t fhf; + + if (nvl == NULL) { + fmd_api_error(mp, EFMD_NVL_INVAL, + "invalid nvlist %p\n", (void *)nvl); + } + if ((namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1) + fmd_api_error(mp, EFMD_NVL_INVAL, + "invalid nvlist: %p\n", (void *)nvl); + name = fmd_alloc(namelen + 1, FMD_SLEEP); + if (fmd_fmri_nvl2str(nvl, name, namelen + 1) == -1) { + if (name != NULL) + fmd_free(name, namelen + 1); + fmd_api_error(mp, EFMD_NVL_INVAL, + "invalid nvlist: %p\n", (void *)nvl); + } + + fhf.class = class; + fhf.rvp = &rv; + if (type == FMD_HAS_FAULT_RESOURCE) + fmd_asru_hash_apply_by_rsrc(ahp, name, fmd_rsrc_has_fault, + &fhf); + else if (type == FMD_HAS_FAULT_ASRU) + fmd_asru_hash_apply_by_asru(ahp, name, fmd_rsrc_has_fault, + &fhf); + else if (type == FMD_HAS_FAULT_FRU) + fmd_asru_hash_apply_by_fru(ahp, name, fmd_rsrc_has_fault, + &fhf); + + if (name != NULL) + fmd_free(name, namelen + 1); fmd_module_unlock(mp); return (rv); }
--- a/usr/src/cmd/fm/fmd/common/fmd_api.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_api.h Sat Aug 02 03:26:27 2008 -0700 @@ -178,6 +178,7 @@ extern fmd_case_t *fmd_case_uulookup(fmd_hdl_t *, const char *); extern void fmd_case_uuclose(fmd_hdl_t *, const char *); extern int fmd_case_uuclosed(fmd_hdl_t *, const char *); +extern void fmd_case_uuresolved(fmd_hdl_t *, const char *); extern int fmd_case_solved(fmd_hdl_t *, fmd_case_t *); extern int fmd_case_closed(fmd_hdl_t *, fmd_case_t *); @@ -225,7 +226,14 @@ extern int fmd_nvl_fmri_expand(fmd_hdl_t *, nvlist_t *); extern int fmd_nvl_fmri_present(fmd_hdl_t *, nvlist_t *); extern int fmd_nvl_fmri_unusable(fmd_hdl_t *, nvlist_t *); -extern int fmd_nvl_fmri_faulty(fmd_hdl_t *, nvlist_t *); +extern int fmd_nvl_fmri_replaced(fmd_hdl_t *, nvlist_t *); +extern int fmd_nvl_fmri_service_state(fmd_hdl_t *, nvlist_t *); +extern int fmd_nvl_fmri_has_fault(fmd_hdl_t *, nvlist_t *, int, char *); + +#define FMD_HAS_FAULT_FRU 0 +#define FMD_HAS_FAULT_ASRU 1 +#define FMD_HAS_FAULT_RESOURCE 2 + extern int fmd_nvl_fmri_contains(fmd_hdl_t *, nvlist_t *, nvlist_t *); extern nvlist_t *fmd_nvl_fmri_translate(fmd_hdl_t *, nvlist_t *, nvlist_t *);
--- a/usr/src/cmd/fm/fmd/common/fmd_api.map Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_api.map Sat Aug 02 03:26:27 2008 -0700 @@ -49,6 +49,7 @@ fmd_case_uuclosed = FUNCTION extern; fmd_case_uuid = FUNCTION extern; fmd_case_uulookup = FUNCTION extern; + fmd_case_uuresolved = FUNCTION extern; fmd_event_local = FUNCTION extern; fmd_event_ena_create = FUNCTION extern; @@ -80,8 +81,10 @@ fmd_nvl_dup = FUNCTION extern; fmd_nvl_fmri_expand = FUNCTION extern; fmd_nvl_fmri_present = FUNCTION extern; + fmd_nvl_fmri_replaced = FUNCTION extern; fmd_nvl_fmri_unusable = FUNCTION extern; - fmd_nvl_fmri_faulty = FUNCTION extern; + fmd_nvl_fmri_service_state = FUNCTION extern; + fmd_nvl_fmri_has_fault = FUNCTION extern; fmd_nvl_fmri_contains = FUNCTION extern; fmd_nvl_fmri_translate = FUNCTION extern;
--- a/usr/src/cmd/fm/fmd/common/fmd_asru.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_asru.c Sat Aug 02 03:26:27 2008 -0700 @@ -164,7 +164,7 @@ } static int -fmd_asru_is_present(nvlist_t *event) +fmd_asru_replacement_state(nvlist_t *event) { int ps = -1; nvlist_t *asru, *fru, *rsrc; @@ -181,16 +181,36 @@ * as still present. */ if (fmd_asru_fake_not_present) - ps = 0; - if (ps == -1 && nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0) - ps = fmd_fmri_present(asru); - if (ps == -1 && nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, - &rsrc) == 0) - ps = fmd_fmri_present(rsrc); - if (ps == -1 && nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0) - ps = fmd_fmri_present(fru); + return (fmd_asru_fake_not_present); + if (nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0) + ps = fmd_fmri_replaced(asru); + if (ps == -1) { + if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, &rsrc) == 0) + ps = fmd_fmri_replaced(rsrc); + } else if (ps == FMD_OBJ_STATE_UNKNOWN) { + /* see if we can improve on UNKNOWN */ + if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, + &rsrc) == 0) { + int ps2 = fmd_fmri_replaced(rsrc); + if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || + ps2 == FMD_OBJ_STATE_REPLACED) + ps = ps2; + } + } + if (ps == -1) { + if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0) + ps = fmd_fmri_replaced(fru); + } else if (ps == FMD_OBJ_STATE_UNKNOWN) { + /* see if we can improve on UNKNOWN */ + if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0) { + int ps2 = fmd_fmri_replaced(fru); + if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || + ps2 == FMD_OBJ_STATE_REPLACED) + ps = ps2; + } + } if (ps == -1) - ps = 1; + ps = FMD_OBJ_STATE_UNKNOWN; return (ps); } @@ -404,7 +424,10 @@ fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp) { nvlist_t *nvl = FMD_EVENT_NVL(ep); - boolean_t f, u, ps, us; + boolean_t faulty = FMD_B_FALSE, unusable = FMD_B_FALSE; + int ps; + boolean_t repaired = FMD_B_FALSE, replaced = FMD_B_FALSE; + 
boolean_t acquitted = FMD_B_FALSE; nvlist_t *flt, *flt_copy, *asru; char *case_uuid = NULL, *case_code = NULL; fmd_asru_t *ap; @@ -420,7 +443,8 @@ /* * Extract the most recent values of 'faulty' from the event log. */ - if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, &f) != 0) { + if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, + &faulty) != 0) { fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " "invalid event log record\n", lp->log_name); ahp->ah_error = EFMD_ASRU_EVENT; @@ -434,16 +458,25 @@ } (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid); (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code); + (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE, + &unusable); + (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED, + &repaired); + (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPLACED, + &replaced); + (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED, + &acquitted); /* - * Attempt to recreate the case in the CLOSED state. + * Attempt to recreate the case in either the CLOSED or REPAIRED state + * (depending on whether the faulty bit is still set). * If the case is already present, fmd_case_recreate() will return it. * If not, we'll create a new orphaned case. Either way, we use the * ASRU event to insert a suspect into the partially-restored case. */ fmd_module_lock(fmd.d_rmod); - cp = fmd_case_recreate(fmd.d_rmod, NULL, FMD_CASE_CLOSED, case_uuid, - case_code); + cp = fmd_case_recreate(fmd.d_rmod, NULL, faulty ? FMD_CASE_CLOSED : + FMD_CASE_REPAIRED, case_uuid, case_code); fmd_case_hold(cp); fmd_module_unlock(fmd.d_rmod); if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time, @@ -478,37 +511,31 @@ ap = alp->al_asru; /* - * Check to see if the resource is still present in the system. If - * so, then update the value of the unusable bit based on the current - * system configuration. If not, then consider unusable. 
+ * Check to see if the resource is still present in the system. */ - ps = fmd_asru_is_present(flt); - if (ps) { - if (nvlist_lookup_nvlist(flt, FM_FAULT_ASRU, &asru) != 0) - u = FMD_B_FALSE; - else if ((us = fmd_fmri_unusable(asru)) == -1) { - fmd_error(EFMD_ASRU_FMRI, "failed to update " - "status of asru %s", lp->log_name); - u = FMD_B_FALSE; - } else - u = us != 0; - - } else - u = FMD_B_TRUE; /* not present; set unusable */ + ps = fmd_asru_replacement_state(flt); + if (ps == FMD_OBJ_STATE_STILL_PRESENT || ps == FMD_OBJ_STATE_UNKNOWN) + ap->asru_flags |= FMD_ASRU_PRESENT; + else if (ps == FMD_OBJ_STATE_REPLACED) + replaced = FMD_B_TRUE; nvlist_free(flt); ap->asru_flags |= FMD_ASRU_RECREATED; - if (ps) - ap->asru_flags |= FMD_ASRU_PRESENT; - if (f) { + if (faulty) { alp->al_flags |= FMD_ASRU_FAULTY; ap->asru_flags |= FMD_ASRU_FAULTY; } - if (u) { + if (unusable) { alp->al_flags |= FMD_ASRU_UNUSABLE; ap->asru_flags |= FMD_ASRU_UNUSABLE; } + if (replaced) + alp->al_reason = FMD_ASRU_REPLACED; + else if (repaired) + alp->al_reason = FMD_ASRU_REPAIRED; + else if (acquitted) + alp->al_reason = FMD_ASRU_ACQUITTED; TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid, (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE])); @@ -629,29 +656,34 @@ * Check if the resource is still present. If not, and if the rsrc.age time * has expired, then do an implicit repair on the resource. 
*/ +/*ARGSUSED*/ static void -fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *er) +fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *arg) { struct timeval tv; fmd_log_t *lp; hrtime_t hrt; + int ps; + int err; - if (fmd_asru_is_present(alp->al_event)) - return; - fmd_time_gettimeofday(&tv); - lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU); - hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime); - fmd_log_rele(lp); - if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime) - fmd_asru_repair(alp, er); + ps = fmd_asru_replacement_state(alp->al_event); + if (ps == FMD_OBJ_STATE_REPLACED) { + fmd_asru_replaced(alp, &err); + } else if (ps == FMD_OBJ_STATE_NOT_PRESENT) { + fmd_time_gettimeofday(&tv); + lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, + FMD_LOG_ASRU); + hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime); + fmd_log_rele(lp); + if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime) + fmd_asru_removed(alp); + } } void fmd_asru_clear_aged_rsrcs() { - int err; - - fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, &err); + fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, NULL); } fmd_asru_hash_t * @@ -881,25 +913,6 @@ } /* - * Lookup an asru in the hash and place a hold on it. - */ -fmd_asru_t * -fmd_asru_hash_lookup_nvl(fmd_asru_hash_t *ahp, nvlist_t *fmri) -{ - fmd_asru_t *ap; - char *name = NULL; - ssize_t namelen; - - if (fmd_asru_get_namestr(fmri, &name, &namelen) != 0) - return (NULL); - (void) pthread_rwlock_rdlock(&ahp->ah_lock); - ap = fmd_asru_hash_lookup(ahp, name); - (void) pthread_rwlock_unlock(&ahp->ah_lock); - fmd_free(name, namelen + 1); - return (ap); -} - -/* * Create a resource cache entry using the fault event "nvl" for one of the * suspects from the case "cp". 
* @@ -1109,12 +1122,13 @@ fmd_asru_repair_containee(fmd_asru_link_t *alp, void *er) { if (er && alp->al_asru_fmri && fmd_fmri_contains(er, - alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY)) + alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, + FMD_ASRU_REPAIRED)) fmd_case_update(alp->al_case); } void -fmd_asru_repair(fmd_asru_link_t *alp, void *er) +fmd_asru_repaired(fmd_asru_link_t *alp, void *er) { int flags; int rval; @@ -1122,7 +1136,7 @@ /* * repair this asru cache entry */ - rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY); + rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPAIRED); /* * now check if all entries associated with this asru are repaired and @@ -1149,12 +1163,134 @@ } static void +fmd_asru_acquit_containee(fmd_asru_link_t *alp, void *er) +{ + if (er && alp->al_asru_fmri && fmd_fmri_contains(er, + alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, + FMD_ASRU_ACQUITTED)) + fmd_case_update(alp->al_case); +} + +void +fmd_asru_acquit(fmd_asru_link_t *alp, void *er) +{ + int flags; + int rval; + + /* + * acquit this asru cache entry + */ + rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_ACQUITTED); + + /* + * now check if all entries associated with this asru are acquitted and + * if so acquit containees + */ + (void) pthread_mutex_lock(&alp->al_asru->asru_lock); + flags = alp->al_asru->asru_flags; + (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); + if (!(flags & FMD_ASRU_FAULTY)) + fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_acquit_containee, + alp->al_asru_fmri); + + /* + * if called from fmd_adm_acquit() and we really did clear the bit then + * we need to do a case update to see if the associated case can be + * repaired. No need to do this if called from fmd_case_acquit() (ie + * when er is NULL) as the case will be explicitly repaired anyway. 
+ */ + if (er) { + *(int *)er = 0; + if (rval) + fmd_case_update(alp->al_case); + } +} + +static void +fmd_asru_replaced_containee(fmd_asru_link_t *alp, void *er) +{ + if (er && alp->al_asru_fmri && fmd_fmri_contains(er, + alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, + FMD_ASRU_REPLACED)) + fmd_case_update(alp->al_case); +} + +void +fmd_asru_replaced(fmd_asru_link_t *alp, void *er) +{ + int flags; + int rval; + int ps; + + ps = fmd_asru_replacement_state(alp->al_event); + if (ps == FMD_OBJ_STATE_STILL_PRESENT) + return; + + /* + * mark this cache entry as replaced + */ + rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPLACED); + + /* + * now check if all entries associated with this asru are replaced and + * if so replace containees + */ + (void) pthread_mutex_lock(&alp->al_asru->asru_lock); + flags = alp->al_asru->asru_flags; + (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); + if (!(flags & FMD_ASRU_FAULTY)) + fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_replaced_containee, + alp->al_asru_fmri); + + *(int *)er = 0; + if (rval) + fmd_case_update(alp->al_case); +} + +static void +fmd_asru_removed_containee(fmd_asru_link_t *alp, void *er) +{ + if (er && alp->al_asru_fmri && fmd_fmri_contains(er, + alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, + 0)) + fmd_case_update(alp->al_case); +} + +void +fmd_asru_removed(fmd_asru_link_t *alp) +{ + int flags; + int rval; + + /* + * mark this cache entry as replacded + */ + rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 0); + + /* + * now check if all entries associated with this asru are removed and + * if so replace containees + */ + (void) pthread_mutex_lock(&alp->al_asru->asru_lock); + flags = alp->al_asru->asru_flags; + (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); + if (!(flags & FMD_ASRU_FAULTY)) + fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_removed_containee, + alp->al_asru_fmri); + if (rval) + fmd_case_update(alp->al_case); +} + +static void 
fmd_asru_logevent(fmd_asru_link_t *alp) { fmd_asru_t *ap = alp->al_asru; - boolean_t f = (ap->asru_flags & FMD_ASRU_FAULTY) != 0; - boolean_t u = (ap->asru_flags & FMD_ASRU_UNUSABLE) != 0; - boolean_t m = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0; + boolean_t faulty = (alp->al_flags & FMD_ASRU_FAULTY) != 0; + boolean_t unusable = (alp->al_flags & FMD_ASRU_UNUSABLE) != 0; + boolean_t message = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0; + boolean_t repaired = (alp->al_reason == FMD_ASRU_REPAIRED); + boolean_t replaced = (alp->al_reason == FMD_ASRU_REPLACED); + boolean_t acquitted = (alp->al_reason == FMD_ASRU_ACQUITTED); fmd_case_impl_t *cip; fmd_event_t *e; @@ -1172,9 +1308,9 @@ if (lp == NULL) return; /* can't log events if we can't open the log */ - nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[f | (u << 1)], - alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, f, u, m, - alp->al_event, &cip->ci_tv); + nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[faulty | (unusable << 1)], + alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, faulty, unusable, + message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted); (void) nvlist_lookup_string(nvl, FM_CLASS, &class); e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); @@ -1224,7 +1360,7 @@ } int -fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag) +fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag, uint8_t reason) { fmd_asru_t *ap = alp->al_asru; fmd_asru_link_t *nalp; @@ -1240,9 +1376,16 @@ nstate = alp->al_flags & FMD_ASRU_STATE; if (nstate == ostate) { + if (reason > alp->al_reason) { + alp->al_reason = reason; + fmd_asru_logevent(alp); + (void) pthread_cond_broadcast(&ap->asru_cv); + } (void) pthread_mutex_unlock(&ap->asru_lock); return (0); } + if (reason > alp->al_reason) + alp->al_reason = reason; if (sflag == FMD_ASRU_UNUSABLE) ap->asru_flags &= ~sflag; @@ -1277,15 +1420,36 @@ { int us, st; nvlist_t *asru; + int ps; - if (fmd_asru_is_present(alp->al_event) == 0) + ps = 
fmd_asru_replacement_state(alp->al_event); + if (ps == FMD_OBJ_STATE_NOT_PRESENT) return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE); + if (ps == FMD_OBJ_STATE_REPLACED) { + if (alp->al_reason < FMD_ASRU_REPLACED) + alp->al_reason = FMD_ASRU_REPLACED; + return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE); + } - if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) - us = fmd_fmri_unusable(asru); - else + st = (alp->al_flags & FMD_ASRU_STATE) | FMD_ASRU_PRESENT; + if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) { + us = fmd_fmri_service_state(asru); + if (us == -1 || us == FMD_SERVICE_STATE_UNKNOWN) { + /* not supported by scheme - try fmd_fmri_unusable */ + us = fmd_fmri_unusable(asru); + } else if (us == FMD_SERVICE_STATE_UNUSABLE) { + st |= FMD_ASRU_UNUSABLE; + return (st); + } else if (us == FMD_SERVICE_STATE_OK) { + st &= ~FMD_ASRU_UNUSABLE; + return (st); + } else if (us == FMD_SERVICE_STATE_DEGRADED) { + st &= ~FMD_ASRU_UNUSABLE; + st |= FMD_ASRU_DEGRADED; + return (st); + } + } else us = (alp->al_flags & FMD_ASRU_UNUSABLE); - st = (alp->al_flags & FMD_ASRU_STATE) | FMD_ASRU_PRESENT; if (us > 0) st |= FMD_ASRU_UNUSABLE; else if (us == 0) @@ -1307,7 +1471,8 @@ int us, st; if (!(ap->asru_flags & FMD_ASRU_INTERNAL) && - (fmd_asru_fake_not_present || fmd_fmri_present(ap->asru_fmri) <= 0)) + (fmd_asru_fake_not_present >= FMD_OBJ_STATE_REPLACED || + fmd_fmri_present(ap->asru_fmri) <= 0)) return (0); /* do not report non-fmd non-present resources */ us = fmd_fmri_unusable(ap->asru_fmri);
--- a/usr/src/cmd/fm/fmd/common/fmd_asru.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_asru.h Sat Aug 02 03:26:27 2008 -0700 @@ -89,6 +89,7 @@ nvlist_t *al_event; /* event associated with last change */ uint_t al_refs; /* reference count */ uint_t al_flags; /* flags (see below) */ + uint8_t al_reason; /* repair reason (see below) */ } fmd_asru_link_t; #define FMD_ASRU_FAULTY 0x01 /* asru has been diagnosed as faulty */ @@ -98,6 +99,15 @@ #define FMD_ASRU_INVISIBLE 0x10 /* asru is not visibly administered */ #define FMD_ASRU_RECREATED 0x20 /* asru recreated by cache replay */ #define FMD_ASRU_PRESENT 0x40 /* asru present at last R$ update */ +#define FMD_ASRU_DEGRADED 0x80 /* asru service is degraded */ + +/* + * Note the following are defined in order of increasing precedence and + * this should not be changed + */ +#define FMD_ASRU_ACQUITTED 1 /* asru acquitted */ +#define FMD_ASRU_REPAIRED 2 /* asru repaired */ +#define FMD_ASRU_REPLACED 3 /* asru replaced */ #define FMD_ASRU_STATE (FMD_ASRU_FAULTY | FMD_ASRU_UNUSABLE) @@ -146,16 +156,18 @@ void (*)(fmd_asru_link_t *, void *), void *); extern fmd_asru_t *fmd_asru_hash_lookup_name(fmd_asru_hash_t *, const char *); -extern fmd_asru_t *fmd_asru_hash_lookup_nvl(fmd_asru_hash_t *, nvlist_t *); extern fmd_asru_link_t *fmd_asru_hash_create_entry(fmd_asru_hash_t *, fmd_case_t *, nvlist_t *); extern void fmd_asru_hash_release(fmd_asru_hash_t *, fmd_asru_t *); extern void fmd_asru_hash_delete_case(fmd_asru_hash_t *, fmd_case_t *); extern void fmd_asru_clear_aged_rsrcs(); -extern void fmd_asru_repair(fmd_asru_link_t *, void *); +extern void fmd_asru_repaired(fmd_asru_link_t *, void *); +extern void fmd_asru_acquit(fmd_asru_link_t *, void *); +extern void fmd_asru_replaced(fmd_asru_link_t *, void *); +extern void fmd_asru_removed(fmd_asru_link_t *); extern int fmd_asru_setflags(fmd_asru_link_t *, uint_t); -extern int fmd_asru_clrflags(fmd_asru_link_t *, uint_t); +extern int 
fmd_asru_clrflags(fmd_asru_link_t *, uint_t, uint8_t); extern int fmd_asru_al_getstate(fmd_asru_link_t *); extern int fmd_asru_getstate(fmd_asru_t *);
--- a/usr/src/cmd/fm/fmd/common/fmd_case.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_case.c Sat Aug 02 03:26:27 2008 -0700 @@ -50,16 +50,37 @@ * +------------+ * +----------| UNSOLVED | * | +------------+ - * 1 | 4 | - * | | - * +----v---+ /-2->+------v-----+ 3 +--------+ - * | SOLVED |< | CLOSE_WAIT |--------->| CLOSED | - * +--------+ \-5->+------------+ +--------+ - * | | - * 6 | | 7 - * +------v-----+ | - * | REPAIRED |<-------------+ + * | 1 | + * | | + * | +-------v----+ + * 2 | | SOLVED | + * | +------------+ + * | 3 | 5 | + * +------------+ | | + * | | | + * +-v---v----v-+ + * | CLOSE_WAIT | * +------------+ + * | | | + * +-----------+ | +------------+ + * | 4 | | + * v +-----v------+ | + * discard | CLOSED | 6 | + * +------------+ | + * | | + * | +------------+ + * 7 | | + * +-----v----v-+ + * | REPAIRED | + * +------------+ + * | + * 8 | + * +-----v------+ + * | RESOLVED | + * +------------+ + * | + * v + * discard * * The state machine changes are triggered by calls to fmd_case_transition() * from various locations inside of fmd, as described below: @@ -70,34 +91,37 @@ * suspects convicted are marked faulty (F) in R$ * list.suspect event logged and dispatched * - * [2] Called by: fmd_case_close(), fmd_case_uuclose(), fmd_xprt_event_uuclose() + * [2] Called by: fmd_case_close(), fmd_case_uuclose() + * Actions: diagnosis engine fmdo_close() entry point scheduled + * case discarded upon exit from CLOSE_WAIT + * + * [3] Called by: fmd_case_close(), fmd_case_uuclose(), fmd_xprt_event_uuclose() * Actions: FMD_CF_ISOLATED flag is set in ci_flags * suspects convicted (F) are marked unusable (U) in R$ * diagnosis engine fmdo_close() entry point scheduled - * case transitions to CLOSED [3] upon exit from CLOSE_WAIT + * case transitions to CLOSED [4] upon exit from CLOSE_WAIT * - * [3] Called by: fmd_case_delete() (after fmdo_close() entry point returns) + * [4] Called by: fmd_case_delete() (after fmdo_close() entry point returns) * 
Actions: list.isolated event dispatched * case deleted from module's list of open cases * - * [4] Called by: fmd_case_close(), fmd_case_uuclose() - * Actions: diagnosis engine fmdo_close() entry point scheduled - * case is subsequently discarded by fmd_case_delete() - * * [5] Called by: fmd_case_repair(), fmd_case_update() * Actions: FMD_CF_REPAIR flag is set in ci_flags * diagnosis engine fmdo_close() entry point scheduled * case transitions to REPAIRED [6] upon exit from CLOSE_WAIT * - * [6] Called by: fmd_case_repair(), fmd_case_update() - * Actions: FMD_CF_REPAIR flag is set in ci_flags - * suspects convicted are marked non faulty (!F) in R$ - * list.repaired event dispatched + * [6] Called by: fmd_case_delete() (after fmdo_close() entry point returns) + * Actions: suspects convicted are marked non faulty (!F) in R$ + * list.repaired or list.updated event dispatched * * [7] Called by: fmd_case_repair(), fmd_case_update() * Actions: FMD_CF_REPAIR flag is set in ci_flags * suspects convicted are marked non faulty (!F) in R$ - * list.repaired event dispatched + * list.repaired or list.updated event dispatched + * + * [8] Called by: fmd_case_uuresolve() + * Actions: list.resolved event dispatched + * case is discarded */ #include <sys/fm/protocol.h> @@ -128,11 +152,10 @@ "SOLVED", /* FMD_CASE_SOLVED */ "CLOSE_WAIT", /* FMD_CASE_CLOSE_WAIT */ "CLOSED", /* FMD_CASE_CLOSED */ - "REPAIRED" /* FMD_CASE_REPAIRED */ + "REPAIRED", /* FMD_CASE_REPAIRED */ + "RESOLVED" /* FMD_CASE_RESOLVED */ }; -extern volatile uint32_t fmd_asru_fake_not_present; - static fmd_case_impl_t *fmd_case_tryhold(fmd_case_impl_t *); fmd_case_hash_t * @@ -300,12 +323,20 @@ *entryp->fcl_msgp = B_FALSE; entryp->fcl_ba[*entryp->fcl_countp] = 0; state = fmd_asru_al_getstate(alp); + if (state & FMD_ASRU_DEGRADED) + entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_DEGRADED; if (state & FMD_ASRU_UNUSABLE) entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_UNUSABLE; if (state & FMD_ASRU_FAULTY) 
entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_FAULTY; if (!(state & FMD_ASRU_PRESENT)) entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_NOT_PRESENT; + if (alp->al_reason == FMD_ASRU_REPAIRED) + entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPAIRED; + else if (alp->al_reason == FMD_ASRU_REPLACED) + entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPLACED; + else if (alp->al_reason == FMD_ASRU_ACQUITTED) + entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_ACQUITTED; entryp->fcl_nva[*entryp->fcl_countp] = alp->al_event; (*entryp->fcl_countp)++; } @@ -326,6 +357,29 @@ *usablep |= !(fmd_asru_al_getstate(alp) & FMD_ASRU_UNUSABLE); } +static void +fmd_case_not_faulty(fmd_asru_link_t *alp, void *arg) +{ + int *not_faultyp = (int *)arg; + + *not_faultyp |= !(alp->al_flags & FMD_ASRU_FAULTY); +} + +/* + * Have we got any suspects with an asru that are still unusable and present? + */ +static void +fmd_case_unusable_and_present(fmd_asru_link_t *alp, void *arg) +{ + int *rvalp = (int *)arg; + int state = fmd_asru_al_getstate(alp); + nvlist_t *asru; + + if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) != 0) + return; + *rvalp |= ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_PRESENT)); +} + nvlist_t * fmd_case_mkevent(fmd_case_t *cp, const char *class) { @@ -359,11 +413,15 @@ if (cip->ci_code == NULL) (void) fmd_case_mkcode(cp); /* - * For repair event, we lookup diagcode from dict using key - * "list.repaired". + * For repair and updated event, we lookup diagcode from dict using key + * "list.repaired" or "list.updated" or "list.resolved". 
*/ if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) (void) fmd_conf_getprop(fmd.d_conf, "repaircode", &code); + else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0) + (void) fmd_conf_getprop(fmd.d_conf, "resolvecode", &code); + else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) + (void) fmd_conf_getprop(fmd.d_conf, "updatecode", &code); else code = cip->ci_code; @@ -555,7 +613,7 @@ "%s: %s\n", cip->ci_uuid, fmd_strerror(errno)); continue; } - (void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE); + (void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0); (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY); } @@ -630,6 +688,16 @@ (void) pthread_rwlock_unlock(&fmd.d_log_lock); fmd_dispq_dispatch(fmd.d_disp, e, class); break; + + case FMD_CASE_RESOLVED: + nvl = fmd_case_mkevent(cp, FM_LIST_RESOLVED_CLASS); + (void) nvlist_lookup_string(nvl, FM_CLASS, &class); + e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); + (void) pthread_rwlock_rdlock(&fmd.d_log_lock); + fmd_log_append(fmd.d_fltlog, e, cp); + (void) pthread_rwlock_unlock(&fmd.d_log_lock); + fmd_dispq_dispatch(fmd.d_disp, e, class); + break; } } @@ -805,7 +873,7 @@ fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP); fmd_case_impl_t *eip; - ASSERT(state < FMD_CASE_REPAIRED); + ASSERT(state < FMD_CASE_RESOLVED); (void) pthread_mutex_init(&cip->ci_lock, NULL); fmd_buf_hash_create(&cip->ci_bufs); @@ -841,6 +909,14 @@ * return the existing case that we found without changing it. */ if (mp == fmd.d_rmod) { + /* + * When recreating an orphan case, state passed in may + * either be CLOSED (faulty) or REPAIRED (!faulty). If + * any suspects are still CLOSED (faulty) then the + * overall state needs to be CLOSED. 
+ */ + if (state == FMD_CASE_CLOSED) + cip->ci_state = FMD_CASE_CLOSED; (void) pthread_mutex_unlock(&cip->ci_lock); fmd_case_rele((fmd_case_t *)cip); return ((fmd_case_t *)cip); @@ -1107,7 +1183,8 @@ boolean_t b; (void) pthread_mutex_lock(&cip->ci_lock); - ASSERT(cip->ci_state == FMD_CASE_CLOSED); + ASSERT(cip->ci_state == FMD_CASE_CLOSED || + cip->ci_state == FMD_CASE_REPAIRED); ASSERT(cip->ci_mod == fmd.d_rmod); cis->cis_next = cip->ci_suspects; @@ -1156,8 +1233,10 @@ fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; fmd_case_item_t *cit; fmd_event_t *e; + int resolved = 0; + int any_unusable_and_present = 0; - ASSERT(state <= FMD_CASE_REPAIRED); + ASSERT(state <= FMD_CASE_RESOLVED); (void) pthread_mutex_lock(&cip->ci_lock); if (!(cip->ci_flags & FMD_CF_SOLVED) && !(flags & FMD_CF_SOLVED)) @@ -1211,9 +1290,49 @@ case FMD_CASE_REPAIRED: ASSERT(fmd_case_orphaned(cp)); + + /* + * If all suspects are already either usable or not present then + * transition straight to RESOLVED state, publishing both the + * list.repaired and list.resolved. + */ + fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, + fmd_case_unusable_and_present, &any_unusable_and_present); + if (any_unusable_and_present) + break; + fmd_module_lock(cip->ci_mod); fmd_list_delete(&cip->ci_mod->mod_cases, cip); fmd_module_unlock(cip->ci_mod); + cip->ci_state = FMD_CASE_RESOLVED; + (void) pthread_mutex_unlock(&cip->ci_lock); + fmd_case_publish(cp, state); + TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid, + _fmd_case_snames[FMD_CASE_REPAIRED], + _fmd_case_snames[FMD_CASE_RESOLVED])); + state = FMD_CASE_RESOLVED; + resolved = 1; + (void) pthread_mutex_lock(&cip->ci_lock); + break; + + case FMD_CASE_RESOLVED: + ASSERT(fmd_case_orphaned(cp)); + + /* + * If all suspects are already either usable or not present then + * carry on, publish list.resolved and discard the case. 
+ */ + fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, + fmd_case_unusable_and_present, &any_unusable_and_present); + if (any_unusable_and_present) { + (void) pthread_mutex_unlock(&cip->ci_lock); + return; + } + + fmd_module_lock(cip->ci_mod); + fmd_list_delete(&cip->ci_mod->mod_cases, cip); + fmd_module_unlock(cip->ci_mod); + resolved = 1; break; } @@ -1236,12 +1355,13 @@ fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e); } - /* - * If we transitioned to REPAIRED, adjust the reference count to - * reflect our removal from fmd.d_rmod->mod_cases. If the caller has - * not placed an additional hold on the case, it will now be freed. - */ - if (state == FMD_CASE_REPAIRED) { + if (resolved) { + /* + * If we transitioned to RESOLVED, adjust the reference count to + * reflect our removal from fmd.d_rmod->mod_cases above. If the + * caller has not placed an additional hold on the case, it + * will now be freed. + */ (void) pthread_mutex_lock(&cip->ci_lock); fmd_asru_hash_delete_case(fmd.d_asrus, cp); (void) pthread_mutex_unlock(&cip->ci_lock); @@ -1254,34 +1374,23 @@ * re-validating the suspect list using the resource cache. This function is * employed by the checkpoint code when restoring a saved, solved case to see * if the state of the case has effectively changed while fmd was not running - * or the module was not loaded. If none of the suspects are present anymore, - * advance the state to REPAIRED. If none are usable, advance to CLOSE_WAIT. + * or the module was not loaded. */ void fmd_case_transition_update(fmd_case_t *cp, uint_t state, uint_t flags) { fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; - int faulty = 0; /* are any suspects faulty? */ int usable = 0; /* are any suspects usable? 
*/ ASSERT(state >= FMD_CASE_SOLVED); (void) pthread_mutex_lock(&cip->ci_lock); - fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty); fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_usable, &usable); (void) pthread_mutex_unlock(&cip->ci_lock); - /* - * If none of the suspects were faulty, it implies they were either - * repaired already or not present and the rsrc.age time has expired. - * We can move the state on to repaired. - */ - if (!faulty) { - state = MAX(state, FMD_CASE_CLOSE_WAIT); - flags |= FMD_CF_REPAIRED; - } else if (!usable) { + if (!usable) { state = MAX(state, FMD_CASE_CLOSE_WAIT); flags |= FMD_CF_ISOLATED; } @@ -1361,8 +1470,20 @@ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty); (void) pthread_mutex_unlock(&cip->ci_lock); - if (faulty) + if (faulty) { + nvlist_t *nvl; + fmd_event_t *e; + char *class; + + nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS); + (void) nvlist_lookup_string(nvl, FM_CLASS, &class); + e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); + (void) pthread_rwlock_rdlock(&fmd.d_log_lock); + fmd_log_append(fmd.d_fltlog, e, cp); + (void) pthread_rwlock_unlock(&fmd.d_log_lock); + fmd_dispq_dispatch(fmd.d_disp, e, class); return; /* one or more suspects are still marked faulty */ + } if (cstate == FMD_CASE_CLOSED) fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED); @@ -1480,7 +1601,42 @@ return (0); /* already repaired */ } - fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repair, NULL); + fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, NULL); + (void) pthread_mutex_unlock(&cip->ci_lock); + + if (cstate == FMD_CASE_CLOSED) + fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED); + else + fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED); + + return (0); +} + +int +fmd_case_acquit(fmd_case_t *cp) +{ + fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; + uint_t cstate; + + (void) pthread_mutex_lock(&cip->ci_lock); + cstate = 
cip->ci_state; + + if (cip->ci_xprt != NULL) { + (void) pthread_mutex_unlock(&cip->ci_lock); + return (fmd_set_errno(EFMD_CASE_OWNER)); + } + + if (cstate < FMD_CASE_SOLVED) { + (void) pthread_mutex_unlock(&cip->ci_lock); + return (fmd_set_errno(EFMD_CASE_STATE)); + } + + if (cip->ci_flags & FMD_CF_REPAIRED) { + (void) pthread_mutex_unlock(&cip->ci_lock); + return (0); /* already repaired */ + } + + fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_acquit, NULL); (void) pthread_mutex_unlock(&cip->ci_lock); if (cstate == FMD_CASE_CLOSED) @@ -1535,3 +1691,72 @@ ((fmd_case_impl_t *)cp)->ci_tv.tv_usec = tv_usec; ((fmd_case_impl_t *)cp)->ci_tv_valid = 1; } + +/*ARGSUSED*/ +void +fmd_case_repair_replay_case(fmd_case_t *cp, void *arg) +{ + int not_faulty = 0; + int faulty = 0; + nvlist_t *nvl; + fmd_event_t *e; + char *class; + int any_unusable_and_present = 0; + fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; + + if (cip->ci_state < FMD_CASE_SOLVED) + return; + + fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty); + fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_not_faulty, + &not_faulty); + + if (!faulty) { + /* + * If none of the suspects is faulty, replay the list.repaired. + * If all suspects are already either usable or not present then + * also transition straight to RESOLVED state. 
+ */ + fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, + fmd_case_unusable_and_present, &any_unusable_and_present); + if (!any_unusable_and_present) { + fmd_module_lock(cip->ci_mod); + fmd_list_delete(&cip->ci_mod->mod_cases, cip); + fmd_module_unlock(cip->ci_mod); + cip->ci_state = FMD_CASE_RESOLVED; + + nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS); + (void) nvlist_lookup_string(nvl, FM_CLASS, &class); + e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, + class); + fmd_dispq_dispatch(fmd.d_disp, e, class); + + fmd_case_publish(cp, FMD_CASE_RESOLVED); + (void) pthread_mutex_lock(&cip->ci_lock); + fmd_asru_hash_delete_case(fmd.d_asrus, cp); + (void) pthread_mutex_unlock(&cip->ci_lock); + fmd_case_rele(cp); + } else { + nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS); + (void) nvlist_lookup_string(nvl, FM_CLASS, &class); + e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, + class); + fmd_dispq_dispatch(fmd.d_disp, e, class); + } + } else if (not_faulty) { + /* + * if some but not all of the suspects are not faulty, replay + * the list.updated. + */ + nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS); + (void) nvlist_lookup_string(nvl, FM_CLASS, &class); + e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); + fmd_dispq_dispatch(fmd.d_disp, e, class); + } +} + +void +fmd_case_repair_replay() +{ + fmd_case_hash_apply(fmd.d_cases, fmd_case_repair_replay_case, NULL); +}
--- a/usr/src/cmd/fm/fmd/common/fmd_case.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_case.h Sat Aug 02 03:26:27 2008 -0700 @@ -84,7 +84,8 @@ #define FMD_CASE_SOLVED 1 /* case is solved (suspects added) */ #define FMD_CASE_CLOSE_WAIT 2 /* case is executing fmdo_close() */ #define FMD_CASE_CLOSED 3 /* case is closed (reconfig done) */ -#define FMD_CASE_REPAIRED 4 /* case is repaired (can be freed) */ +#define FMD_CASE_REPAIRED 4 /* case is repaired */ +#define FMD_CASE_RESOLVED 5 /* case is resolved (can be freed) */ #define FMD_CF_DIRTY 0x01 /* case is in need of checkpoint */ #define FMD_CF_SOLVED 0x02 /* case has been solved */ @@ -138,8 +139,10 @@ extern void fmd_case_settime(fmd_case_t *, time_t, suseconds_t); extern int fmd_case_repair(fmd_case_t *); +extern int fmd_case_acquit(fmd_case_t *); extern int fmd_case_contains(fmd_case_t *, fmd_event_t *); extern int fmd_case_orphaned(fmd_case_t *); +extern void fmd_case_repair_replay(void); #ifdef __cplusplus }
--- a/usr/src/cmd/fm/fmd/common/fmd_dispq.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_dispq.c Sat Aug 02 03:26:27 2008 -0700 @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -21,7 +20,7 @@ */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -328,9 +327,12 @@ * events contained inside of it, determine the maximum length of all * class strings that will be used in this dispatch operation. */ - if (FMD_EVENT_TYPE(ep) == FMD_EVT_PROTOCOL && strcmp(class, - FM_LIST_SUSPECT_CLASS) == 0 && nvlist_lookup_nvlist_array( - FMD_EVENT_NVL(ep), FM_SUSPECT_FAULT_LIST, &nva, &nvc) == 0) { + if (FMD_EVENT_TYPE(ep) == FMD_EVT_PROTOCOL && + (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 || + strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 || + strcmp(class, FM_LIST_UPDATED_CLASS) == 0) && + nvlist_lookup_nvlist_array(FMD_EVENT_NVL(ep), FM_SUSPECT_FAULT_LIST, + &nva, &nvc) == 0) { for (nvi = 0; nvi < nvc; nvi++) { if (nvlist_lookup_string(nva[nvi], FM_CLASS, &c) == 0) { size_t len = strlen(c) + 1;
--- a/usr/src/cmd/fm/fmd/common/fmd_fmri.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_fmri.c Sat Aug 02 03:26:27 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -324,6 +324,40 @@ } int +fmd_fmri_replaced(nvlist_t *nvl) +{ + fmd_scheme_t *sp; + int rv; + + if ((sp = nvl2scheme(nvl)) == NULL) + return (-1); /* errno is set for us */ + + (void) pthread_mutex_lock(&sp->sch_opslock); + rv = sp->sch_ops.sop_replaced(nvl); + (void) pthread_mutex_unlock(&sp->sch_opslock); + + fmd_scheme_hash_release(fmd.d_schemes, sp); + return (rv); +} + +int +fmd_fmri_service_state(nvlist_t *nvl) +{ + fmd_scheme_t *sp; + int rv; + + if ((sp = nvl2scheme(nvl)) == NULL) + return (-1); /* errno is set for us */ + + (void) pthread_mutex_lock(&sp->sch_opslock); + rv = sp->sch_ops.sop_service_state(nvl); + (void) pthread_mutex_unlock(&sp->sch_opslock); + + fmd_scheme_hash_release(fmd.d_schemes, sp); + return (rv); +} + +int fmd_fmri_unusable(nvlist_t *nvl) { fmd_scheme_t *sp;
--- a/usr/src/cmd/fm/fmd/common/fmd_fmri.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_fmri.h Sat Aug 02 03:26:27 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -91,10 +91,25 @@ extern ssize_t fmd_fmri_nvl2str(nvlist_t *, char *, size_t); extern int fmd_fmri_expand(nvlist_t *); extern int fmd_fmri_present(nvlist_t *); +extern int fmd_fmri_replaced(nvlist_t *); +extern int fmd_fmri_service_state(nvlist_t *); extern int fmd_fmri_unusable(nvlist_t *); extern int fmd_fmri_contains(nvlist_t *, nvlist_t *); extern nvlist_t *fmd_fmri_translate(nvlist_t *, nvlist_t *); +#define FMD_OBJ_STATE_UNKNOWN 1 +#define FMD_OBJ_STATE_STILL_PRESENT 2 +#define FMD_OBJ_STATE_REPLACED 3 +#define FMD_OBJ_STATE_NOT_PRESENT 4 + +#define FMD_SERVICE_STATE_UNKNOWN 0 +#define FMD_SERVICE_STATE_OK 1 +#define FMD_SERVICE_STATE_DEGRADED 2 +#define FMD_SERVICE_STATE_UNUSABLE 3 +#define FMD_SERVICE_STATE_DEGRADED_PENDING_RESET 4 +#define FMD_SERVICE_STATE_UNUSABLE_PENDING_RESET 5 +#define FMD_SERVICE_STATE_UNUSABLE_UNTIL_REPLACED 6 + #ifdef __cplusplus } #endif
--- a/usr/src/cmd/fm/fmd/common/fmd_protocol.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_protocol.c Sat Aug 02 03:26:27 2008 -0700 @@ -182,7 +182,8 @@ fmd_protocol_rsrc_asru(const char *class, nvlist_t *fmri, const char *uuid, const char *code, boolean_t faulty, boolean_t unusable, boolean_t message, nvlist_t *event, - struct timeval *tvp) + struct timeval *tvp, boolean_t repaired, boolean_t replaced, + boolean_t acquitted) { nvlist_t *nvl; int64_t tod[2]; @@ -206,6 +207,9 @@ err |= nvlist_add_string(nvl, FM_RSRC_ASRU_CODE, code); err |= nvlist_add_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, faulty); + err |= nvlist_add_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED, repaired); + err |= nvlist_add_boolean_value(nvl, FM_RSRC_ASRU_REPLACED, replaced); + err |= nvlist_add_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED, acquitted); err |= nvlist_add_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE, unusable); err |= nvlist_add_boolean_value(nvl, FM_SUSPECT_MESSAGE, message); err |= nvlist_add_int64_array(nvl, FM_SUSPECT_DIAG_TIME, tod, 2);
--- a/usr/src/cmd/fm/fmd/common/fmd_protocol.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_protocol.h Sat Aug 02 03:26:27 2008 -0700 @@ -76,7 +76,7 @@ struct timeval *); extern nvlist_t *fmd_protocol_rsrc_asru(const char *, nvlist_t *, const char *, const char *, boolean_t, boolean_t, boolean_t, nvlist_t *, - struct timeval *); + struct timeval *m, boolean_t, boolean_t, boolean_t); extern nvlist_t *fmd_protocol_fmderror(int, const char *, va_list); extern nvlist_t *fmd_protocol_moderror(struct fmd_module *, int, const char *); extern nvlist_t *fmd_protocol_xprt_ctl(struct fmd_module *,
--- a/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c Sat Aug 02 03:26:27 2008 -0700 @@ -468,11 +468,32 @@ bool_t fmd_adm_rsrcflush_1_svc(char *name, int *rvp, struct svc_req *req) { - return (fmd_adm_rsrcrepair_1_svc(name, rvp, req)); + return (fmd_adm_rsrcrepaired_1_svc(name, rvp, req)); } bool_t -fmd_adm_rsrcrepair_1_svc(char *name, int *rvp, struct svc_req *req) +fmd_adm_rsrcrepaired_1_svc(char *name, int *rvp, struct svc_req *req) +{ + int err = FMD_ADM_ERR_RSRCNOTF; + + if (fmd_rpc_deny(req)) + err = FMD_ADM_ERR_PERM; + else { + fmd_asru_hash_apply_by_asru(fmd.d_asrus, name, + fmd_asru_repaired, &err); + fmd_asru_hash_apply_by_label(fmd.d_asrus, name, + fmd_asru_repaired, &err); + fmd_asru_hash_apply_by_fru(fmd.d_asrus, name, + fmd_asru_repaired, &err); + fmd_asru_hash_apply_by_rsrc(fmd.d_asrus, name, + fmd_asru_repaired, &err); + } + *rvp = err; + return (TRUE); +} + +bool_t +fmd_adm_rsrcreplaced_1_svc(char *name, int *rvp, struct svc_req *req) { int err = FMD_ADM_ERR_RSRCNOTF; @@ -480,13 +501,52 @@ err = FMD_ADM_ERR_PERM; else { fmd_asru_hash_apply_by_asru(fmd.d_asrus, name, - fmd_asru_repair, &err); + fmd_asru_replaced, &err); fmd_asru_hash_apply_by_label(fmd.d_asrus, name, - fmd_asru_repair, &err); + fmd_asru_replaced, &err); fmd_asru_hash_apply_by_fru(fmd.d_asrus, name, - fmd_asru_repair, &err); + fmd_asru_replaced, &err); fmd_asru_hash_apply_by_rsrc(fmd.d_asrus, name, - fmd_asru_repair, &err); + fmd_asru_replaced, &err); + } + *rvp = err; + return (TRUE); +} + +typedef struct { + int *errp; + char *uuid; +} fmd_adm_ra_t; + +void +fmd_asru_ra_cb(fmd_asru_link_t *alp, void *arg) +{ + fmd_adm_ra_t *farap = (fmd_adm_ra_t *)arg; + + if (strcmp(farap->uuid, "") == 0 || + strcmp(farap->uuid, alp->al_case_uuid) == 0) + fmd_asru_acquit(alp, farap->errp); +} + +bool_t +fmd_adm_rsrcacquit_1_svc(char *name, char *uuid, int *rvp, struct svc_req *req) +{ + int err = FMD_ADM_ERR_RSRCNOTF; + 
fmd_adm_ra_t fara; + + if (fmd_rpc_deny(req)) + err = FMD_ADM_ERR_PERM; + else { + fara.errp = &err; + fara.uuid = uuid; + fmd_asru_hash_apply_by_asru(fmd.d_asrus, name, + fmd_asru_ra_cb, &fara); + fmd_asru_hash_apply_by_label(fmd.d_asrus, name, + fmd_asru_ra_cb, &fara); + fmd_asru_hash_apply_by_fru(fmd.d_asrus, name, + fmd_asru_ra_cb, &fara); + fmd_asru_hash_apply_by_rsrc(fmd.d_asrus, name, + fmd_asru_ra_cb, &fara); } *rvp = err; return (TRUE); @@ -669,6 +729,28 @@ return (TRUE); } +bool_t +fmd_adm_caseacquit_1_svc(char *uuid, int *rvp, struct svc_req *req) +{ + fmd_case_t *cp = NULL; + int err = 0; + + if (fmd_rpc_deny(req)) + err = FMD_ADM_ERR_PERM; + else if ((cp = fmd_case_hash_lookup(fmd.d_cases, uuid)) == NULL) + err = FMD_ADM_ERR_CASESRCH; + else if (fmd_case_acquit(cp) != 0) { + err = errno == EFMD_CASE_OWNER ? + FMD_ADM_ERR_CASEXPRT : FMD_ADM_ERR_CASEOPEN; + } + + if (cp != NULL) + fmd_case_rele(cp); + + *rvp = err; + return (TRUE); +} + void fmd_adm_caselist_case(fmd_case_t *cp, void *arg) {
--- a/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.x Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.x Sat Aug 02 03:26:27 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -135,7 +135,7 @@ struct fmd_rpc_rsrclist FMD_ADM_RSRCLIST(bool) = 9; struct fmd_rpc_rsrcinfo FMD_ADM_RSRCINFO(string) = 10; int FMD_ADM_RSRCFLUSH(string) = 11; - int FMD_ADM_RSRCREPAIR(string) = 12; + int FMD_ADM_RSRCREPAIRED(string) = 12; struct fmd_rpc_serdlist FMD_ADM_SERDINFO(string) = 13; int FMD_ADM_SERDRESET(string, string) = 14; int FMD_ADM_LOGROTATE(string) = 15; @@ -144,6 +144,9 @@ struct fmd_rpc_modstat FMD_ADM_XPRTSTAT(int32_t) = 18; struct fmd_rpc_caselist FMD_ADM_CASELIST(void) = 19; struct fmd_rpc_caseinfo FMD_ADM_CASEINFO(string) = 20; + int FMD_ADM_RSRCREPLACED(string) = 21; + int FMD_ADM_RSRCACQUIT(string, string) = 22; + int FMD_ADM_CASEACQUIT(string) = 23; } = 1; } = 100169;
--- a/usr/src/cmd/fm/fmd/common/fmd_scheme.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_scheme.c Sat Aug 02 03:26:27 2008 -0700 @@ -78,6 +78,44 @@ } static int +fmd_scheme_fmd_replaced(nvlist_t *nvl) +{ + char *name, *version; + fmd_module_t *mp; + int rv = 0; + + if (nvlist_lookup_string(nvl, FM_FMRI_FMD_NAME, &name) != 0 || + nvlist_lookup_string(nvl, FM_FMRI_FMD_VERSION, &version) != 0) + return (fmd_fmri_set_errno(EINVAL)); + + if ((mp = fmd_modhash_lookup(fmd.d_mod_hash, name)) != NULL) { + rv = mp->mod_vers != NULL && + strcmp(mp->mod_vers, version) == 0; + fmd_module_rele(mp); + } + + return (rv ? FMD_OBJ_STATE_STILL_PRESENT : FMD_OBJ_STATE_NOT_PRESENT); +} + +static int +fmd_scheme_fmd_service_state(nvlist_t *nvl) +{ + char *name; + fmd_module_t *mp; + int rv = 1; + + if (nvlist_lookup_string(nvl, FM_FMRI_FMD_NAME, &name) != 0) + return (fmd_fmri_set_errno(EINVAL)); + + if ((mp = fmd_modhash_lookup(fmd.d_mod_hash, name)) != NULL) { + rv = mp->mod_error != 0; + fmd_module_rele(mp); + } + + return (rv ? 
FMD_SERVICE_STATE_UNUSABLE : FMD_SERVICE_STATE_OK); +} + +static int fmd_scheme_fmd_unusable(nvlist_t *nvl) { char *name; @@ -125,6 +163,8 @@ (ssize_t (*)())fmd_scheme_notsup, /* sop_nvl2str */ (int (*)())fmd_scheme_nop, /* sop_expand */ (int (*)())fmd_scheme_notsup, /* sop_present */ + (int (*)())fmd_scheme_notsup, /* sop_replaced */ + (int (*)())fmd_scheme_notsup, /* sop_service_state */ (int (*)())fmd_scheme_notsup, /* sop_unusable */ (int (*)())fmd_scheme_notsup, /* sop_contains */ fmd_scheme_notranslate /* sop_translate */ @@ -136,6 +176,8 @@ fmd_scheme_fmd_nvl2str, /* sop_nvl2str */ (int (*)())fmd_scheme_nop, /* sop_expand */ fmd_scheme_fmd_present, /* sop_present */ + fmd_scheme_fmd_replaced, /* sop_replaced */ + fmd_scheme_fmd_service_state, /* sop_service_state */ fmd_scheme_fmd_unusable, /* sop_unusable */ (int (*)())fmd_scheme_notsup, /* sop_contains */ fmd_scheme_notranslate /* sop_translate */ @@ -151,6 +193,9 @@ { "fmd_fmri_nvl2str", offsetof(fmd_scheme_ops_t, sop_nvl2str) }, { "fmd_fmri_expand", offsetof(fmd_scheme_ops_t, sop_expand) }, { "fmd_fmri_present", offsetof(fmd_scheme_ops_t, sop_present) }, + { "fmd_fmri_replaced", offsetof(fmd_scheme_ops_t, sop_replaced) }, + { "fmd_fmri_service_state", offsetof(fmd_scheme_ops_t, + sop_service_state) }, { "fmd_fmri_unusable", offsetof(fmd_scheme_ops_t, sop_unusable) }, { "fmd_fmri_contains", offsetof(fmd_scheme_ops_t, sop_contains) }, { "fmd_fmri_translate", offsetof(fmd_scheme_ops_t, sop_translate) },
--- a/usr/src/cmd/fm/fmd/common/fmd_scheme.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_scheme.h Sat Aug 02 03:26:27 2008 -0700 @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -21,7 +20,7 @@ */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -48,6 +47,8 @@ ssize_t (*sop_nvl2str)(nvlist_t *, char *, size_t); int (*sop_expand)(nvlist_t *); int (*sop_present)(nvlist_t *); + int (*sop_replaced)(nvlist_t *); + int (*sop_service_state)(nvlist_t *); int (*sop_unusable)(nvlist_t *); int (*sop_contains)(nvlist_t *, nvlist_t *); nvlist_t *(*sop_translate)(nvlist_t *, nvlist_t *);
--- a/usr/src/cmd/fm/fmd/common/fmd_self.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmd/common/fmd_self.c Sat Aug 02 03:26:27 2008 -0700 @@ -141,8 +141,10 @@ */ if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 || strcmp(class, FM_LIST_ISOLATED_CLASS) == 0 || + strcmp(class, FM_LIST_UPDATED_CLASS) == 0 || + strcmp(class, FM_LIST_RESOLVED_CLASS) == 0 || strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) - return; /* if no agents are present just drop list.suspect */ + return; /* if no agents are present just drop list.* */ if (strncmp(class, FMD_ERR_CLASS, FMD_ERR_CLASS_LEN) == 0) return; /* if fmd itself produced the error just drop it */
--- a/usr/src/cmd/fm/fmdump/common/fault.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/fmdump/common/fault.c Sat Aug 02 03:26:27 2008 -0700 @@ -44,6 +44,15 @@ (void) snprintf(str, sizeof (str), "%s %s", code, "Repaired"); code = str; } + if (class != NULL && strcmp(class, FM_LIST_RESOLVED_CLASS) == 0) { + (void) snprintf(str, sizeof (str), "%s %s", code, "Resolved"); + code = str; + } + + if (class != NULL && strcmp(class, FM_LIST_UPDATED_CLASS) == 0) { + (void) snprintf(str, sizeof (str), "%s %s", code, "Updated"); + code = str; + } fmdump_printf(fp, "%-20s %-32s %s\n", fmdump_date(buf, sizeof (buf), rp), uuid, code); @@ -56,6 +65,7 @@ { uint_t i, size = 0; nvlist_t **nva; + uint8_t *ba; (void) flt_short(lp, rp, fp); (void) nvlist_lookup_uint32(rp->rec_nvl, FM_SUSPECT_FAULT_SZ, &size); @@ -63,6 +73,8 @@ if (size != 0) { (void) nvlist_lookup_nvlist_array(rp->rec_nvl, FM_SUSPECT_FAULT_LIST, &nva, &size); + (void) nvlist_lookup_uint8_array(rp->rec_nvl, + FM_SUSPECT_FAULT_STATUS, &ba, &size); } for (i = 0; i < size; i++) { @@ -91,15 +103,24 @@ } - fmdump_printf(fp, " %3u%% %s\n\n", + fmdump_printf(fp, " %3u%% %s", pct, class ? class : "-"); - /* - * Originally we didn't require FM_FAULT_RESOURCE, so if it - * isn't defined in the event, display the ASRU FMRI instead. - */ + if (ba[i] & FM_SUSPECT_FAULTY) + fmdump_printf(fp, "\n\n"); + else if (ba[i] & FM_SUSPECT_NOT_PRESENT) + fmdump_printf(fp, "\tRemoved\n\n"); + else if (ba[i] & FM_SUSPECT_REPLACED) + fmdump_printf(fp, "\tReplaced\n\n"); + else if (ba[i] & FM_SUSPECT_REPAIRED) + fmdump_printf(fp, "\tRepair Attempted\n\n"); + else if (ba[i] & FM_SUSPECT_ACQUITTED) + fmdump_printf(fp, "\tAcquitted\n\n"); + else + fmdump_printf(fp, "\n\n"); + fmdump_printf(fp, " Problem in: %s\n", - rname ? rname : aname ? aname : "-"); + rname ? rname : "-"); fmdump_printf(fp, " Affects: %s\n", aname ? aname : "-");
--- a/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c Sat Aug 02 03:26:27 2008 -0700 @@ -289,7 +289,7 @@ } static void -cma_recv_list(fmd_hdl_t *hdl, nvlist_t *nvl, boolean_t repair) +cma_recv_list(fmd_hdl_t *hdl, nvlist_t *nvl, const char *class) { char *uuid = NULL; nvlist_t **nva; @@ -308,9 +308,11 @@ } keepopen = nvc; - while (nvc-- != 0 && (repair || !fmd_case_uuclosed(hdl, uuid))) { + while (nvc-- != 0 && (strcmp(class, FM_LIST_SUSPECT_CLASS) != 0 || + !fmd_case_uuclosed(hdl, uuid))) { nvlist_t *nvl = *nva++; const cma_subscriber_t *subr; + int has_fault; if ((subr = nvl2subr(hdl, nvl, &asru)) == NULL) continue; @@ -322,8 +324,17 @@ * A handler must not close the case itself. */ if (subr->subr_func != NULL) { - err = subr->subr_func(hdl, nvl, asru, uuid, repair); - + has_fault = fmd_nvl_fmri_has_fault(hdl, asru, + FMD_HAS_FAULT_ASRU, NULL); + if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) { + if (has_fault == 1) + err = subr->subr_func(hdl, nvl, asru, + uuid, 0); + } else { + if (has_fault == 0) + err = subr->subr_func(hdl, nvl, asru, + uuid, 1); + } if (err == CMA_RA_SUCCESS) keepopen--; } @@ -332,10 +343,12 @@ * Do not close the case if we are handling cache faults. 
*/ if (nvlist_lookup_uint32(asru, FM_FMRI_CPU_CACHE_INDEX, &index) != 0) { - if (!keepopen && !repair) { + if (!keepopen && strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) { fmd_case_uuclose(hdl, uuid); } } + if (!keepopen && strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) + fmd_case_uuresolved(hdl, uuid); } static void @@ -347,21 +360,23 @@ if ((subr = nvl2subr(hdl, nvl, &asru)) == NULL) return; - if (subr->subr_func != NULL) - (void) subr->subr_func(hdl, nvl, asru, NULL, 0); + if (subr->subr_func != NULL) { + if (fmd_nvl_fmri_has_fault(hdl, asru, + FMD_HAS_FAULT_ASRU, NULL) == 1) + (void) subr->subr_func(hdl, nvl, asru, NULL, 0); + } } /*ARGSUSED*/ static void cma_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) { - boolean_t repair = B_FALSE; - fmd_hdl_debug(hdl, "received %s\n", class); if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 || - (repair = (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0))) - cma_recv_list(hdl, nvl, repair); + strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 || + strcmp(class, FM_LIST_UPDATED_CLASS) == 0) + cma_recv_list(hdl, nvl, class); else cma_recv_one(hdl, nvl); } @@ -448,7 +463,6 @@ if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) return; /* invalid data in configuration file */ - fmd_hdl_subscribe(hdl, "list.repaired"); fmd_hdl_subscribe(hdl, "fault.cpu.*"); fmd_hdl_subscribe(hdl, "fault.memory.*"); #ifdef opl
--- a/usr/src/cmd/fm/modules/common/disk-monitor/disk-monitor.conf Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/modules/common/disk-monitor/disk-monitor.conf Sat Aug 02 03:26:27 2008 -0700 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -27,5 +27,4 @@ # fmd configuration file for the disk-monitor.so disk monitor. # subscribe fault.io.disk.* -subscribe list.repaired dictionary DISK
--- a/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c Sat Aug 02 03:26:27 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -174,7 +174,7 @@ } static void -diskmon_agent_repair(fmd_hdl_t *hdl, nvlist_t *nvl) +diskmon_agent_repair(fmd_hdl_t *hdl, nvlist_t *nvl, int repair) { char *uuid = NULL; nvlist_t **nva; @@ -209,6 +209,9 @@ dm_state_change(diskp, HPS_REPAIRED); } + if (repair) + fmd_case_uuresolved(hdl, uuid); + } static void @@ -267,12 +270,17 @@ * Act on the fault suspect list or repaired list (embedded agent * action). */ - if (fmd_nvl_class_match(hdl, nvl, "list.repaired")) { + if (fmd_nvl_class_match(hdl, nvl, FM_LIST_REPAIRED_CLASS)) { - diskmon_agent_repair(hdl, nvl); + diskmon_agent_repair(hdl, nvl, 1); return; - } else if (fmd_nvl_class_match(hdl, nvl, "list.suspect")) { + } else if (fmd_nvl_class_match(hdl, nvl, FM_LIST_UPDATED_CLASS)) { + + diskmon_agent_repair(hdl, nvl, 0); + return; + + } else if (fmd_nvl_class_match(hdl, nvl, FM_LIST_SUSPECT_CLASS)) { diskmon_agent_suspect(hdl, nvl); return;
--- a/usr/src/cmd/fm/modules/common/eversholt/eval.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/modules/common/eversholt/eval.c Sat Aug 02 03:26:27 2008 -0700 @@ -56,6 +56,8 @@ static struct node *eval_fru(struct node *np); static struct node *eval_asru(struct node *np); +extern fmd_hdl_t *Hdl; /* handle from eft.c */ + /* * begins_with -- return true if rhs path begins with everything in lhs path */ @@ -409,6 +411,27 @@ if (cp != NULL) valuep->v = 1; return (1); + } else if (funcname == L_has_fault) { + nvlist_t *asru = NULL, *fru = NULL, *rsrc = NULL; + + nodep = eval_getname(funcnp, ex, events, np->u.expr.left, + globals, croot, arrowp, try, &duped); + path = ipath2str(NULL, ipath(nodep)); + platform_units_translate(0, croot, &asru, &fru, &rsrc, path); + FREE((void *)path); + if (duped) + tree_free(nodep); + + /* rsrc left NULL by the translate call: report 0, else use the has_fault result */ + if (rsrc == NULL) + valuep->v = 0; + else + valuep->v = fmd_nvl_fmri_has_fault(Hdl, rsrc, + FMD_HAS_FAULT_RESOURCE, + strcmp(np->u.expr.right->u.quote.s, "") == 0 ? + NULL : (char *)np->u.expr.right->u.quote.s); + valuep->t = UINT64; + return (1); } else if (funcname == L_count) { struct stats *statp; struct istat_entry ent;
--- a/usr/src/cmd/fm/modules/common/eversholt/fme.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/modules/common/eversholt/fme.c Sat Aug 02 03:26:27 2008 -0700 @@ -2885,7 +2885,7 @@ /* * If "action" property exists, evaluate it; this must be done * before the allfaulty check below since some actions may - * modify the asru to be used in fmd_nvl_fmri_faulty. This + * modify the asru to be used in fmd_nvl_fmri_has_fault. This * needs to be restructured if any new actions are introduced * that have effects that we do not want to be visible if * we decide not to publish in the dupclose check below. @@ -2918,7 +2918,8 @@ FM_FAULT_ASRU, &asru) != 0) { out(O_ALTFP|O_VERB, "NULL asru"); allfaulty = B_FALSE; - } else if (fmd_nvl_fmri_faulty(fmep->hdl, asru)) { + } else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru, + FMD_HAS_FAULT_ASRU, NULL)) { out(O_ALTFP|O_VERB, "faulty"); } else { out(O_ALTFP|O_VERB, "not faulty");
--- a/usr/src/cmd/fm/modules/common/io-retire/io-retire.conf Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/modules/common/io-retire/io-retire.conf Sat Aug 02 03:26:27 2008 -0700 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #pragma ident "%Z%%M% %I% %E% SMI" @@ -28,6 +28,5 @@ # setprop global-disable false subscribe fault.io.* -subscribe list.repaired subscribe defect.io.* subscribe defect.ultraSPARC-II.memory.nodiag
--- a/usr/src/cmd/fm/modules/common/io-retire/rio_main.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/modules/common/io-retire/rio_main.c Sat Aug 02 03:26:27 2008 -0700 @@ -122,17 +122,16 @@ static void rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) { - nvlist_t **faults; + nvlist_t **faults = NULL; nvlist_t *asru; - uint_t nfaults; + uint_t nfaults = 0; int f; - char devpath[PATH_MAX]; char *path; char *uuid; char *scheme; di_retire_t drt = {0}; int retire; - int rval; + int rval = 0; int error; char *snglfault = FM_FAULT_CLASS"."FM_ERROR_IO"."; boolean_t rtr; @@ -154,24 +153,23 @@ retire = 1; } else if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) { retire = 0; + } else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) { + retire = 0; } else if (strncmp(class, snglfault, strlen(snglfault)) == 0) { - fmd_hdl_debug(hdl, "rio_recv: single fault: %s\n", class); - return; + retire = 1; + faults = &nvl; + nfaults = 1; } else { fmd_hdl_debug(hdl, "rio_recv: not list.* class: %s\n", class); return; } - faults = NULL; - nfaults = 0; - if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, - &faults, &nfaults) != 0) { + if (nfaults == 0 && nvlist_lookup_nvlist_array(nvl, + FM_SUSPECT_FAULT_LIST, &faults, &nfaults) != 0) { fmd_hdl_debug(hdl, "rio_recv: no fault list"); return; } - devpath[0] = '\0'; - rval = 0; for (f = 0; f < nfaults; f++) { if (nvlist_lookup_boolean_value(faults[f], FM_SUSPECT_RETIRE, &rtr) == 0 && !rtr) { @@ -193,7 +191,7 @@ continue; } - if (retire && fault_exception(hdl, faults[f])) + if (fault_exception(hdl, faults[f])) continue; if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH, @@ -202,52 +200,50 @@ continue; } - /* - * If retire, we retire only if a single ASRU is pinpointed. - * We don't do automatic retires if a fault event pinpoints - * more than one ASRU. 
- */ if (retire) { - if (devpath[0] != '\0' && strcmp(path, devpath) != 0) { - fmd_hdl_debug(hdl, - "rio_recv: Skipping: multiple ASRU"); - return; - } else if (devpath[0] == '\0') { - (void) strlcpy(devpath, path, sizeof (devpath)); + if (fmd_nvl_fmri_has_fault(hdl, asru, + FMD_HAS_FAULT_ASRU, NULL) == 1) { + error = di_retire_device(path, &drt, 0); + if (error != 0) { + fmd_hdl_debug(hdl, "rio_recv:" + " di_retire_device failed:" + " error: %d %s", error, path); + rval = -1; + } } } else { - error = di_unretire_device(path, &drt); - if (error != 0) { - fmd_hdl_debug(hdl, "rio_recv: " - "di_unretire_device failed: error: %d %s", - error, path); - rval = -1; + if (fmd_nvl_fmri_has_fault(hdl, asru, + FMD_HAS_FAULT_ASRU, NULL) == 0) { + error = di_unretire_device(path, &drt); + if (error != 0) { + fmd_hdl_debug(hdl, "rio_recv:" + " di_unretire_device failed:" + " error: %d %s", error, path); + rval = -1; + } } } } - if (retire) { - if (devpath[0] == '\0') - return; - error = di_retire_device(devpath, &drt, 0); - if (error != 0) { - fmd_hdl_debug(hdl, "rio_recv: di_retire_device " - "failed: error: %d %s", error, devpath); - rval = -1; - } - } - /* * The fmd framework takes care of moving a case to the repaired * state. To move the case to the closed state however, we (the * retire agent) need to call fmd_case_uuclose() */ - if (retire && rval == 0) { + if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 && rval == 0) { if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 && !fmd_case_uuclosed(hdl, uuid)) { fmd_case_uuclose(hdl, uuid); } } + + /* + * Similarly to move the case to the resolved state, we (the + * retire agent) need to call fmd_case_uuresolved() + */ + if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && rval == 0 && + nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0) + fmd_case_uuresolved(hdl, uuid); } static const fmd_hdl_ops_t fmd_ops = {
--- a/usr/src/cmd/fm/modules/common/snmp-trapgen/snmp.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/modules/common/snmp-trapgen/snmp.c Sat Aug 02 03:26:27 2008 -0700 @@ -399,6 +399,7 @@ fmd_prop_free_string(hdl, rootdir); fmd_hdl_subscribe(hdl, FM_LIST_SUSPECT_CLASS); fmd_hdl_subscribe(hdl, FM_LIST_REPAIRED_CLASS); + fmd_hdl_subscribe(hdl, FM_LIST_RESOLVED_CLASS); } void
--- a/usr/src/cmd/fm/modules/common/syslog-msgs/syslog.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/modules/common/syslog-msgs/syslog.c Sat Aug 02 03:26:27 2008 -0700 @@ -390,7 +390,8 @@ syslog_pointer = dgettext(SYSLOG_DOMAIN, SYSLOG_POINTER); syslog_ctl.pri &= LOG_FACMASK; - if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) + if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0 || + strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) syslog_ctl.pri |= LOG_NOTICE; else syslog_ctl.pri |= LOG_ERR; @@ -549,6 +550,7 @@ fmd_prop_free_string(hdl, rootdir); fmd_hdl_subscribe(hdl, FM_LIST_SUSPECT_CLASS); fmd_hdl_subscribe(hdl, FM_LIST_REPAIRED_CLASS); + fmd_hdl_subscribe(hdl, FM_LIST_RESOLVED_CLASS); } void
--- a/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf Sat Aug 02 03:26:27 2008 -0700 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -28,4 +28,3 @@ # subscribe fault.fs.zfs.* subscribe resource.fs.zfs.removed -subscribe list.repaired
--- a/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c Sat Aug 02 03:26:27 2008 -0700 @@ -209,6 +209,8 @@ boolean_t is_repair; char *scheme; nvlist_t *vdev; + char *uuid; + int repair_done = 0; /* * If this is a resource notifying us of device removal, then simply @@ -231,7 +233,7 @@ return; } - if (strcmp(class, "list.repaired") == 0) + if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) is_repair = B_TRUE; else is_repair = B_FALSE; @@ -288,6 +290,7 @@ * continue. */ if (is_repair) { + repair_done = 1; (void) zpool_vdev_clear(zhp, vdev_guid); zpool_close(zhp); continue; @@ -307,6 +310,10 @@ replace_with_spare(zhp, vdev); zpool_close(zhp); } + + if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && repair_done && + nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0) + fmd_case_uuresolved(hdl, uuid); } static const fmd_hdl_ops_t fmd_ops = {
--- a/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcache.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcache.c Sat Aug 02 03:26:27 2008 -0700 @@ -280,7 +280,7 @@ return (-1); } - err = fmd_adm_rsrc_repair(ap, buf); + err = fmd_adm_rsrc_repaired(ap, buf); if (err) err = -1; fmd_adm_close(ap);
--- a/usr/src/cmd/fm/schemes/cpu/cpu.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/schemes/cpu/cpu.c Sat Aug 02 03:26:27 2008 -0700 @@ -254,6 +254,65 @@ } int +fmd_fmri_replaced(nvlist_t *nvl) +{ + int rc, err = 0; + uint8_t version; + uint32_t cpuid; + uint64_t nvlserid, curserid; + char *nvlserstr, curserbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ + topo_hdl_t *thp; + + if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 || + nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, &cpuid) != 0) + return (fmd_fmri_set_errno(EINVAL)); + + /* + * If the cpu-scheme topology exports this method replaced(), invoke it. + */ + if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL) + return (fmd_fmri_set_errno(EINVAL)); + rc = topo_fmri_replaced(thp, nvl, &err); + fmd_fmri_topo_rele(thp); + if (err != ETOPO_METHOD_NOTSUP) + return (rc); + + if (version == CPU_SCHEME_VERSION0) { + if (nvlist_lookup_uint64(nvl, FM_FMRI_CPU_SERIAL_ID, + &nvlserid) != 0) + return (fmd_fmri_set_errno(EINVAL)); + if (cpu_get_serialid_V0(cpuid, &curserid) != 0) + return (errno == ENOENT ? + FMD_OBJ_STATE_NOT_PRESENT : -1); + + return (curserid == nvlserid ? FMD_OBJ_STATE_STILL_PRESENT : + FMD_OBJ_STATE_REPLACED); + + } else if (version == CPU_SCHEME_VERSION1) { + if ((rc = nvlist_lookup_string(nvl, FM_FMRI_CPU_SERIAL_ID, + &nvlserstr)) != 0) + if (rc != ENOENT) + return (fmd_fmri_set_errno(EINVAL)); + + /* + * If serial id is not available, just check if the cpuid + * is present. + */ + if (cpu_get_serialid_V1(cpuid, curserbuf, 21) != 0) + if (cpu_cpuid_present(cpuid)) + return (FMD_OBJ_STATE_UNKNOWN); + else + return (FMD_OBJ_STATE_NOT_PRESENT); + + return (strcmp(curserbuf, nvlserstr) == 0 ? + FMD_OBJ_STATE_STILL_PRESENT : FMD_OBJ_STATE_REPLACED); + + } else { + return (fmd_fmri_set_errno(EINVAL)); + } +} + +int fmd_fmri_unusable(nvlist_t *nvl) { int rc, err = 0;
--- a/usr/src/cmd/fm/schemes/dev/scheme.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/schemes/dev/scheme.c Sat Aug 02 03:26:27 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -83,10 +83,22 @@ present = topo_fmri_present(thp, nvl, &err); fmd_fmri_topo_rele(thp); - if (err != 0) - return (0); - else - return (present); + return (present); +} + +int +fmd_fmri_replaced(nvlist_t *nvl) +{ + int err, rval; + topo_hdl_t *thp; + + if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL) + return (fmd_fmri_set_errno(EINVAL)); + err = 0; + rval = topo_fmri_replaced(thp, nvl, &err); + fmd_fmri_topo_rele(thp); + + return (rval); } int @@ -111,3 +123,26 @@ else return (unusable); } + +int +fmd_fmri_service_state(nvlist_t *nvl) +{ + uint8_t version; + int err, service_state; + topo_hdl_t *thp; + + if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 || + version > FM_DEV_SCHEME_VERSION) + return (fmd_fmri_set_errno(EINVAL)); + + if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL) + return (fmd_fmri_set_errno(EINVAL)); + err = 0; + service_state = topo_fmri_service_state(thp, nvl, &err); + fmd_fmri_topo_rele(thp); + + if (err != 0) + return (FMD_SERVICE_STATE_UNKNOWN); + else + return (service_state); +}
--- a/usr/src/cmd/fm/schemes/hc/scheme.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/schemes/hc/scheme.c Sat Aug 02 03:26:27 2008 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -84,17 +84,36 @@ err = nvlist_lookup_nvlist_array(nvl, FM_FMRI_HC_LIST, &hcprs, &hcnprs); err |= nvlist_lookup_string(hcprs[0], FM_FMRI_HC_NAME, &nm); if (err != 0) - return (0); + return (fmd_fmri_set_errno(EINVAL)); if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL) return (fmd_fmri_set_errno(EINVAL)); present = topo_fmri_present(thp, nvl, &err); fmd_fmri_topo_rele(thp); + return (present); +} + +int +fmd_fmri_replaced(nvlist_t *nvl) +{ + int err, replaced; + topo_hdl_t *thp; + nvlist_t **hcprs; + char *nm; + uint_t hcnprs; + + err = nvlist_lookup_nvlist_array(nvl, FM_FMRI_HC_LIST, &hcprs, &hcnprs); + err |= nvlist_lookup_string(hcprs[0], FM_FMRI_HC_NAME, &nm); if (err != 0) - return (present); - else - return (1); + return (fmd_fmri_set_errno(EINVAL)); + + if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL) + return (fmd_fmri_set_errno(EINVAL)); + replaced = topo_fmri_replaced(thp, nvl, &err); + fmd_fmri_topo_rele(thp); + + return (replaced); } /*
--- a/usr/src/cmd/fm/schemes/mem/mem.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/cmd/fm/schemes/mem/mem.c Sat Aug 02 03:26:27 2008 -0700 @@ -204,6 +204,8 @@ size_t nserids; #else nvlist_t *unum_nvl; + nvlist_t *nvlcp = NULL; + uint64_t val; #endif /* sparc */ if (mem_fmri_get_unum(nvl, &unum) < 0) @@ -274,6 +276,121 @@ rc = fmd_fmri_set_errno(EINVAL); fmd_fmri_topo_rele(thp); + /* + * Need to check if this is a valid page too. if "isretired" returns + * EINVAL, assume page invalid and return not_present. + */ + if (rc == 1 && nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val) == + 0 && nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val) == 0 && + mem_unum_rewrite(nvl, &nvlcp) == 0 && nvlcp != NULL) { + int rval = mem_page_cmd(MEM_PAGE_FMRI_ISRETIRED, nvlcp); + if (rval == -1 && errno == EINVAL) + rc = 0; + nvlist_free(nvlcp); + } +#endif /* sparc */ + return (rc); +} + +int +fmd_fmri_replaced(nvlist_t *nvl) +{ + char *unum = NULL; + int rc, err = 0; + struct topo_hdl *thp; +#ifdef sparc + char **nvlserids, **serids; + uint_t nnvlserids; + size_t nserids; +#else + nvlist_t *unum_nvl; + nvlist_t *nvlcp = NULL; + uint64_t val; +#endif /* sparc */ + + if (mem_fmri_get_unum(nvl, &unum) < 0) + return (-1); /* errno is set for us */ + +#ifdef sparc + /* + * If the mem-scheme topology exports this method replaced(), invoke it. + */ + if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL) + return (fmd_fmri_set_errno(EINVAL)); + rc = topo_fmri_replaced(thp, nvl, &err); + fmd_fmri_topo_rele(thp); + if (err != ETOPO_METHOD_NOTSUP) + return (rc); + + if (nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &nvlserids, + &nnvlserids) != 0) { + /* + * Some mem scheme FMRIs don't have serial ids because + * either the platform does not support them, or because + * the FMRI was created before support for serial ids was + * introduced. If this is the case, assume it is there. 
+ */ + if (mem.mem_dm == NULL) + return (FMD_OBJ_STATE_UNKNOWN); + else + return (fmd_fmri_set_errno(EINVAL)); + } + + if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) { + if (errno == ENOTSUP) + return (FMD_OBJ_STATE_UNKNOWN); + if (errno != ENOENT) { + /* + * Errors are only signalled to the caller if they're + * the caller's fault. This isn't - it's a failure on + * our part to burst or read the serial numbers. We'll + * whine about it, and tell the caller the named + * module(s) isn't/aren't there. + */ + fmd_fmri_warn("failed to retrieve serial number for " + "unum %s", unum); + } + return (FMD_OBJ_STATE_NOT_PRESENT); + } + + rc = serids_eq(serids, nserids, nvlserids, nnvlserids) ? + FMD_OBJ_STATE_STILL_PRESENT : FMD_OBJ_STATE_REPLACED; + + mem_strarray_free(serids, nserids); +#else + /* + * On X86 we will invoke the topo is_replaced method passing in the + * unum, which is in hc scheme. The libtopo hc-scheme is_replaced + * method will invoke the node-specific is_replaced method, which is + * implemented by the chip enumerator for rank nodes. The rank node's + * is_replaced method will compare the serial number in the unum with + * the current serial to determine if the same DIMM is replaced. + */ + if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL) { + fmd_fmri_warn("failed to get handle to topology"); + return (-1); + } + if (topo_fmri_str2nvl(thp, unum, &unum_nvl, &err) == 0) { + rc = topo_fmri_replaced(thp, unum_nvl, &err); + nvlist_free(unum_nvl); + } else + rc = fmd_fmri_set_errno(EINVAL); + fmd_fmri_topo_rele(thp); + + /* + * Need to check if this is a valid page too. if "isretired" returns + * EINVAL, assume page invalid and return not_present. 
+ */ + if ((rc == FMD_OBJ_STATE_STILL_PRESENT || + rc == FMD_OBJ_STATE_UNKNOWN) && + nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val) == 0 && + nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val) == 0 && + mem_unum_rewrite(nvl, &nvlcp) == 0 && nvlcp != NULL) { + int rval = mem_page_cmd(MEM_PAGE_FMRI_ISRETIRED, nvlcp); + if (rval == -1 && errno == EINVAL) + rc = FMD_OBJ_STATE_NOT_PRESENT; + nvlist_free(nvlcp); + } #endif /* sparc */ return (rc); }
--- a/usr/src/lib/fm/libfmd_adm/common/fmd_adm.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/libfmd_adm/common/fmd_adm.c Sat Aug 02 03:26:27 2008 -0700 @@ -616,7 +616,7 @@ } int -fmd_adm_rsrc_repair(fmd_adm_t *ap, const char *fmri) +fmd_adm_rsrc_repaired(fmd_adm_t *ap, const char *fmri) { char *str = (char *)fmri; int err; @@ -627,7 +627,50 @@ return (fmd_adm_set_errno(ap, EINVAL)); do { - cs = fmd_adm_rsrcrepair_1(str, &err, ap->adm_clnt); + cs = fmd_adm_rsrcrepaired_1(str, &err, ap->adm_clnt); + } while (fmd_adm_retry(ap, cs, &retries)); + + if (cs != RPC_SUCCESS) + return (fmd_adm_set_errno(ap, EPROTO)); + + return (fmd_adm_set_svcerr(ap, err)); +} + +int +fmd_adm_rsrc_replaced(fmd_adm_t *ap, const char *fmri) +{ + char *str = (char *)fmri; + int err; + enum clnt_stat cs; + uint_t retries = 0; + + if (fmri == NULL) + return (fmd_adm_set_errno(ap, EINVAL)); + + do { + cs = fmd_adm_rsrcreplaced_1(str, &err, ap->adm_clnt); + } while (fmd_adm_retry(ap, cs, &retries)); + + if (cs != RPC_SUCCESS) + return (fmd_adm_set_errno(ap, EPROTO)); + + return (fmd_adm_set_svcerr(ap, err)); +} + +int +fmd_adm_rsrc_acquit(fmd_adm_t *ap, const char *fmri, const char *uuid) +{ + char *str = (char *)fmri; + char *str2 = (char *)uuid; + int err; + enum clnt_stat cs; + uint_t retries = 0; + + if (fmri == NULL) + return (fmd_adm_set_errno(ap, EINVAL)); + + do { + cs = fmd_adm_rsrcacquit_1(str, str2, &err, ap->adm_clnt); } while (fmd_adm_retry(ap, cs, &retries)); if (cs != RPC_SUCCESS) @@ -657,6 +700,27 @@ return (fmd_adm_set_svcerr(ap, err)); } +int +fmd_adm_case_acquit(fmd_adm_t *ap, const char *uuid) +{ + char *str = (char *)uuid; + int err; + enum clnt_stat cs; + uint_t retries = 0; + + if (uuid == NULL) + return (fmd_adm_set_errno(ap, EINVAL)); + + do { + cs = fmd_adm_caseacquit_1(str, &err, ap->adm_clnt); + } while (fmd_adm_retry(ap, cs, &retries)); + + if (cs != RPC_SUCCESS) + return (fmd_adm_set_errno(ap, EPROTO)); + + return (fmd_adm_set_svcerr(ap, err)); +} + static int 
fmd_adm_case_cmp(const void *lp, const void *rp) {
--- a/usr/src/lib/fm/libfmd_adm/common/fmd_adm.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/libfmd_adm/common/fmd_adm.h Sat Aug 02 03:26:27 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -104,8 +104,11 @@ extern int fmd_adm_rsrc_count(fmd_adm_t *, int, uint32_t *); extern int fmd_adm_rsrc_iter(fmd_adm_t *, int, fmd_adm_rsrc_f *, void *); extern int fmd_adm_rsrc_flush(fmd_adm_t *, const char *); -extern int fmd_adm_rsrc_repair(fmd_adm_t *, const char *); +extern int fmd_adm_rsrc_repaired(fmd_adm_t *, const char *); +extern int fmd_adm_rsrc_replaced(fmd_adm_t *, const char *); +extern int fmd_adm_rsrc_acquit(fmd_adm_t *, const char *, const char *); extern int fmd_adm_case_repair(fmd_adm_t *, const char *); +extern int fmd_adm_case_acquit(fmd_adm_t *, const char *); extern int fmd_adm_case_iter(fmd_adm_t *, const char *, fmd_adm_case_f *, void *);
--- a/usr/src/lib/fm/libfmd_adm/common/mapfile-vers Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/libfmd_adm/common/mapfile-vers Sat Aug 02 03:26:27 2008 -0700 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -29,6 +29,7 @@ global: fmd_adm_case_iter; fmd_adm_case_repair; + fmd_adm_case_acquit; fmd_adm_close; fmd_adm_errmsg; fmd_adm_log_rotate; @@ -42,7 +43,9 @@ fmd_adm_rsrc_count; fmd_adm_rsrc_flush; fmd_adm_rsrc_iter; - fmd_adm_rsrc_repair; + fmd_adm_rsrc_repaired; + fmd_adm_rsrc_replaced; + fmd_adm_rsrc_acquit; fmd_adm_serd_iter; fmd_adm_serd_reset; fmd_adm_stats_free;
--- a/usr/src/lib/fm/libfmd_snmp/common/fmd_snmp.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/libfmd_snmp/common/fmd_snmp.h Sat Aug 02 03:26:27 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -63,9 +63,18 @@ #define SUNFMFAULTEVENT_COL_ASRU 6 #define SUNFMFAULTEVENT_COL_FRU 7 #define SUNFMFAULTEVENT_COL_RESOURCE 8 +#define SUNFMFAULTEVENT_COL_STATUS 9 +#define SUNFMFAULTEVENT_COL_LOCATION 10 #define SUNFMFAULTEVENT_COLMIN SUNFMFAULTEVENT_COL_PROBLEMUUID -#define SUNFMFAULTEVENT_COLMAX SUNFMFAULTEVENT_COL_RESOURCE +#define SUNFMFAULTEVENT_COLMAX SUNFMFAULTEVENT_COL_LOCATION + +#define SUNFMFAULTEVENT_STATE_OTHER 1 +#define SUNFMFAULTEVENT_STATE_FAULTY 2 +#define SUNFMFAULTEVENT_STATE_REMOVED 3 +#define SUNFMFAULTEVENT_STATE_REPLACED 4 +#define SUNFMFAULTEVENT_STATE_REPAIRED 5 +#define SUNFMFAULTEVENT_STATE_ACQUITTED 6 #define SUNFMMODULETABLE_OID SUNFM_OID, 3
--- a/usr/src/lib/fm/libfmd_snmp/common/problem.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/libfmd_snmp/common/problem.c Sat Aug 02 03:26:27 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -127,6 +127,18 @@ return (data->d_suspects[index - 1]); } +static sunFmFaultStatus_data_t +faultstatus_lookup_index_exact(sunFmProblem_data_t *data, ulong_t index) +{ + if (index == 0 || index > data->d_nsuspects) /* indexes are 1-based */ + return (0); + + if (data->d_statuses == NULL) + return (0); + + return (data->d_statuses[index - 1]); +} + /*ARGSUSED*/ static int problem_update_one(const fmd_adm_caseinfo_t *acp, void *arg) @@ -188,6 +200,11 @@ ASSERT(nelem == data->d_nsuspects); + (void) nvlist_lookup_uint8_array(data->d_aci_event, + FM_SUSPECT_FAULT_STATUS, &data->d_statuses, &nelem); + + ASSERT(nelem == data->d_nsuspects); + uu_avl_node_init(data, &data->d_uuid_avl, problem_uuid_avl_pool); (void) uu_avl_find(problem_uuid_avl, data, NULL, &idx); @@ -636,6 +653,96 @@ } } +/* + * Returns the status of the ASN.1 lexicographically first fault event after + * the one identified by table_info, or 0 if there is none. Indexes are updated + * to reflect the OID of the data returned. This allows us to implement GETNEXT.
+ */ +static sunFmFaultStatus_data_t +sunFmFaultStatusTable_nextfe(netsnmp_handler_registration *reginfo, + netsnmp_table_request_info *table_info) +{ + sunFmProblem_data_t *data; + sunFmFaultStatus_data_t rv; + netsnmp_variable_list *var; + ulong_t index; + + for (;;) { + switch (table_info->number_indexes) { + case 2: + default: + DEBUGMSGTL((MODNAME_STR, "nextfe: 2 indices:\n")); + DEBUGMSGVAR((MODNAME_STR, table_info->indexes)); + DEBUGMSG((MODNAME_STR, "\n")); + DEBUGMSGVAR((MODNAME_STR, + table_info->indexes->next_variable)); + DEBUGMSG((MODNAME_STR, "\n")); + index = *(ulong_t *) + table_info->indexes->next_variable->val.integer + 1; + + if ((data = sunFmProblemTable_pr(reginfo, + table_info)) != NULL && + (rv = faultstatus_lookup_index_exact(data, + index)) != 0) { + snmp_set_var_typed_value( + table_info->indexes->next_variable, + ASN_UNSIGNED, (uchar_t *)&index, + sizeof (index)); + return (rv); + } + + if (sunFmProblemTable_nextpr(reginfo, table_info) == + NULL) + return (0); + break; + case 1: + if ((data = sunFmProblemTable_pr(reginfo, + table_info)) != NULL) { + oid tmpoid[MAX_OID_LEN]; + index = 0; + + DEBUGMSGTL((MODNAME_STR, "nextfe: 1 index:\n")); + DEBUGMSGVAR((MODNAME_STR, table_info->indexes)); + DEBUGMSG((MODNAME_STR, "\n")); + var = + SNMP_MALLOC_TYPEDEF(netsnmp_variable_list); + snmp_set_var_typed_value(var, ASN_UNSIGNED, + (uchar_t *)&index, sizeof (index)); + (void) memcpy(tmpoid, reginfo->rootoid, + reginfo->rootoid_len * sizeof (oid)); + tmpoid[reginfo->rootoid_len] = 1; + tmpoid[reginfo->rootoid_len + 1] = + table_info->colnum; + if (build_oid_segment(var) != SNMPERR_SUCCESS) { + snmp_free_varbind(var); + return (0); + } + snmp_free_varbind( + table_info->indexes->next_variable); + table_info->indexes->next_variable = var; + table_info->number_indexes = 2; + DEBUGMSGTL((MODNAME_STR, "nextfe: built fake " + "index:\n")); + DEBUGMSGVAR((MODNAME_STR, table_info->indexes)); + DEBUGMSG((MODNAME_STR, "\n")); +
DEBUGMSGVAR((MODNAME_STR, + table_info->indexes->next_variable)); + DEBUGMSG((MODNAME_STR, "\n")); + } else { + if (sunFmProblemTable_nextpr(reginfo, + table_info) == NULL) + return (0); + } + break; + case 0: + if (sunFmProblemTable_nextpr(reginfo, table_info) == + NULL) + return (0); + break; + } + } +} + static sunFmFaultEvent_data_t * sunFmFaultEventTable_fe(netsnmp_handler_registration *reginfo, netsnmp_table_request_info *table_info) @@ -651,6 +758,21 @@ *(ulong_t *)table_info->indexes->next_variable->val.integer)); } +static sunFmFaultStatus_data_t +sunFmFaultStatusTable_fe(netsnmp_handler_registration *reginfo, + netsnmp_table_request_info *table_info) +{ + sunFmProblem_data_t *data; + + ASSERT(table_info->number_indexes == 2); + + if ((data = sunFmProblemTable_pr(reginfo, table_info)) == NULL) + return (0); + + return (faultstatus_lookup_index_exact(data, + *(ulong_t *)table_info->indexes->next_variable->val.integer)); +} + /*ARGSUSED*/ static void sunFmProblemTable_return(unsigned int reg, void *arg) @@ -828,6 +950,7 @@ netsnmp_table_request_info *table_info; sunFmProblem_data_t *pdata; sunFmFaultEvent_data_t *data; + sunFmFaultStatus_data_t status; ASSERT(netsnmp_handler_check_cache(cache) != NULL); @@ -869,30 +992,58 @@ * for GETNEXT requests.
*/ - switch (reqinfo->mode) { - case MODE_GET: - if ((data = sunFmFaultEventTable_fe(reginfo, table_info)) == - NULL) { + if (table_info->colnum == SUNFMFAULTEVENT_COL_STATUS) { + switch (reqinfo->mode) { + case MODE_GET: + if ((status = sunFmFaultStatusTable_fe(reginfo, + table_info)) == 0) { + netsnmp_free_delegated_cache(cache); + (void) pthread_mutex_unlock(&update_lock); + return; + } + break; + case MODE_GETNEXT: + case MODE_GETBULK: + if ((status = sunFmFaultStatusTable_nextfe(reginfo, + table_info)) == 0) { + netsnmp_free_delegated_cache(cache); + (void) pthread_mutex_unlock(&update_lock); + return; + } + break; + default: + snmp_log(LOG_ERR, MODNAME_STR + ": Unsupported request mode %d\n", reqinfo->mode); netsnmp_free_delegated_cache(cache); (void) pthread_mutex_unlock(&update_lock); return; } - break; - case MODE_GETNEXT: - case MODE_GETBULK: - if ((data = sunFmFaultEventTable_nextfe(reginfo, table_info)) == - NULL) { + } else { + switch (reqinfo->mode) { + case MODE_GET: + if ((data = sunFmFaultEventTable_fe(reginfo, + table_info)) == NULL) { + netsnmp_free_delegated_cache(cache); + (void) pthread_mutex_unlock(&update_lock); + return; + } + break; + case MODE_GETNEXT: + case MODE_GETBULK: + if ((data = sunFmFaultEventTable_nextfe(reginfo, + table_info)) == NULL) { + netsnmp_free_delegated_cache(cache); + (void) pthread_mutex_unlock(&update_lock); + return; + } + break; + default: + snmp_log(LOG_ERR, MODNAME_STR + ": Unsupported request mode %d\n", reqinfo->mode); netsnmp_free_delegated_cache(cache); (void) pthread_mutex_unlock(&update_lock); return; } - break; - default: - snmp_log(LOG_ERR, MODNAME_STR ": Unsupported request mode %d\n", - reqinfo->mode); - netsnmp_free_delegated_cache(cache); - (void) pthread_mutex_unlock(&update_lock); - return; } switch (table_info->colnum) { @@ -978,6 +1129,33 @@ free(str); break; } + case SUNFMFAULTEVENT_COL_STATUS: + { + ulong_t pl = SUNFMFAULTEVENT_STATE_OTHER; /* fallback state */ + + if (status & FM_SUSPECT_FAULTY) + pl = SUNFMFAULTEVENT_STATE_FAULTY; +
else if (status & FM_SUSPECT_NOT_PRESENT) + pl = SUNFMFAULTEVENT_STATE_REMOVED; + else if (status & FM_SUSPECT_REPLACED) + pl = SUNFMFAULTEVENT_STATE_REPLACED; + else if (status & FM_SUSPECT_REPAIRED) + pl = SUNFMFAULTEVENT_STATE_REPAIRED; + else if (status & FM_SUSPECT_ACQUITTED) + pl = SUNFMFAULTEVENT_STATE_ACQUITTED; + netsnmp_table_build_result(reginfo, request, table_info, + ASN_UNSIGNED, (uchar_t *)&pl, sizeof (pl)); + break; + } + case SUNFMFAULTEVENT_COL_LOCATION: + { + char *location = "-"; + + (void) nvlist_lookup_string(data, FM_FAULT_LOCATION, &location); + netsnmp_table_build_result(reginfo, request, table_info, + ASN_OCTET_STR, (uchar_t *)location, strlen(location)); + break; + } default: break; }
--- a/usr/src/lib/fm/libfmd_snmp/common/problem.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/libfmd_snmp/common/problem.h Sat Aug 02 03:26:27 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -48,6 +48,7 @@ ulong_t d_nsuspects; nvlist_t **d_suspects; nvlist_t *d_aci_event; + uint8_t *d_statuses; } sunFmProblem_data_t; typedef struct sunFmProblem_update_ctx { @@ -59,6 +60,7 @@ } sunFmProblem_update_ctx_t; typedef nvlist_t sunFmFaultEvent_data_t; +typedef uint8_t sunFmFaultStatus_data_t; int sunFmProblemTable_init(void); int sunFmFaultEventTable_init(void);
--- a/usr/src/lib/fm/topo/libtopo/common/dev.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/topo/libtopo/common/dev.c Sat Aug 02 03:26:27 2008 -0700 @@ -36,6 +36,7 @@ #include <sys/stat.h> #include <libnvpair.h> #include <fm/topo_mod.h> +#include <fm/fmd_fmri.h> #include <sys/fm/protocol.h> #include <topo_method.h> @@ -53,8 +54,12 @@ nvlist_t *, nvlist_t **); static int dev_fmri_present(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *, nvlist_t **); +static int dev_fmri_replaced(topo_mod_t *, tnode_t *, topo_version_t, + nvlist_t *, nvlist_t **); static int dev_fmri_unusable(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *, nvlist_t **); +static int dev_fmri_service_state(topo_mod_t *, tnode_t *, topo_version_t, + nvlist_t *, nvlist_t **); static const topo_method_t dev_methods[] = { { TOPO_METH_NVL2STR, TOPO_METH_NVL2STR_DESC, TOPO_METH_NVL2STR_VERSION, @@ -65,9 +70,15 @@ TOPO_STABILITY_INTERNAL, dev_fmri_create_meth }, { TOPO_METH_PRESENT, TOPO_METH_PRESENT_DESC, TOPO_METH_PRESENT_VERSION, TOPO_STABILITY_INTERNAL, dev_fmri_present }, + { TOPO_METH_REPLACED, TOPO_METH_REPLACED_DESC, + TOPO_METH_REPLACED_VERSION, TOPO_STABILITY_INTERNAL, + dev_fmri_replaced }, { TOPO_METH_UNUSABLE, TOPO_METH_UNUSABLE_DESC, TOPO_METH_UNUSABLE_VERSION, TOPO_STABILITY_INTERNAL, dev_fmri_unusable }, + { TOPO_METH_SERVICE_STATE, TOPO_METH_SERVICE_STATE_DESC, + TOPO_METH_SERVICE_STATE_VERSION, TOPO_STABILITY_INTERNAL, + dev_fmri_service_state }, { NULL } }; @@ -335,7 +346,7 @@ * If the device is present and there is a devid, it must also match. * so di_init that one node. No need for DINFOFORCE.
*/ - len = strlen(devpath) + strlen("/devices") + 1; + len = strlen(devpath) + strlen("/devices") + 1; path = topo_mod_alloc(mod, len); (void) snprintf(path, len, "/devices%s", devpath); if (devid == NULL) { @@ -383,6 +394,87 @@ /*ARGSUSED*/ static int +dev_fmri_replaced(topo_mod_t *mod, tnode_t *node, topo_version_t version, + nvlist_t *in, nvlist_t **out) +{ + uint8_t fmversion; + char *devpath = NULL; + uint32_t rval; + char *devid = NULL, *path; + ddi_devid_t id; + ddi_devid_t matchid; + di_node_t dnode; + struct stat sb; + int len; + + if (version > TOPO_METH_REPLACED_VERSION) + return (topo_mod_seterrno(mod, EMOD_VER_NEW)); + + if (nvlist_lookup_uint8(in, FM_VERSION, &fmversion) != 0 || + fmversion > FM_DEV_SCHEME_VERSION || + nvlist_lookup_string(in, FM_FMRI_DEV_PATH, &devpath) != 0) + return (topo_mod_seterrno(mod, EMOD_FMRI_MALFORM)); + + (void) nvlist_lookup_string(in, FM_FMRI_DEV_ID, &devid); + + if (devpath == NULL || strlen(devpath) == 0) + return (topo_mod_seterrno(mod, EMOD_FMRI_MALFORM)); + + /* + * stat() the device node in devfs. This will tell us if the device is + * present or not. Don't stat the minor, just the whole device. + * If the device is present and there is a devid, it must also match. + * so di_init that one node. No need for DINFOFORCE.
+ */ + len = strlen(devpath) + strlen("/devices") + 1; + if ((path = topo_mod_alloc(mod, len)) == NULL) + return (topo_mod_seterrno(mod, EMOD_NOMEM)); + (void) snprintf(path, len, "/devices%s", devpath); + if (devid == NULL) { + if (stat(path, &sb) != -1) + rval = FMD_OBJ_STATE_UNKNOWN; + else if ((dnode = di_init("/", DINFOCACHE)) == DI_NODE_NIL) + rval = FMD_OBJ_STATE_NOT_PRESENT; + else { + if (di_lookup_node(dnode, devpath) == DI_NODE_NIL) + rval = FMD_OBJ_STATE_NOT_PRESENT; + else + rval = FMD_OBJ_STATE_UNKNOWN; + di_fini(dnode); + } + } else { + if (stat(path, &sb) == -1) + rval = FMD_OBJ_STATE_NOT_PRESENT; + else if ((dnode = di_init(devpath, DINFOCPYONE)) == DI_NODE_NIL) + rval = FMD_OBJ_STATE_NOT_PRESENT; + else { + if ((id = di_devid(dnode)) == NULL || + devid_str_decode(devid, &matchid, NULL) != 0) + rval = FMD_OBJ_STATE_UNKNOWN; + else { + if (devid_compare(id, matchid) != 0) + rval = FMD_OBJ_STATE_REPLACED; + else + rval = FMD_OBJ_STATE_STILL_PRESENT; + devid_free(matchid); + } + di_fini(dnode); + } + } + topo_mod_free(mod, path, len); + + if (topo_mod_nvalloc(mod, out, NV_UNIQUE_NAME) != 0) + return (topo_mod_seterrno(mod, EMOD_NVL_INVAL)); + if (nvlist_add_uint32(*out, TOPO_METH_REPLACED_RET, rval) != 0) { + nvlist_free(*out); + return (topo_mod_seterrno(mod, EMOD_NVL_INVAL)); + } + + return (0); +} + +/*ARGSUSED*/ +static int dev_fmri_unusable(topo_mod_t *mod, tnode_t *node, topo_version_t version, nvlist_t *in, nvlist_t **out) { @@ -392,7 +484,7 @@ uint32_t unusable; uint_t state; - if (version > TOPO_METH_PRESENT_VERSION) + if (version > TOPO_METH_UNUSABLE_VERSION) return (topo_mod_seterrno(mod, EMOD_VER_NEW)); if (nvlist_lookup_uint8(in, FM_VERSION, &fmversion) != 0 || @@ -428,6 +520,56 @@ return (0); } +/*ARGSUSED*/ +static int +dev_fmri_service_state(topo_mod_t *mod, tnode_t *node, topo_version_t version, + nvlist_t *in, nvlist_t **out) +{ + di_node_t dnode; + uint8_t fmversion; + char *devpath = NULL; + uint32_t service_state; + uint_t state; + + if (version > TOPO_METH_SERVICE_STATE_VERSION) + return
(topo_mod_seterrno(mod, EMOD_VER_NEW)); + + if (nvlist_lookup_uint8(in, FM_VERSION, &fmversion) != 0 || + fmversion > FM_DEV_SCHEME_VERSION || + nvlist_lookup_string(in, FM_FMRI_DEV_PATH, &devpath) != 0) + return (topo_mod_seterrno(mod, EMOD_FMRI_MALFORM)); + + if (devpath == NULL) + return (topo_mod_seterrno(mod, EMOD_FMRI_MALFORM)); + + if ((dnode = di_init(devpath, DINFOCPYONE)) == DI_NODE_NIL) { + if (errno != ENXIO) + return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM)); + service_state = FMD_SERVICE_STATE_UNUSABLE; + } else { + uint_t retired = di_retired(dnode); + state = di_state(dnode); + if (retired || (state & (DI_DEVICE_OFFLINE | DI_DEVICE_DOWN | + DI_BUS_QUIESCED | DI_BUS_DOWN))) + service_state = FMD_SERVICE_STATE_UNUSABLE; + else if (state & DI_DEVICE_DEGRADED) + service_state = FMD_SERVICE_STATE_DEGRADED; + else + service_state = FMD_SERVICE_STATE_OK; + di_fini(dnode); + } + + if (topo_mod_nvalloc(mod, out, NV_UNIQUE_NAME) != 0) + return (topo_mod_seterrno(mod, EMOD_NVL_INVAL)); + if (nvlist_add_uint32(*out, TOPO_METH_SERVICE_STATE_RET, + service_state) != 0) { + nvlist_free(*out); + return (topo_mod_seterrno(mod, EMOD_NVL_INVAL)); + } + + return (0); +} + static nvlist_t * dev_fmri_create(topo_mod_t *mp, const char *id, const char *path) {
--- a/usr/src/lib/fm/topo/libtopo/common/hc.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/topo/libtopo/common/hc.c Sat Aug 02 03:26:27 2008 -0700 @@ -36,6 +36,7 @@ #include <limits.h> #include <fm/topo_mod.h> #include <fm/topo_hc.h> +#include <fm/fmd_fmri.h> #include <sys/param.h> #include <sys/systeminfo.h> #include <sys/fm/protocol.h> @@ -61,6 +62,8 @@ nvlist_t **); static int hc_fmri_present(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *, nvlist_t **); +static int hc_fmri_replaced(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *, + nvlist_t **); static int hc_fmri_unusable(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *, nvlist_t **); static int hc_fmri_create_meth(topo_mod_t *, tnode_t *, topo_version_t, @@ -87,6 +90,9 @@ TOPO_STABILITY_INTERNAL, hc_compare }, { TOPO_METH_PRESENT, TOPO_METH_PRESENT_DESC, TOPO_METH_PRESENT_VERSION, TOPO_STABILITY_INTERNAL, hc_fmri_present }, + { TOPO_METH_REPLACED, TOPO_METH_REPLACED_DESC, + TOPO_METH_REPLACED_VERSION, TOPO_STABILITY_INTERNAL, + hc_fmri_replaced }, { TOPO_METH_UNUSABLE, TOPO_METH_UNUSABLE_DESC, TOPO_METH_UNUSABLE_VERSION, TOPO_STABILITY_INTERNAL, hc_fmri_unusable }, @@ -1716,6 +1722,85 @@ } static int +hc_is_replaced(topo_mod_t *mod, tnode_t *node, void *pdata) +{ + int err; + struct hc_args *hap = (struct hc_args *)pdata; + uint32_t present = 0; + + /* + * check with the enumerator that created this FMRI + * (topo node) + */ + if (topo_method_invoke(node, TOPO_METH_REPLACED, + TOPO_METH_REPLACED_VERSION, hap->ha_fmri, &hap->ha_nvl, + &err) < 0) { + /* + * enumerator didn't provide "replaced" method - so + * try "present" method + */ + if (topo_method_invoke(node, TOPO_METH_PRESENT, + TOPO_METH_PRESENT_VERSION, hap->ha_fmri, &hap->ha_nvl, + &err) < 0) { + /* no present method either - assume present */ + present = 1; + } else { + (void) nvlist_lookup_uint32(hap->ha_nvl, + TOPO_METH_PRESENT_RET, &present); + nvlist_free(hap->ha_nvl); /* replaced by a new list below */ + hap->ha_nvl = NULL; + } + if
(topo_mod_nvalloc(mod, &hap->ha_nvl, + NV_UNIQUE_NAME) == 0) + if (nvlist_add_uint32(hap->ha_nvl, + TOPO_METH_REPLACED_RET, present ? + FMD_OBJ_STATE_UNKNOWN : + FMD_OBJ_STATE_NOT_PRESENT) == 0) + return (0); + return (ETOPO_PROP_NVL); + } + + return (0); +} + +static int +hc_fmri_replaced(topo_mod_t *mod, tnode_t *node, topo_version_t version, + nvlist_t *in, nvlist_t **out) +{ + int err; + struct hc_walk *hwp; + struct hc_args *hap; + + if (version > TOPO_METH_REPLACED_VERSION) + return (topo_mod_seterrno(mod, ETOPO_METHOD_VERNEW)); + + if ((hap = topo_mod_alloc(mod, sizeof (struct hc_args))) == NULL) + return (topo_mod_seterrno(mod, EMOD_NOMEM)); + + hap->ha_fmri = in; + hap->ha_nvl = NULL; + if ((hwp = hc_walk_init(mod, node, hap->ha_fmri, hc_is_replaced, + (void *)hap)) != NULL) { + if (topo_walk_step(hwp->hcw_wp, TOPO_WALK_CHILD) == + TOPO_WALK_ERR) + err = -1; + else + err = 0; + topo_walk_fini(hwp->hcw_wp); + topo_mod_free(mod, hwp, sizeof (struct hc_walk)); + } else { + err = -1; + } + + if (hap->ha_nvl != NULL) + *out = hap->ha_nvl; + + topo_mod_free(mod, hap, sizeof (struct hc_args)); + + return (err); +} + +static int hc_unusable(topo_mod_t *mod, tnode_t *node, void *pdata) { int err;
--- a/usr/src/lib/fm/topo/libtopo/common/libtopo.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/topo/libtopo/common/libtopo.h Sat Aug 02 03:26:27 2008 -0700 @@ -90,9 +90,11 @@ * FMRI helper routines */ extern int topo_fmri_present(topo_hdl_t *, nvlist_t *, int *); +extern int topo_fmri_replaced(topo_hdl_t *, nvlist_t *, int *); extern int topo_fmri_contains(topo_hdl_t *, nvlist_t *, nvlist_t *, int *); extern int topo_fmri_expand(topo_hdl_t *, nvlist_t *, int *); extern int topo_fmri_unusable(topo_hdl_t *, nvlist_t *, int *); +extern int topo_fmri_service_state(topo_hdl_t *, nvlist_t *, int *); extern int topo_fmri_nvl2str(topo_hdl_t *, nvlist_t *, char **, int *); extern int topo_fmri_str2nvl(topo_hdl_t *, const char *, nvlist_t **, int *); extern int topo_fmri_asru(topo_hdl_t *, nvlist_t *, nvlist_t **, int *);
--- a/usr/src/lib/fm/topo/libtopo/common/mapfile-vers Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/topo/libtopo/common/mapfile-vers Sat Aug 02 03:26:27 2008 -0700 @@ -41,7 +41,9 @@ topo_fmri_label; topo_fmri_nvl2str; topo_fmri_present; + topo_fmri_replaced; topo_fmri_serial; + topo_fmri_service_state; topo_fmri_setprop; topo_fmri_str2nvl; topo_fmri_strcmp;
--- a/usr/src/lib/fm/topo/libtopo/common/topo_fmri.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/topo/libtopo/common/topo_fmri.c Sat Aug 02 03:26:27 2008 -0700 @@ -30,6 +30,7 @@ #include <string.h> #include <limits.h> #include <fm/topo_mod.h> +#include <fm/fmd_fmri.h> #include <sys/fm/protocol.h> #include <topo_alloc.h> #include <topo_error.h> @@ -50,8 +51,10 @@ * * - expand * - present + * - replaced * - contains * - unusable + * - service_state * - nvl2str * * In addition, the following operations are supported per-FMRI: @@ -199,6 +202,34 @@ } int +topo_fmri_replaced(topo_hdl_t *thp, nvlist_t *fmri, int *err) +{ + uint32_t replaced = FMD_OBJ_STATE_NOT_PRESENT; /* default result */ + char *scheme; + nvlist_t *out = NULL; + tnode_t *rnode; + + if (nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &scheme) != 0) + return (set_error(thp, ETOPO_FMRI_MALFORM, err, + TOPO_METH_REPLACED, out)); + + if ((rnode = topo_hdl_root(thp, scheme)) == NULL) + return (set_error(thp, ETOPO_METHOD_NOTSUP, err, + TOPO_METH_REPLACED, out)); + + if (topo_method_invoke(rnode, TOPO_METH_REPLACED, + TOPO_METH_REPLACED_VERSION, fmri, &out, err) < 0) { + (void) set_error(thp, *err, err, TOPO_METH_REPLACED, out); + return (FMD_OBJ_STATE_UNKNOWN); /* method call failed */ + } + + (void) nvlist_lookup_uint32(out, TOPO_METH_REPLACED_RET, &replaced); + nvlist_free(out); + + return (replaced); +} + +int topo_fmri_contains(topo_hdl_t *thp, nvlist_t *fmri, nvlist_t *subfmri, int *err) { uint32_t contains; @@ -267,6 +298,34 @@ } int +topo_fmri_service_state(topo_hdl_t *thp, nvlist_t *fmri, int *err) +{ + char *scheme; + uint32_t service_state = FMD_SERVICE_STATE_UNKNOWN; /* default */ + nvlist_t *out = NULL; + tnode_t *rnode; + + if (nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &scheme) != 0) + return (set_error(thp, ETOPO_FMRI_MALFORM, err, + TOPO_METH_SERVICE_STATE, out)); + + if ((rnode = topo_hdl_root(thp, scheme)) == NULL) + return (set_error(thp, ETOPO_METHOD_NOTSUP, err, + TOPO_METH_SERVICE_STATE, out)); + + if (topo_method_invoke(rnode,
TOPO_METH_SERVICE_STATE, + TOPO_METH_SERVICE_STATE_VERSION, fmri, &out, err) < 0) + return (set_error(thp, *err, err, TOPO_METH_SERVICE_STATE, + out)); + + (void) nvlist_lookup_uint32(out, TOPO_METH_SERVICE_STATE_RET, + &service_state); + nvlist_free(out); + + return (service_state); +} + +int topo_fmri_expand(topo_hdl_t *thp, nvlist_t *fmri, int *err) { char *scheme;
--- a/usr/src/lib/fm/topo/libtopo/common/topo_mod.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/topo/libtopo/common/topo_mod.h Sat Aug 02 03:26:27 2008 -0700 @@ -137,12 +137,24 @@ #define TOPO_METH_PRESENT_VERSION TOPO_METH_PRESENT_VERSION0 #define TOPO_METH_PRESENT_RET "present-ret" +#define TOPO_METH_REPLACED "topo_replaced" +#define TOPO_METH_REPLACED_DESC "replaced indicator" +#define TOPO_METH_REPLACED_VERSION0 0 +#define TOPO_METH_REPLACED_VERSION TOPO_METH_REPLACED_VERSION0 +#define TOPO_METH_REPLACED_RET "replaced-ret" + #define TOPO_METH_UNUSABLE "topo_unusable" #define TOPO_METH_UNUSABLE_DESC "unusable indicator" #define TOPO_METH_UNUSABLE_VERSION0 0 #define TOPO_METH_UNUSABLE_VERSION TOPO_METH_UNUSABLE_VERSION0 #define TOPO_METH_UNUSABLE_RET "unusable-ret" +#define TOPO_METH_SERVICE_STATE "topo_service_state" +#define TOPO_METH_SERVICE_STATE_DESC "service_state indicator" +#define TOPO_METH_SERVICE_STATE_VERSION0 0 +#define TOPO_METH_SERVICE_STATE_VERSION TOPO_METH_SERVICE_STATE_VERSION0 +#define TOPO_METH_SERVICE_STATE_RET "service_state-ret" + #define TOPO_METH_EXPAND "topo_expand" #define TOPO_METH_EXPAND_DESC "expand FMRI" #define TOPO_METH_EXPAND_VERSION0 0
--- a/usr/src/lib/fm/topo/modules/i86pc/chip/chip.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip.h Sat Aug 02 03:26:27 2008 -0700 @@ -138,6 +138,8 @@ nvlist_t *, nvlist_t **); extern int rank_fmri_present(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *, nvlist_t **); +extern int rank_fmri_replaced(topo_mod_t *, tnode_t *, topo_version_t, + nvlist_t *, nvlist_t **); extern int mem_asru_create(topo_mod_t *, nvlist_t *, nvlist_t **); /*
--- a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_amd.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_amd.c Sat Aug 02 03:26:27 2008 -0700 @@ -81,6 +81,9 @@ { TOPO_METH_PRESENT, TOPO_METH_PRESENT_DESC, TOPO_METH_PRESENT_VERSION, TOPO_STABILITY_INTERNAL, rank_fmri_present }, + { TOPO_METH_REPLACED, TOPO_METH_REPLACED_DESC, + TOPO_METH_REPLACED_VERSION, TOPO_STABILITY_INTERNAL, + rank_fmri_replaced }, { NULL } };
--- a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c Sat Aug 02 03:26:27 2008 -0700 @@ -33,6 +33,7 @@ #include <sys/types.h> #include <stdarg.h> #include <strings.h> +#include <fm/fmd_fmri.h> #include <sys/fm/protocol.h> #include "chip.h" @@ -454,3 +455,69 @@ return (0); } + +/* + * If we're getting called then the question of whether this dimm is plugged + * in has already been answered. What we don't know for sure is whether it's + * the same dimm or a different one plugged in the same slot. To check, we + * try and compare the serial numbers on the dimm in the current topology with + * the serial num from the unum fmri that got passed into this function as the + * argument. + * + * If either serial number is unavailable a comparison isn't possible, so we + * err on the side of caution and report FMD_OBJ_STATE_UNKNOWN. + */ +/* ARGSUSED */ +int +rank_fmri_replaced(topo_mod_t *mod, tnode_t *node, topo_version_t version, + nvlist_t *in, nvlist_t **out) +{ + tnode_t *dimmnode; + int err, rval = FMD_OBJ_STATE_UNKNOWN; + nvlist_t *unum; + char *curr_serial, *old_serial = NULL; + + /* + * If a serial number for the dimm was available at the time of the + * fault, it will have been added as a string to the unum nvlist + */ + unum = in; + if (nvlist_lookup_string(unum, FM_FMRI_HC_SERIAL_ID, &old_serial) != 0) + goto done; + + /* + * If the current serial number is available for the DIMM that this rank + * belongs to, it will be accessible as a property on the parent (dimm) + * node.
+ */ + dimmnode = topo_node_parent(node); + if (topo_prop_get_string(dimmnode, TOPO_PGROUP_PROTOCOL, + FM_FMRI_HC_SERIAL_ID, &curr_serial, &err) != 0) { + if (err != ETOPO_PROP_NOENT) { + whinge(mod, &err, "rank_fmri_replaced: Unexpected " + "error retrieving serial from node"); + return (topo_mod_seterrno(mod, EMOD_NVL_INVAL)); + } else + goto done; + } + + if (strcmp(old_serial, curr_serial) != 0) + rval = FMD_OBJ_STATE_REPLACED; + else + rval = FMD_OBJ_STATE_STILL_PRESENT; + + topo_mod_strfree(mod, curr_serial); +done: + if (topo_mod_nvalloc(mod, out, NV_UNIQUE_NAME) < 0) { + whinge(mod, &err, + "rank_fmri_replaced: failed to allocate nvlist!"); + return (topo_mod_seterrno(mod, EMOD_NOMEM)); + } + + if (nvlist_add_uint32(*out, TOPO_METH_REPLACED_RET, rval) != 0) { + nvlist_free(*out); + return (topo_mod_seterrno(mod, EMOD_NVL_INVAL)); + } + + return (0); +}
--- a/usr/src/lib/fm/topo/modules/sun4v/platform-cpu/cpu.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/topo/modules/sun4v/platform-cpu/cpu.c Sat Aug 02 03:26:27 2008 -0700 @@ -29,6 +29,7 @@ #include <strings.h> #include <umem.h> #include <fm/topo_mod.h> +#include <fm/fmd_fmri.h> #include <sys/fm/ldom.h> #include <sys/fm/protocol.h> @@ -37,7 +38,7 @@ /* * This enumerator creates cpu-schemed nodes for each strand found in the * sun4v Physical Rource Inventory (PRI). - * Each node export three methods present(), expand() and unusable(). + * Each node exports four methods: present(), expand(), replaced(), unusable(). * */ @@ -52,6 +53,8 @@ static void cpu_release(topo_mod_t *, tnode_t *); static int cpu_present(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *, nvlist_t **); +static int cpu_replaced(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *, + nvlist_t **); static int cpu_expand(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *, nvlist_t **); static int cpu_unusable(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *, @@ -66,6 +69,8 @@ static const topo_method_t cpu_methods[] = { { TOPO_METH_PRESENT, TOPO_METH_PRESENT_DESC, TOPO_METH_PRESENT_VERSION, TOPO_STABILITY_INTERNAL, cpu_present }, + { TOPO_METH_REPLACED, TOPO_METH_REPLACED_DESC, + TOPO_METH_REPLACED_VERSION, TOPO_STABILITY_INTERNAL, cpu_replaced }, { TOPO_METH_EXPAND, TOPO_METH_EXPAND_DESC, TOPO_METH_EXPAND_VERSION, TOPO_STABILITY_INTERNAL, cpu_expand }, { TOPO_METH_UNUSABLE, TOPO_METH_UNUSABLE_DESC, @@ -182,6 +187,54 @@ /*ARGSUSED*/ static int +cpu_replaced(topo_mod_t *mod, tnode_t *node, topo_version_t vers, + nvlist_t *in, nvlist_t **out) +{ + uint8_t version; + uint32_t cpuid; + uint64_t nvlserid; + uint32_t rval = FMD_OBJ_STATE_NOT_PRESENT; /* if cpuid not found */ + md_cpumap_t *mcmp; + md_info_t *chip = (md_info_t *)topo_mod_getspecific(mod); + + /* + * Get the physical cpuid + */ + if (nvlist_lookup_uint8(in, FM_VERSION, &version) != 0 || + version > FM_CPU_SCHEME_VERSION || + nvlist_lookup_uint32(in,
FM_FMRI_CPU_ID, &cpuid) != 0) { + return (topo_mod_seterrno(mod, EMOD_NVL_INVAL)); + } + + /* + * Find the cpuid entry + * If the input nvl contains a serial number, the cpu is identified + * by a tuple <cpuid, cpuserial> + * Otherwise, the cpu is identified by the <cpuid>. + */ + if ((mcmp = cpu_find_cpumap(chip, cpuid)) != NULL) { + if (nvlist_lookup_uint64(in, FM_FMRI_CPU_SERIAL_ID, &nvlserid) + == 0) + rval = (nvlserid == mcmp->cpumap_serialno) ? + FMD_OBJ_STATE_STILL_PRESENT : + FMD_OBJ_STATE_REPLACED; + else + rval = FMD_OBJ_STATE_UNKNOWN; /* no serial to compare */ + } + + /* return the replaced status */ + if (topo_mod_nvalloc(mod, out, NV_UNIQUE_NAME) != 0) + return (topo_mod_seterrno(mod, EMOD_NVL_INVAL)); + if (nvlist_add_uint32(*out, TOPO_METH_REPLACED_RET, rval) != 0) { + nvlist_free(*out); + return (topo_mod_seterrno(mod, EMOD_NVL_INVAL)); + } + + return (0); +} + +/*ARGSUSED*/ +static int cpu_expand(topo_mod_t *mod, tnode_t *node, topo_version_t vers, nvlist_t *in, nvlist_t **out) {
--- a/usr/src/lib/fm/topo/modules/sun4v/platform-mem/mem.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/fm/topo/modules/sun4v/platform-mem/mem.c Sat Aug 02 03:26:27 2008 -0700 @@ -29,6 +29,7 @@ #include <strings.h> #include <umem.h> #include <fm/topo_mod.h> +#include <fm/fmd_fmri.h> #include <sys/fm/protocol.h> #include <sys/mem.h> @@ -37,7 +38,7 @@ /* * This enumerator creates mem-schemed nodes for each dimm found in the * sun4v Physical Resource Inventory (PRI). - * Each node exports four methods: present(), expand(), unusable(), + * Each node exports five methods: present(), expand(), unusable(), replaced(), * and contains(). * */ @@ -53,6 +54,8 @@ static void mem_release(topo_mod_t *, tnode_t *); static int mem_present(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *, nvlist_t **); +static int mem_replaced(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *, + nvlist_t **); static int mem_expand(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *, nvlist_t **); static int mem_unusable(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *, @@ -69,6 +72,8 @@ static const topo_method_t mem_methods[] = { { TOPO_METH_PRESENT, TOPO_METH_PRESENT_DESC, TOPO_METH_PRESENT_VERSION, TOPO_STABILITY_INTERNAL, mem_present }, + { TOPO_METH_REPLACED, TOPO_METH_REPLACED_DESC, + TOPO_METH_REPLACED_VERSION, TOPO_STABILITY_INTERNAL, mem_replaced }, { TOPO_METH_EXPAND, TOPO_METH_EXPAND_DESC, TOPO_METH_EXPAND_VERSION, TOPO_STABILITY_INTERNAL, mem_expand }, { TOPO_METH_UNUSABLE, TOPO_METH_UNUSABLE_DESC, @@ -166,6 +171,45 @@ return (0); } +/*ARGSUSED*/ +static int +mem_replaced(topo_mod_t *mod, tnode_t *node, topo_version_t vers, + nvlist_t *in, nvlist_t **out) +{ + uint8_t version; + char **nvlserids; + uint_t n, nserids; /* the array lookup below stores a uint_t count */ + uint32_t rval = FMD_OBJ_STATE_NOT_PRESENT; /* if no serial matches */ + md_mem_info_t *mem = (md_mem_info_t *)topo_mod_getspecific(mod); + + /* sun4v platforms all support dimm serial numbers */ + + if (nvlist_lookup_uint8(in, FM_VERSION, &version) != 0 || + version >
FM_MEM_SCHEME_VERSION || + nvlist_lookup_string_array(in, FM_FMRI_MEM_SERIAL_ID, + &nvlserids, &nserids) != 0) { + return (topo_mod_seterrno(mod, EMOD_NVL_INVAL)); + } + + /* Find the dimm entry */ + for (n = 0; n < nserids; n++) { + if (mem_get_dimm_by_sn(nvlserids[n], mem) != NULL) { + rval = FMD_OBJ_STATE_STILL_PRESENT; + break; + } + } + + /* return the replaced status */ + if (topo_mod_nvalloc(mod, out, NV_UNIQUE_NAME) != 0) + return (topo_mod_seterrno(mod, EMOD_NVL_INVAL)); + if (nvlist_add_uint32(*out, TOPO_METH_REPLACED_RET, rval) != 0) { + nvlist_free(*out); + return (topo_mod_seterrno(mod, EMOD_NVL_INVAL)); + } + + return (0); +} + void mem_strarray_free(topo_mod_t *mod, char **arr, size_t dim) {
--- a/usr/src/lib/libdevinfo/devinfo.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/libdevinfo/devinfo.c Sat Aug 02 03:26:27 2008 -0700 @@ -978,6 +978,8 @@ result |= DI_DEVICE_OFFLINE; if (DI_NODE(node)->state & DEVI_DEVICE_DOWN) result |= DI_DEVICE_OFFLINE; + if (DI_NODE(node)->state & DEVI_DEVICE_DEGRADED) + result |= DI_DEVICE_DEGRADED; if (DI_NODE(node)->state & DEVI_BUS_QUIESCED) result |= DI_BUS_QUIESCED; if (DI_NODE(node)->state & DEVI_BUS_DOWN)
--- a/usr/src/lib/libdevinfo/libdevinfo.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/lib/libdevinfo/libdevinfo.h Sat Aug 02 03:26:27 2008 -0700 @@ -82,6 +82,7 @@ #define DI_DRIVER_DETACHED 0x8000 #define DI_DEVICE_OFFLINE 0x1 #define DI_DEVICE_DOWN 0x2 +#define DI_DEVICE_DEGRADED 0x4 #define DI_BUS_QUIESCED 0x100 #define DI_BUS_DOWN 0x200
--- a/usr/src/uts/common/os/devcfg.c Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/uts/common/os/devcfg.c Sat Aug 02 03:26:27 2008 -0700 @@ -7643,9 +7643,6 @@ (void *)dip)); if (finalize) e_ddi_offline_finalize(dip, DDI_FAILURE); - mutex_enter(&DEVI(dip)->devi_lock); - DEVI_SET_DEVICE_DEGRADED(dip); - mutex_exit(&DEVI(dip)->devi_lock); } /*
--- a/usr/src/uts/common/sys/fm/protocol.h Sat Aug 02 00:11:23 2008 -0700 +++ b/usr/src/uts/common/sys/fm/protocol.h Sat Aug 02 03:26:27 2008 -0700 @@ -56,6 +56,8 @@ #define FM_LIST_SUSPECT_CLASS FM_LIST_EVENT ".suspect" #define FM_LIST_ISOLATED_CLASS FM_LIST_EVENT ".isolated" #define FM_LIST_REPAIRED_CLASS FM_LIST_EVENT ".repaired" +#define FM_LIST_UPDATED_CLASS FM_LIST_EVENT ".updated" +#define FM_LIST_RESOLVED_CLASS FM_LIST_EVENT ".resolved" /* ereport class subcategory values */ #define FM_ERROR_CPU "cpu" @@ -72,7 +74,10 @@ /* list.* event payload member names */ #define FM_LIST_EVENT_SIZE "list-sz" -/* list.suspect, isolated, and repaired versions and payload member names */ +/* + * list.suspect, isolated, updated, repaired and resolved + * versions/payload member names. + */ #define FM_SUSPECT_UUID "uuid" #define FM_SUSPECT_DIAG_CODE "code" #define FM_SUSPECT_DIAG_TIME "diag-time" @@ -90,6 +95,10 @@ #define FM_SUSPECT_FAULTY 0x1 #define FM_SUSPECT_UNUSABLE 0x2 #define FM_SUSPECT_NOT_PRESENT 0x4 +#define FM_SUSPECT_DEGRADED 0x8 +#define FM_SUSPECT_REPAIRED 0x10 +#define FM_SUSPECT_REPLACED 0x20 +#define FM_SUSPECT_ACQUITTED 0x40 /* fault event versions and payload member names */ #define FM_FAULT_VERS0 0 @@ -111,6 +120,9 @@ #define FM_RSRC_ASRU_UUID "uuid" #define FM_RSRC_ASRU_CODE "code" #define FM_RSRC_ASRU_FAULTY "faulty" +#define FM_RSRC_ASRU_REPAIRED "repaired" +#define FM_RSRC_ASRU_REPLACED "replaced" +#define FM_RSRC_ASRU_ACQUITTED "acquitted" #define FM_RSRC_ASRU_UNUSABLE "unusable" #define FM_RSRC_ASRU_EVENT "event"