changeset 10656:217544b3cf73

6778240 generic historic diagnosis rules
author Stephen Hanson <Stephen.Hanson@Sun.COM>
date Sat, 26 Sep 2009 09:41:57 -0700
parents 1fc5061b760c
children 468e99e6f9ea
files usr/src/cmd/fm/fmadm/common/faulty.c usr/src/cmd/fm/fmd/common/fmd.c usr/src/cmd/fm/fmd/common/fmd_asru.c usr/src/cmd/fm/fmd/common/fmd_asru.h usr/src/cmd/fm/fmd/common/fmd_case.c usr/src/cmd/fm/fmd/common/fmd_case.h usr/src/cmd/fm/fmd/common/fmd_protocol.c usr/src/cmd/fm/fmd/common/fmd_protocol.h usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c usr/src/cmd/fm/modules/common/io-retire/rio_main.c usr/src/uts/common/sys/fm/protocol.h
diffstat 12 files changed, 746 insertions(+), 244 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/fm/fmadm/common/faulty.c	Sat Sep 26 05:04:14 2009 -0600
+++ b/usr/src/cmd/fm/fmadm/common/faulty.c	Sat Sep 26 09:41:57 2009 -0700
@@ -591,48 +591,6 @@
 	return (rt);
 }
 
-/*
- * compare entries in two lists return true if the two lists have identical
- * content. The two lists may not have entries in the same order, so we compare
- * the size of the list as well as trying to find every entry from one list in
- * the other.
- */
-static int
-cmp_name_list(name_list_t *lxp1, name_list_t *lxp2)
-{
-	name_list_t *lp1, *lp2;
-	int l1 = 0, l2 = 0, common = 0;
-
-	lp2 = lxp2;
-	while (lp2) {
-		l2++;
-		lp2 = lp2->next;
-		if (lp2 == lxp2)
-			break;
-	}
-	lp1 = lxp1;
-	while (lp1) {
-		l1++;
-		lp2 = lxp2;
-		while (lp2) {
-			if (strcmp(lp2->name, lp1->name) == 0) {
-				common++;
-				break;
-			}
-			lp2 = lp2->next;
-			if (lp2 == lxp2)
-				break;
-		}
-		lp1 = lp1->next;
-		if (lp1 == lxp1)
-			break;
-	}
-	if (l1 == l2 && l2 == common)
-		return (0);
-	else
-		return (1);
-}
-
 static name_list_t *
 alloc_name_list(char *name, uint8_t pct)
 {
@@ -650,24 +608,6 @@
 	return (nlp);
 }
 
-static void
-free_name_list(name_list_t *list)
-{
-	name_list_t *next = list;
-	name_list_t *lp;
-
-	if (list) {
-		do {
-			lp = next;
-			next = lp->next;
-			if (lp->label)
-				free(lp->label);
-			free(lp->name);
-			free(lp);
-		} while (next != list);
-	}
-}
-
 static status_record_t *
 new_record_init(uurec_t *uurec_p, char *msgid, name_list_t *class,
     name_list_t *fru, name_list_t *asru, name_list_t *resource,
@@ -863,64 +803,6 @@
 		add_list(status_rec_p, status_rec_p->asru, &status_asru_list);
 }
 
-/*
- * add uuid and diagnoses time to an existing record for similar fault on the
- * same fru
- */
-static void
-catalog_merge_record(status_record_t *status_rec_p, uurec_t *uurec_p,
-    name_list_t *asru, name_list_t *resource, name_list_t *serial,
-    boolean_t not_suppressed)
-{
-	uurec_t *uurec1_p;
-
-	status_rec_p->nrecs++;
-	/* add uurec in time order */
-	if (status_rec_p->uurec->sec > uurec_p->sec) {
-		uurec_p->next = status_rec_p->uurec;
-		uurec_p->prev = NULL;
-		status_rec_p->uurec = uurec_p;
-	} else {
-		uurec1_p = status_rec_p->uurec;
-		while (uurec1_p->next && uurec1_p->next->sec <= uurec_p->sec)
-			uurec1_p = uurec1_p->next;
-		if (uurec1_p->next)
-			uurec1_p->next->prev = uurec_p;
-		uurec_p->next = uurec1_p->next;
-		uurec_p->prev = uurec1_p;
-		uurec1_p->next = uurec_p;
-	}
-	status_rec_p->not_suppressed |= not_suppressed;
-	uurec_p->asru = merge_name_list(&status_rec_p->asru, asru, 0);
-	(void) merge_name_list(&status_rec_p->resource, resource, 0);
-	(void) merge_name_list(&status_rec_p->serial, serial, 0);
-}
-
-static status_record_t *
-record_in_catalog(name_list_t *class, name_list_t *fru,
-    char *msgid, hostid_t *host)
-{
-	sr_list_t *status_rec_p;
-	status_record_t *srp = NULL;
-
-	status_rec_p = status_rec_list;
-	while (status_rec_p) {
-		srp = status_rec_p->status_record;
-		if (host == srp->host &&
-		    cmp_name_list(class, srp->class) == 0 &&
-		    cmp_name_list(fru, srp->fru) == 0 &&
-		    strcmp(msgid, srp->msgid) == 0)
-			break;
-		if (status_rec_p->next == status_rec_list) {
-			srp = NULL;
-			break;
-		} else {
-			status_rec_p = status_rec_p->next;
-		}
-	}
-	return (srp);
-}
-
 static void
 get_serial_no(nvlist_t *nvl, name_list_t **serial_p, uint8_t pct)
 {
@@ -993,6 +875,15 @@
 			(void) merge_name_list(fru_p, nlp, 1);
 		}
 		get_serial_no(lfru, serial_p, lpct);
+	} else if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) != 0) {
+		/*
+		 * No FRU or resource. But we want to display the repair status
+		 * somehow, so create a dummy FRU field.
+		 */
+		nlp = alloc_name_list(dgettext("FMD", "None"), lpct);
+		nlp->status = status & ~(FM_SUSPECT_UNUSABLE |
+		    FM_SUSPECT_DEGRADED);
+		(void) merge_name_list(fru_p, nlp, 1);
 	}
 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &lasru) == 0) {
 		name = get_nvl2str_topo(lasru);
@@ -1029,7 +920,6 @@
 	name_list_t *asru = NULL, *fru = NULL, *serial = NULL;
 	nvlist_t **nva;
 	uint8_t *ba;
-	status_record_t *status_rec_p;
 	uurec_t *uurec_p;
 	hostid_t *host;
 	boolean_t not_suppressed = 1;
@@ -1066,19 +956,8 @@
 	uurec_p->event = NULL;
 	(void) nvlist_dup(nvl, &uurec_p->event, 0);
 	host = find_hostid(nvl);
-	if (not_suppressed && !opt_g)
-		status_rec_p = NULL;
-	else
-		status_rec_p = record_in_catalog(class, fru, msgid, host);
-	if (status_rec_p) {
-		catalog_merge_record(status_rec_p, uurec_p, asru, resource,
-		    serial, not_suppressed);
-		free_name_list(class);
-		free_name_list(fru);
-	} else {
-		catalog_new_record(uurec_p, msgid, class, fru, asru,
-		    resource, serial, not_suppressed, host);
-	}
+	catalog_new_record(uurec_p, msgid, class, fru, asru,
+	    resource, serial, not_suppressed, host);
 }
 
 static void
@@ -1473,10 +1352,17 @@
 	}
 	if (full || srp->fru == NULL || srp->asru == NULL) {
 		if (srp->resource) {
-			print_name_list(srp->resource,
-			    dgettext("FMD", "Problem in  :"),
-			    NULL, full ? 0 : max_display, 0, print_rsrc_status,
-			    full);
+			status = asru_same_status(srp->resource);
+			if (status != -1) {
+				print_name_list(srp->resource,
+				    dgettext("FMD", "Problem in  :"), NULL,
+				    full ? 0 : max_display, 0, NULL, full);
+				print_rsrc_status(status, "             ");
+			} else
+				print_name_list(srp->resource,
+				    dgettext("FMD", "Problem in  :"),
+				    NULL, full ? 0 : max_display, 0,
+				    print_rsrc_status, full);
 		}
 	}
 	if (srp->fru) {
--- a/usr/src/cmd/fm/fmd/common/fmd.c	Sat Sep 26 05:04:14 2009 -0600
+++ b/usr/src/cmd/fm/fmd/common/fmd.c	Sat Sep 26 09:41:57 2009 -0700
@@ -288,6 +288,7 @@
 { "rpc.api.prog", &fmd_conf_uint32, "100170" },	/* FMD_API rpc program num */
 { "rpc.rcvsize", &fmd_conf_size, "128k" },	/* rpc receive buffer size */
 { "rpc.sndsize", &fmd_conf_size, "128k" },	/* rpc send buffer size */
+{ "rsrc.pollperiod", &fmd_conf_time, "1h" },	/* aged rsrcs poller period */
 { "rsrc.age", &fmd_conf_time, "30d" },		/* max age of old rsrc log */
 { "rsrc.zero", &fmd_conf_bool, "false" },	/* zero rsrc cache on start? */
 { "schemedir", &fmd_conf_string, _fmd_scheme_path }, /* path for scheme mods */
@@ -705,12 +706,12 @@
 static void
 fmd_clear_aged_rsrcs(fmd_t *dp, id_t id, hrtime_t hrt)
 {
-	hrtime_t delta;
+	hrtime_t period;
 
 	fmd_asru_clear_aged_rsrcs();
-	(void) fmd_conf_getprop(dp->d_conf, "rsrc.age", &delta);
+	(void) fmd_conf_getprop(dp->d_conf, "rsrc.pollperiod", &period);
 	(void) fmd_timerq_install(dp->d_timers, dp->d_rmod->mod_timerids,
-	    (fmd_timer_f *)fmd_clear_aged_rsrcs, dp, NULL, delta/10);
+	    (fmd_timer_f *)fmd_clear_aged_rsrcs, dp, NULL, period);
 }
 
 /*
--- a/usr/src/cmd/fm/fmd/common/fmd_asru.c	Sat Sep 26 05:04:14 2009 -0600
+++ b/usr/src/cmd/fm/fmd/common/fmd_asru.c	Sat Sep 26 09:41:57 2009 -0700
@@ -441,7 +441,7 @@
 	boolean_t faulty = FMD_B_FALSE, unusable = FMD_B_FALSE;
 	int ps;
 	boolean_t repaired = FMD_B_FALSE, replaced = FMD_B_FALSE;
-	boolean_t acquitted = FMD_B_FALSE;
+	boolean_t acquitted = FMD_B_FALSE, resolved = FMD_B_FALSE;
 	nvlist_t *flt, *flt_copy, *asru;
 	char *case_uuid = NULL, *case_code = NULL;
 	fmd_asru_t *ap;
@@ -481,17 +481,20 @@
 	    &replaced);
 	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED,
 	    &acquitted);
+	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_RESOLVED,
+	    &resolved);
 
 	/*
-	 * Attempt to recreate the case in either the CLOSED or REPAIRED state
-	 * (depending on whether the faulty bit is still set).
+	 * Attempt to recreate the case in CLOSED, REPAIRED or RESOLVED state
+	 * (depending on whether the faulty/resolved bits are set).
 	 * If the case is already present, fmd_case_recreate() will return it.
 	 * If not, we'll create a new orphaned case. Either way,  we use the
 	 * ASRU event to insert a suspect into the partially-restored case.
 	 */
 	fmd_module_lock(fmd.d_rmod);
 	cp = fmd_case_recreate(fmd.d_rmod, NULL, faulty ? FMD_CASE_CLOSED :
-	    FMD_CASE_REPAIRED, case_uuid, case_code);
+	    resolved ? FMD_CASE_RESOLVED : FMD_CASE_REPAIRED, case_uuid,
+	    case_code);
 	fmd_case_hold(cp);
 	fmd_module_unlock(fmd.d_rmod);
 	if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time,
@@ -581,6 +584,8 @@
 		alp->al_reason = FMD_ASRU_REPAIRED;
 	else if (acquitted)
 		alp->al_reason = FMD_ASRU_ACQUITTED;
+	else
+		alp->al_reason = FMD_ASRU_REMOVED;
 
 	TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid,
 	    (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE]));
@@ -712,6 +717,9 @@
 	int err;
 	fmd_asru_rep_arg_t fara;
 
+	if (!(alp->al_flags & FMD_ASRU_FAULTY))
+		return;
+
 	/*
 	 * Checking for aged resources only happens on the diagnosing side
 	 * not on a proxy.
@@ -740,10 +748,55 @@
 	}
 }
 
+/*ARGSUSED*/
+void
+fmd_asru_check_if_aged(fmd_asru_link_t *alp, void *arg)
+{
+	struct timeval tv;
+	fmd_log_t *lp;
+	hrtime_t hrt;
+
+	/*
+	 * Case must be in resolved state for this to be called. So modified
+	 * time on resource cache entry should be the time the resolve occurred.
+	 * Clear *(int *)arg when the entry is younger than ah_lifetime.
+	 */
+	fmd_time_gettimeofday(&tv);
+	lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU);
+	if (lp == NULL)
+		return;
+	hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime);
+	fmd_log_rele(lp);
+	if (hrt * NANOSEC < fmd.d_asrus->ah_lifetime)
+		*(int *)arg = 0;
+}
+
+/*ARGSUSED*/
+void
+fmd_asru_most_recent(fmd_asru_link_t *alp, void *arg)
+{
+	fmd_log_t *lp;
+	uint64_t hrt;
+
+	/*
+	 * Find most recent modified time of a set of resource cache entries.
+	 */
+	lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU);
+	if (lp == NULL)
+		return;
+	hrt = lp->log_stat.st_mtime;
+	fmd_log_rele(lp);
+	if (*(uint64_t *)arg < hrt)
+		*(uint64_t *)arg = hrt;
+}
+
 void
 fmd_asru_clear_aged_rsrcs()
 {
+	int check_if_aged = 1;
 	fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, NULL);
+	fmd_case_hash_apply(fmd.d_cases, fmd_case_discard_resolved,
+	    &check_if_aged);
 }
 
 fmd_asru_hash_t *
@@ -1298,6 +1351,22 @@
 }
 
 /*
+ * Discard the case associated with this alp if it is in resolved state.
+ * Called on "fmadm flush".
+ */
+/*ARGSUSED*/
+void
+fmd_asru_flush(fmd_asru_link_t *alp, void *arg)
+{
+	int check_if_aged = 0;
+	int *rval = (int *)arg;
+
+	if (alp->al_case)
+		fmd_case_discard_resolved(alp->al_case, &check_if_aged);
+	*rval = 0;
+}
+
+/*
  * This is only called for proxied faults. Set various flags so we can
  * find the nature of the transport from the resource cache code.
  */
@@ -1459,7 +1528,8 @@
 	nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[faulty | (unusable << 1)],
 	    alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, faulty, unusable,
 	    message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted,
-	    cip->ci_diag_de == NULL ? cip->ci_mod->mod_fmri : cip->ci_diag_de);
+	    cip->ci_state == FMD_CASE_RESOLVED, cip->ci_diag_de == NULL ?
+	    cip->ci_mod->mod_fmri : cip->ci_diag_de);
 
 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
@@ -1525,7 +1595,9 @@
 	nstate = alp->al_flags & FMD_ASRU_STATE;
 
 	if (nstate == ostate) {
-		if (reason > alp->al_reason) {
+		if (reason > alp->al_reason &&
+		    ((fmd_case_impl_t *)alp->al_case)->ci_state <
+		    FMD_CASE_REPAIRED) {
 			alp->al_reason = reason;
 			fmd_asru_logevent(alp);
 			(void) pthread_cond_broadcast(&ap->asru_cv);
@@ -1560,6 +1632,18 @@
 	return (1);
 }
 
+/*ARGSUSED*/
+void
+fmd_asru_log_resolved(fmd_asru_link_t *alp, void *unused)
+{
+	fmd_asru_t *ap = alp->al_asru;
+
+	(void) pthread_mutex_lock(&ap->asru_lock);
+	fmd_asru_logevent(alp);
+	(void) pthread_cond_broadcast(&ap->asru_cv);
+	(void) pthread_mutex_unlock(&ap->asru_lock);
+}
+
 /*
  * Report the current known state of the link entry (ie this particular fault
  * affecting this particular ASRU).
--- a/usr/src/cmd/fm/fmd/common/fmd_asru.h	Sat Sep 26 05:04:14 2009 -0600
+++ b/usr/src/cmd/fm/fmd/common/fmd_asru.h	Sat Sep 26 09:41:57 2009 -0700
@@ -195,6 +195,7 @@
 	char *fara_uuid;	/* uuid can be passed in for comparison */
 } fmd_asru_rep_arg_t;
 extern void fmd_asru_repaired(fmd_asru_link_t *, void *);
+extern void fmd_asru_flush(fmd_asru_link_t *, void *);
 
 typedef struct {
 	int	*faus_countp;
@@ -225,8 +226,11 @@
 
 extern int fmd_asru_setflags(fmd_asru_link_t *, uint_t);
 extern int fmd_asru_clrflags(fmd_asru_link_t *, uint_t, uint8_t);
+extern void fmd_asru_log_resolved(fmd_asru_link_t *, void *);
 extern int fmd_asru_al_getstate(fmd_asru_link_t *);
 extern int fmd_asru_getstate(fmd_asru_t *);
+extern void fmd_asru_check_if_aged(fmd_asru_link_t *, void *);
+void fmd_asru_most_recent(fmd_asru_link_t *, void *);
 
 #ifdef	__cplusplus
 }
--- a/usr/src/cmd/fm/fmd/common/fmd_case.c	Sat Sep 26 05:04:14 2009 -0600
+++ b/usr/src/cmd/fm/fmd/common/fmd_case.c	Sat Sep 26 09:41:57 2009 -0700
@@ -220,6 +220,39 @@
 }
 
 static void
+fmd_case_hash_apply_except_current(fmd_case_hash_t *chp,
+    void (*func)(fmd_case_t *, void *), void *arg, fmd_case_t *current)
+{
+	fmd_case_impl_t *cp, **cps, **cpp;
+	uint_t cpc, i;
+
+	(void) pthread_rwlock_rdlock(&chp->ch_lock);
+
+	cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP);
+	cpc = chp->ch_count;
+
+	for (i = 0; i < chp->ch_hashlen; i++) {
+		for (cp = chp->ch_hash[i]; cp != NULL; cp = cp->ci_next)
+			if (cp != (fmd_case_impl_t *)current)
+				*cpp++ = fmd_case_tryhold(cp);
+			else
+				*cpp++ = cp;
+	}
+
+	ASSERT(cpp == cps + cpc);
+	(void) pthread_rwlock_unlock(&chp->ch_lock);
+
+	for (i = 0; i < cpc; i++) {
+		if (cps[i] != NULL && cps[i] != (fmd_case_impl_t *)current) {
+			func((fmd_case_t *)cps[i], arg);
+			fmd_case_rele((fmd_case_t *)cps[i]);
+		}
+	}
+
+	fmd_free(cps, cpc * sizeof (fmd_case_t *));
+}
+
+static void
 fmd_case_code_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
 {
 	uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen;
@@ -453,6 +486,12 @@
 	return (nvl);
 }
 
+static int fmd_case_match_on_faulty_overlap = 1;
+static int fmd_case_match_on_acquit_overlap = 1;
+static int fmd_case_auto_acquit_isolated = 1;
+static int fmd_case_auto_acquit_non_acquitted = 1;
+static int fmd_case_too_recent = 10; /* time in seconds */
+
 static boolean_t
 fmd_case_compare_elem(nvlist_t *nvl, nvlist_t *xnvl, const char *elem)
 {
@@ -498,82 +537,377 @@
 }
 
 static int
-fmd_case_match_suspect(fmd_case_susp_t *cis, fmd_case_susp_t *xcis)
+fmd_case_match_suspect(nvlist_t *nvl1, nvlist_t *nvl2)
 {
 	char *class, *new_class;
 
-	if (!fmd_case_compare_elem(cis->cis_nvl, xcis->cis_nvl, FM_FAULT_ASRU))
+	if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_ASRU))
 		return (0);
-	if (!fmd_case_compare_elem(cis->cis_nvl, xcis->cis_nvl,
-	    FM_FAULT_RESOURCE))
+	if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_RESOURCE))
+		return (0);
+	if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_FRU))
 		return (0);
-	if (!fmd_case_compare_elem(cis->cis_nvl, xcis->cis_nvl, FM_FAULT_FRU))
-		return (0);
-	(void) nvlist_lookup_string(xcis->cis_nvl, FM_CLASS, &class);
-	(void) nvlist_lookup_string(cis->cis_nvl, FM_CLASS, &new_class);
+	(void) nvlist_lookup_string(nvl2, FM_CLASS, &class);
+	(void) nvlist_lookup_string(nvl1, FM_CLASS, &new_class);
 	return (strcmp(class, new_class) == 0);
 }
 
+typedef struct {
+	int	*fcms_countp;
+	int	fcms_maxcount;
+	fmd_case_impl_t *fcms_cip;
+	uint8_t *fcms_new_susp_state;
+	uint8_t *fcms_old_susp_state;
+	uint8_t *fcms_old_match_state;
+} fcms_t;
+#define	SUSPECT_STATE_FAULTY				0x1
+#define	SUSPECT_STATE_ISOLATED				0x2
+#define	SUSPECT_STATE_REMOVED				0x4
+#define	SUSPECT_STATE_ACQUITED				0x8
+#define	SUSPECT_STATE_REPAIRED				0x10
+#define	SUSPECT_STATE_REPLACED				0x20
+#define	SUSPECT_STATE_NO_MATCH				0x1
+
+/*
+ * This is called for each suspect in the old case. Compare it against each
+ * suspect in the new case, setting fcms_old_susp_state and fcms_new_susp_state
+ * as appropriate. fcms_new_susp_state will be left as 0 if the suspect is not
+ * found in the old case.
+ */
+static void
+fmd_case_match_suspects(fmd_asru_link_t *alp, void *arg)
+{
+	fcms_t *fcmsp = (fcms_t *)arg;
+	fmd_case_impl_t *cip = fcmsp->fcms_cip;
+	fmd_case_susp_t *cis;
+	int i = 0;
+	int state = fmd_asru_al_getstate(alp);
+
+	if (*fcmsp->fcms_countp >= fcmsp->fcms_maxcount)
+		return;
+
+	if (!(state & FMD_ASRU_PRESENT) || (!(state & FMD_ASRU_FAULTY) &&
+	    alp->al_reason == FMD_ASRU_REMOVED))
+		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
+		    SUSPECT_STATE_REMOVED;
+	else if ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_FAULTY))
+		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
+		    SUSPECT_STATE_ISOLATED;
+	else if (state & FMD_ASRU_FAULTY)
+		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
+		    SUSPECT_STATE_FAULTY;
+	else if (alp->al_reason == FMD_ASRU_REPLACED)
+		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
+		    SUSPECT_STATE_REPLACED;
+	else if (alp->al_reason == FMD_ASRU_ACQUITTED)
+		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
+		    SUSPECT_STATE_ACQUITED;
+	else
+		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
+		    SUSPECT_STATE_REPAIRED;
+
+	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next, i++)
+		if (fmd_case_match_suspect(cis->cis_nvl, alp->al_event) == 1)
+			break;
+	if (cis != NULL)
+		fcmsp->fcms_new_susp_state[i] =
+		    fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp];
+	else
+		fcmsp->fcms_old_match_state[*fcmsp->fcms_countp] |=
+		    SUSPECT_STATE_NO_MATCH;
+	(*fcmsp->fcms_countp)++;
+}
+
+typedef struct {
+	int	*fca_do_update;
+	fmd_case_impl_t *fca_cip;
+} fca_t;
+
+/*
+ * Re-fault all acquitted suspects that are still present in the new list.
+ */
+static void
+fmd_case_fault_acquitted_matching(fmd_asru_link_t *alp, void *arg)
+{
+	fca_t *fcap = (fca_t *)arg;
+	fmd_case_impl_t *cip = fcap->fca_cip;
+	fmd_case_susp_t *cis;
+	int state = fmd_asru_al_getstate(alp);
+
+	if (!(state & FMD_ASRU_FAULTY) &&
+	    alp->al_reason == FMD_ASRU_ACQUITTED) {
+		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
+			if (fmd_case_match_suspect(cis->cis_nvl,
+			    alp->al_event) == 1)
+				break;
+		if (cis != NULL) {
+			(void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
+			*fcap->fca_do_update = 1;
+		}
+	}
+}
+
+/*
+ * Re-fault all suspects that are still present in the new list.
+ */
+static void
+fmd_case_fault_all_matching(fmd_asru_link_t *alp, void *arg)
+{
+	fca_t *fcap = (fca_t *)arg;
+	fmd_case_impl_t *cip = fcap->fca_cip;
+	fmd_case_susp_t *cis;
+	int state = fmd_asru_al_getstate(alp);
+
+	if (!(state & FMD_ASRU_FAULTY)) {
+		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
+			if (fmd_case_match_suspect(cis->cis_nvl,
+			    alp->al_event) == 1)
+				break;
+		if (cis != NULL) {
+			(void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
+			*fcap->fca_do_update = 1;
+		}
+	}
+}
+
+/*
+ * Acquit all suspects that are no longer present in the new list.
+ */
+static void
+fmd_case_acquit_no_match(fmd_asru_link_t *alp, void *arg)
+{
+	fca_t *fcap = (fca_t *)arg;
+	fmd_case_impl_t *cip = fcap->fca_cip;
+	fmd_case_susp_t *cis;
+	int state = fmd_asru_al_getstate(alp);
+
+	if (state & FMD_ASRU_FAULTY) {
+		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
+			if (fmd_case_match_suspect(cis->cis_nvl,
+			    alp->al_event) == 1)
+				break;
+		if (cis == NULL) {
+			(void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
+			    FMD_ASRU_ACQUITTED);
+			*fcap->fca_do_update = 1;
+		}
+	}
+}
+
+/*
+ * Acquit all isolated suspects.
+ */
+static void
+fmd_case_acquit_isolated(fmd_asru_link_t *alp, void *arg)
+{
+	int *do_update = (int *)arg;
+	int state = fmd_asru_al_getstate(alp);
+
+	if ((state & FMD_ASRU_PRESENT) && (state & FMD_ASRU_UNUSABLE) &&
+	    (state & FMD_ASRU_FAULTY)) {
+		(void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
+		    FMD_ASRU_ACQUITTED);
+		*do_update = 1;
+	}
+}
+
 /*
- * see if an identical suspect list already exists in the cache
+ * Acquit suspect which matches specified nvlist
  */
-static int
-fmd_case_check_for_dups(fmd_case_t *cp)
+static void
+fmd_case_acquit_suspect(fmd_asru_link_t *alp, void *arg)
+{
+	nvlist_t *nvl = (nvlist_t *)arg;
+	int state = fmd_asru_al_getstate(alp);
+
+	if ((state & FMD_ASRU_FAULTY) &&
+	    fmd_case_match_suspect(nvl, alp->al_event) == 1)
+		(void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
+		    FMD_ASRU_ACQUITTED);
+}
+
+typedef struct {
+	fmd_case_impl_t *fccd_cip;
+	uint8_t *fccd_new_susp_state;
+	uint8_t *fccd_new_match_state;
+	int *fccd_discard_new;
+	int *fccd_adjust_new;
+} fccd_t;
+
+/*
+ * see if a matching suspect list already exists in the cache
+ */
+static void
+fmd_case_check_for_dups(fmd_case_t *old_cp, void *arg)
 {
-	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp, *xcip;
-	fmd_case_hash_t *chp = fmd.d_cases;
-	fmd_case_susp_t *xcis, *cis;
-	int match = 0, match_susp;
-	uint_t h;
+	fccd_t *fccdp = (fccd_t *)arg;
+	fmd_case_impl_t *new_cip = fccdp->fccd_cip;
+	fmd_case_impl_t *old_cip = (fmd_case_impl_t *)old_cp;
+	int i, count = 0, do_update = 0, got_isolated_overlap = 0;
+	int got_faulty_overlap = 0;
+	int got_acquit_overlap = 0;
+	boolean_t too_recent;
+	uint64_t most_recent = 0;
+	fcms_t fcms;
+	fca_t fca;
+	uint8_t *new_susp_state;
+	uint8_t *old_susp_state;
+	uint8_t *old_match_state;
+
+	new_susp_state = alloca(new_cip->ci_nsuspects * sizeof (uint8_t));
+	for (i = 0; i < new_cip->ci_nsuspects; i++)
+		new_susp_state[i] = 0;
+	old_susp_state = alloca(old_cip->ci_nsuspects * sizeof (uint8_t));
+	for (i = 0; i < old_cip->ci_nsuspects; i++)
+		old_susp_state[i] = 0;
+	old_match_state = alloca(old_cip->ci_nsuspects * sizeof (uint8_t));
+	for (i = 0; i < old_cip->ci_nsuspects; i++)
+		old_match_state[i] = 0;
 
-	(void) pthread_rwlock_rdlock(&chp->ch_lock);
+	/*
+	 * Compare with each suspect in the existing case.
+	 */
+	fcms.fcms_countp = &count;
+	fcms.fcms_maxcount = old_cip->ci_nsuspects;
+	fcms.fcms_cip = new_cip;
+	fcms.fcms_new_susp_state = new_susp_state;
+	fcms.fcms_old_susp_state = old_susp_state;
+	fcms.fcms_old_match_state = old_match_state;
+	fmd_asru_hash_apply_by_case(fmd.d_asrus, (fmd_case_t *)old_cip,
+	    fmd_case_match_suspects, &fcms);
+
+	/*
+	 * If we have some faulty, non-isolated suspects that overlap, then most
+	 * likely it is the suspects that overlap in the suspect lists that are
+	 * to blame. So we can consider this to be a match.
+	 */
+	for (i = 0; i < new_cip->ci_nsuspects; i++)
+		if (new_susp_state[i] == SUSPECT_STATE_FAULTY)
+			got_faulty_overlap = 1;
+	if (got_faulty_overlap && fmd_case_match_on_faulty_overlap)
+		goto got_match;
+
+	/*
+	 * If we have no faulty, non-isolated suspects in the old case, but we
+	 * do have some acquitted suspects that overlap, then most likely it is
+	 * the acquitted suspects that overlap in the suspect lists that are
+	 * to blame. So we can consider this to be a match.
+	 */
+	for (i = 0; i < new_cip->ci_nsuspects; i++)
+		if (new_susp_state[i] == SUSPECT_STATE_ACQUITED)
+			got_acquit_overlap = 1;
+	for (i = 0; i < old_cip->ci_nsuspects; i++)
+		if (old_susp_state[i] == SUSPECT_STATE_FAULTY)
+			got_acquit_overlap = 0;
+	if (got_acquit_overlap && fmd_case_match_on_acquit_overlap)
+		goto got_match;
 
 	/*
-	 * Find all cases with this code
+	 * Check that all suspects in the new list are present in the old list.
+	 * Return if we find one that isn't.
 	 */
-	h = fmd_strhash(cip->ci_code) % chp->ch_hashlen;
-	for (xcip = chp->ch_code_hash[h]; xcip != NULL;
-	    xcip = xcip->ci_code_next) {
-		/*
-		 * only look for any cases (apart from this one)
-		 * whose code and number of suspects match
-		 */
-		if (xcip == cip || fmd_case_tryhold(xcip) == NULL)
-			continue;
-		if (strcmp(xcip->ci_code, cip->ci_code) != 0 ||
-		    xcip->ci_nsuspects != cip->ci_nsuspects) {
-			fmd_case_rele((fmd_case_t *)xcip);
-			continue;
-		}
+	for (i = 0; i < new_cip->ci_nsuspects; i++)
+		if (new_susp_state[i] == 0)
+			return;
 
-		/*
-		 * For each suspect in one list, check if there
-		 * is an identical suspect in the other list
-		 */
-		match = 1;
-		for (xcis = xcip->ci_suspects; xcis != NULL;
-		    xcis = xcis->cis_next) {
-			match_susp = 0;
-			for (cis = cip->ci_suspects; cis != NULL;
-			    cis = cis->cis_next) {
-				if (fmd_case_match_suspect(cis, xcis) == 1) {
-					match_susp = 1;
-					break;
-				}
-			}
-			if (match_susp == 0) {
-				match = 0;
-				break;
+	/*
+	 * Check that all suspects in the old list are present in the new list
+	 * *or* they are isolated or removed/replaced (which would explain why
+	 * they are not present in the new list). Return if we find one that is
+	 * faulty and unisolated or repaired or acquitted, and that is not
+	 * present in the new case.
+	 */
+	for (i = 0; i < old_cip->ci_nsuspects; i++)
+		if (old_match_state[i] == SUSPECT_STATE_NO_MATCH &&
+		    (old_susp_state[i] == SUSPECT_STATE_FAULTY ||
+		    old_susp_state[i] == SUSPECT_STATE_ACQUITED ||
+		    old_susp_state[i] == SUSPECT_STATE_REPAIRED))
+			return;
+
+got_match:
+	/*
+	 * If the old case is already in repaired/resolved state, we can't
+	 * do anything more with it, so keep the new case, but acquit some
+	 * of the suspects if appropriate.
+	 */
+	if (old_cip->ci_state >= FMD_CASE_REPAIRED) {
+		if (fmd_case_auto_acquit_non_acquitted) {
+			*fccdp->fccd_adjust_new = 1;
+			for (i = 0; i < new_cip->ci_nsuspects; i++) {
+				fccdp->fccd_new_susp_state[i] |=
+				    new_susp_state[i];
+				if (new_susp_state[i] == 0)
+					fccdp->fccd_new_susp_state[i] =
+					    SUSPECT_STATE_NO_MATCH;
 			}
 		}
-		fmd_case_rele((fmd_case_t *)xcip);
-		if (match) {
-			(void) pthread_rwlock_unlock(&chp->ch_lock);
-			return (1);
+		return;
+	}
+
+	/*
+	 * Otherwise discard the new case and keep the old, again updating the
+	 * state of the suspects as appropriate
+	 */
+	*fccdp->fccd_discard_new = 1;
+	fca.fca_cip = new_cip;
+	fca.fca_do_update = &do_update;
+
+	/*
+	 * See if new case occurred within fmd_case_too_recent seconds of the
+	 * most recent modification to the old case and if so don't do
+	 * auto-acquit. This avoids problems if a flood of ereports come in and
+	 * they don't all get diagnosed before the first case causes some of
+	 * the devices to be isolated making it appear that an isolated device
+	 * was in the suspect list.
+	 */
+	fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
+	    fmd_asru_most_recent, &most_recent);
+	too_recent = (new_cip->ci_tv.tv_sec - most_recent <
+	    fmd_case_too_recent);
+
+	if (got_faulty_overlap) {
+		/*
+		 * Acquit any suspects not present in the new list, plus
+		 * any that are present but are isolated.
+		 */
+		fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
+		    fmd_case_acquit_no_match, &fca);
+		if (fmd_case_auto_acquit_isolated && !too_recent)
+			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
+			    fmd_case_acquit_isolated, &do_update);
+	} else if (got_acquit_overlap) {
+		/*
+		 * Re-fault the acquitted matching suspects and acquit all
+		 * isolated suspects.
+		 */
+		if (fmd_case_auto_acquit_isolated && !too_recent) {
+			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
+			    fmd_case_fault_acquitted_matching, &fca);
+			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
+			    fmd_case_acquit_isolated, &do_update);
 		}
+	} else if (fmd_case_auto_acquit_isolated) {
+		/*
+		 * To get here, there must be no faulty or acquitted suspects,
+		 * but there must be at least one isolated suspect. Just acquit
+		 * non-matching isolated suspects. If there are no matching
+		 * isolated suspects, then re-fault all matching suspects.
+		 */
+		for (i = 0; i < new_cip->ci_nsuspects; i++)
+			if (new_susp_state[i] == SUSPECT_STATE_ISOLATED)
+				got_isolated_overlap = 1;
+		if (!got_isolated_overlap)
+			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
+			    fmd_case_fault_all_matching, &fca);
+		fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
+		    fmd_case_acquit_no_match, &fca);
 	}
-	(void) pthread_rwlock_unlock(&chp->ch_lock);
-	return (0);
+
+	/*
+	 * If we've updated anything in the old case, call fmd_case_update()
+	 */
+	if (do_update)
+		fmd_case_update(old_cp);
 }
 
 /*
@@ -610,22 +944,49 @@
 {
 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
 	fmd_asru_hash_t *ahp = fmd.d_asrus;
-
+	int discard_new = 0, i;
 	fmd_case_susp_t *cis;
 	fmd_asru_link_t *alp;
+	uint8_t *new_susp_state;
+	uint8_t *new_match_state;
+	int adjust_new = 0;
+	fccd_t fccd;
 
 	(void) pthread_mutex_lock(&cip->ci_lock);
 	if (cip->ci_code == NULL)
 		(void) fmd_case_mkcode(cp);
 	else if (cip->ci_precanned)
 		fmd_case_code_hash_insert(fmd.d_cases, cip);
-	if (fmd_case_check_for_dups(cp) == 1) {
+
+	/*
+	 * First we must see if any matching cases already exist.
+	 */
+	new_susp_state = alloca(cip->ci_nsuspects * sizeof (uint8_t));
+	for (i = 0; i < cip->ci_nsuspects; i++)
+		new_susp_state[i] = 0;
+	new_match_state = alloca(cip->ci_nsuspects * sizeof (uint8_t));
+	for (i = 0; i < cip->ci_nsuspects; i++)
+		new_match_state[i] = 0;
+	fccd.fccd_cip = cip;
+	fccd.fccd_adjust_new = &adjust_new;
+	fccd.fccd_new_susp_state = new_susp_state;
+	fccd.fccd_new_match_state = new_match_state;
+	fccd.fccd_discard_new = &discard_new;
+	fmd_case_hash_apply_except_current(fmd.d_cases, fmd_case_check_for_dups,
+	    &fccd, cp);
+
+	if (discard_new) {
+		/*
+		 * We've found an existing case that is a match and it is not
+		 * already in repaired or resolved state. So we can close this
+		 * one as a duplicate.
+		 */
 		(void) pthread_mutex_unlock(&cip->ci_lock);
 		return (1);
 	}
 
 	/*
-	 * no suspect list already exists  - allocate new cache entries
+	 * Allocate new cache entries
 	 */
 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
 		if ((alp = fmd_asru_hash_create_entry(ahp,
@@ -640,6 +1001,45 @@
 		(void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
 	}
 
+	if (adjust_new) {
+		int some_suspect = 0, some_not_suspect = 0;
+
+		/*
+		 * There are one or more matching cases but they are already in
+		 * repaired or resolved state. So we need to keep the new
+		 * case, but we can adjust it. Repaired/removed/replaced
+		 * suspects are unlikely to be to blame (unless there are
+		 * actually two separate faults). So if we have a combination of
+		 * repaired/replaced/removed suspects and acquitted suspects in
+		 * the old lists, then we should acquit in the new list those
+		 * that were repaired/replaced/removed in the old.
+		 */
+		for (i = 0; i < cip->ci_nsuspects; i++) {
+			if ((new_susp_state[i] & SUSPECT_STATE_REPLACED) ||
+			    (new_susp_state[i] & SUSPECT_STATE_REPAIRED) ||
+			    (new_susp_state[i] & SUSPECT_STATE_REMOVED) ||
+			    (new_match_state[i] & SUSPECT_STATE_NO_MATCH))
+				some_not_suspect = 1;
+			else
+				some_suspect = 1;
+		}
+		if (some_suspect && some_not_suspect) {
+			for (cis = cip->ci_suspects, i = 0; cis != NULL;
+			    cis = cis->cis_next, i++)
+				if ((new_susp_state[i] &
+				    SUSPECT_STATE_REPLACED) ||
+				    (new_susp_state[i] &
+				    SUSPECT_STATE_REPAIRED) ||
+				    (new_susp_state[i] &
+				    SUSPECT_STATE_REMOVED) ||
+				    (new_match_state[i] &
+				    SUSPECT_STATE_NO_MATCH))
+					fmd_asru_hash_apply_by_case(fmd.d_asrus,
+					    cp, fmd_case_acquit_suspect,
+					    cis->cis_nvl);
+		}
+	}
+
 	(void) pthread_mutex_unlock(&cip->ci_lock);
 	return (0);
 }
@@ -934,8 +1334,6 @@
 	fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
 	fmd_case_impl_t *eip;
 
-	ASSERT(state < FMD_CASE_RESOLVED);
-
 	(void) pthread_mutex_init(&cip->ci_lock, NULL);
 	fmd_buf_hash_create(&cip->ci_bufs);
 
@@ -987,11 +1385,12 @@
 
 			/*
 			 * When recreating an orphan case, state passed in may
-			 * either be CLOSED (faulty) or REPAIRED (!faulty). If
+			 * be CLOSED (faulty) or REPAIRED/RESOLVED (!faulty). If
 			 * any suspects are still CLOSED (faulty) then the
 			 * overall state needs to be CLOSED.
 			 */
-			if (cip->ci_state == FMD_CASE_REPAIRED &&
+			if ((cip->ci_state == FMD_CASE_REPAIRED ||
+			    cip->ci_state == FMD_CASE_RESOLVED) &&
 			    state == FMD_CASE_CLOSED)
 				cip->ci_state = FMD_CASE_CLOSED;
 			(void) pthread_mutex_unlock(&cip->ci_lock);
@@ -1397,13 +1796,8 @@
 		 * using fmd_xprt_uuresolved().
 		 */
 		if (flags & FMD_CF_RESOLVED) {
-			if (cip->ci_xprt != NULL) {
+			if (cip->ci_xprt != NULL)
 				fmd_list_delete(&cip->ci_mod->mod_cases, cip);
-			} else {
-				fmd_module_lock(cip->ci_mod);
-				fmd_list_delete(&cip->ci_mod->mod_cases, cip);
-				fmd_module_unlock(cip->ci_mod);
-			}
 		} else {
 			fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
 			    fmd_case_unusable_and_present,
@@ -1414,9 +1808,6 @@
 				fmd_xprt_uuresolved(cip->ci_xprt, cip->ci_uuid);
 				break;
 			}
-			fmd_module_lock(cip->ci_mod);
-			fmd_list_delete(&cip->ci_mod->mod_cases, cip);
-			fmd_module_unlock(cip->ci_mod);
 		}
 
 		cip->ci_state = FMD_CASE_RESOLVED;
@@ -1455,9 +1846,6 @@
 			return;
 		}
 
-		fmd_module_lock(cip->ci_mod);
-		fmd_list_delete(&cip->ci_mod->mod_cases, cip);
-		fmd_module_unlock(cip->ci_mod);
 		resolved = 1;
 		break;
 	}
@@ -1482,17 +1870,73 @@
 	}
 
 	if (resolved) {
-		/*
-		 * If we transitioned to RESOLVED, adjust the reference count to
-		 * reflect our removal from fmd.d_rmod->mod_cases above.  If the
-		 * caller has not placed an additional hold on the case, it
-		 * will now be freed.
-		 */
-		(void) pthread_mutex_lock(&cip->ci_lock);
-		fmd_asru_hash_delete_case(fmd.d_asrus, cp);
+		if (cip->ci_xprt != NULL) {
+			/*
+			 * If we transitioned to RESOLVED, adjust the reference
+			 * count to reflect our removal from
+			 * fmd.d_rmod->mod_cases above.  If the caller has not
+			 * placed an additional hold on the case, it will now
+			 * be freed.
+			 */
+			(void) pthread_mutex_lock(&cip->ci_lock);
+			fmd_asru_hash_delete_case(fmd.d_asrus, cp);
+			(void) pthread_mutex_unlock(&cip->ci_lock);
+			fmd_case_rele(cp);
+		} else {
+			fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
+			    fmd_asru_log_resolved, NULL);
+			(void) pthread_mutex_lock(&cip->ci_lock);
+			/* mark as "ready to be discarded" */
+			cip->ci_flags |= FMD_CF_RES_CMPL;
+			(void) pthread_mutex_unlock(&cip->ci_lock);
+		}
+	}
+}
+
+/*
+ * Discard a case if it is in RESOLVED state (and, if the check_if_aged
+ * argument is set, only if all suspects have passed the rsrc.aged time).
+ */
+void
+fmd_case_discard_resolved(fmd_case_t *cp, void *arg)
+{
+	int check_if_aged = *(int *)arg;
+	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
+
+	/*
+	 * First check if case has completed transition to resolved.
+	 */
+	(void) pthread_mutex_lock(&cip->ci_lock);
+	if (!(cip->ci_flags & FMD_CF_RES_CMPL)) {
 		(void) pthread_mutex_unlock(&cip->ci_lock);
-		fmd_case_rele(cp);
+		return;
 	}
+
+	/*
+	 * Now if check_if_aged is set, see if all suspects have aged.
+	 */
+	if (check_if_aged) {
+		int aged = 1;
+
+		fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
+		    fmd_asru_check_if_aged, &aged);
+		if (!aged) {
+			(void) pthread_mutex_unlock(&cip->ci_lock);
+			return;
+		}
+	}
+
+	/*
+	 * Finally discard the case, clearing FMD_CF_RES_CMPL so we don't
+	 * do it twice.
+	 */
+	fmd_module_lock(cip->ci_mod);
+	fmd_list_delete(&cip->ci_mod->mod_cases, cip);
+	fmd_module_unlock(cip->ci_mod);
+	fmd_asru_hash_delete_case(fmd.d_asrus, cp);
+	cip->ci_flags &= ~FMD_CF_RES_CMPL;
+	(void) pthread_mutex_unlock(&cip->ci_lock);
+	fmd_case_rele(cp);
 }
 
 /*
@@ -1964,7 +2408,7 @@
 }
 
 /*ARGSUSED*/
-void
+static void
 fmd_case_repair_replay_case(fmd_case_t *cp, void *arg)
 {
 	int not_faulty = 0;
@@ -1978,6 +2422,11 @@
 	if (cip->ci_state < FMD_CASE_SOLVED || cip->ci_xprt != NULL)
 		return;
 
+	if (cip->ci_state == FMD_CASE_RESOLVED) {
+		cip->ci_flags |= FMD_CF_RES_CMPL;
+		return;
+	}
+
 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_not_faulty,
 	    &not_faulty);
@@ -1991,9 +2440,6 @@
 		fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
 		    fmd_case_unusable_and_present, &any_unusable_and_present);
 		if (!any_unusable_and_present) {
-			fmd_module_lock(cip->ci_mod);
-			fmd_list_delete(&cip->ci_mod->mod_cases, cip);
-			fmd_module_unlock(cip->ci_mod);
 			cip->ci_state = FMD_CASE_RESOLVED;
 
 			TRACE((FMD_DBG_CASE, "replay sending list.repaired %s",
@@ -2007,10 +2453,7 @@
 			TRACE((FMD_DBG_CASE, "replay sending list.resolved %s",
 			    cip->ci_uuid));
 			fmd_case_publish(cp, FMD_CASE_RESOLVED);
-			(void) pthread_mutex_lock(&cip->ci_lock);
-			fmd_asru_hash_delete_case(fmd.d_asrus, cp);
-			(void) pthread_mutex_unlock(&cip->ci_lock);
-			fmd_case_rele(cp);
+			cip->ci_flags |= FMD_CF_RES_CMPL;
 		} else {
 			TRACE((FMD_DBG_CASE, "replay sending list.repaired %s",
 			    cip->ci_uuid));
--- a/usr/src/cmd/fm/fmd/common/fmd_case.h	Sat Sep 26 05:04:14 2009 -0600
+++ b/usr/src/cmd/fm/fmd/common/fmd_case.h	Sat Sep 26 09:41:57 2009 -0700
@@ -96,6 +96,7 @@
 #define	FMD_CF_RESOLVED		0x10	/* case has been resolved */
 #define	FMD_CF_INVISIBLE	0x20	/* case should be invisible */
 #define	FMD_CF_DELETING		0x40	/* case is about to be deleted */
+#define	FMD_CF_RES_CMPL		0x80	/* transition to resolved is complete */
 
 /*
  * ci_proxy_asru flags record if we created a new asru on the proxy side and
@@ -160,6 +161,7 @@
 extern int fmd_case_contains(fmd_case_t *, fmd_event_t *);
 extern int fmd_case_orphaned(fmd_case_t *);
 extern void fmd_case_repair_replay(void);
+extern void fmd_case_discard_resolved(fmd_case_t *, void *);
 
 #ifdef	__cplusplus
 }
--- a/usr/src/cmd/fm/fmd/common/fmd_protocol.c	Sat Sep 26 05:04:14 2009 -0600
+++ b/usr/src/cmd/fm/fmd/common/fmd_protocol.c	Sat Sep 26 09:41:57 2009 -0700
@@ -206,7 +206,7 @@
     nvlist_t *fmri, const char *uuid, const char *code,
     boolean_t faulty, boolean_t unusable, boolean_t message, nvlist_t *event,
     struct timeval *tvp, boolean_t repaired, boolean_t replaced,
-    boolean_t acquitted, nvlist_t *diag_de)
+    boolean_t acquitted, boolean_t resolved, nvlist_t *diag_de)
 {
 	nvlist_t *nvl;
 	int64_t tod[2];
@@ -233,6 +233,7 @@
 	err |= nvlist_add_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED, repaired);
 	err |= nvlist_add_boolean_value(nvl, FM_RSRC_ASRU_REPLACED, replaced);
 	err |= nvlist_add_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED, acquitted);
+	err |= nvlist_add_boolean_value(nvl, FM_RSRC_ASRU_RESOLVED, resolved);
 	err |= nvlist_add_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE, unusable);
 	err |= nvlist_add_boolean_value(nvl, FM_SUSPECT_MESSAGE, message);
 	err |= nvlist_add_int64_array(nvl, FM_SUSPECT_DIAG_TIME, tod, 2);
--- a/usr/src/cmd/fm/fmd/common/fmd_protocol.h	Sat Sep 26 05:04:14 2009 -0600
+++ b/usr/src/cmd/fm/fmd/common/fmd_protocol.h	Sat Sep 26 09:41:57 2009 -0700
@@ -74,7 +74,7 @@
     struct timeval *);
 extern nvlist_t *fmd_protocol_rsrc_asru(const char *, nvlist_t *,
     const char *, const char *, boolean_t, boolean_t, boolean_t, nvlist_t *,
-    struct timeval *m, boolean_t, boolean_t, boolean_t, nvlist_t *);
+    struct timeval *m, boolean_t, boolean_t, boolean_t, boolean_t, nvlist_t *);
 extern nvlist_t *fmd_protocol_fmderror(int, const char *, va_list);
 extern nvlist_t *fmd_protocol_moderror(struct fmd_module *, int, const char *);
 extern nvlist_t *fmd_protocol_xprt_ctl(struct fmd_module *,
--- a/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c	Sat Sep 26 05:04:14 2009 -0600
+++ b/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c	Sat Sep 26 09:41:57 2009 -0700
@@ -500,7 +500,22 @@
 {
 	int err = FMD_ADM_ERR_RSRCNOTF;
 
-	fmd_adm_do_repair(name, req, &err, FMD_ASRU_REPAIRED, NULL);
+	/*
+	 * If anyone does an fmadm flush command, discard any resolved
+	 * cases that were being retained for historic diagnosis.
+	 */
+	if (fmd_rpc_deny(req))
+		err = FMD_ADM_ERR_PERM;
+	else {
+		fmd_asru_hash_apply_by_asru(fmd.d_asrus, name,
+		    fmd_asru_flush, &err);
+		fmd_asru_hash_apply_by_label(fmd.d_asrus, name,
+		    fmd_asru_flush, &err);
+		fmd_asru_hash_apply_by_fru(fmd.d_asrus, name,
+		    fmd_asru_flush, &err);
+		fmd_asru_hash_apply_by_rsrc(fmd.d_asrus, name,
+		    fmd_asru_flush, &err);
+	}
 	*rvp = err;
 	return (TRUE);
 }
--- a/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c	Sat Sep 26 05:04:14 2009 -0600
+++ b/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c	Sat Sep 26 09:41:57 2009 -0700
@@ -406,8 +406,8 @@
 cma_recv_list(fmd_hdl_t *hdl, nvlist_t *nvl, const char *class)
 {
 	char *uuid = NULL;
-	nvlist_t **nva;
-	uint_t nvc = 0;
+	nvlist_t **nva, **save_nva;
+	uint_t nvc = 0, save_nvc;
 	uint_t keepopen;
 	int err = 0;
 	nvlist_t *asru = NULL;
@@ -421,7 +421,8 @@
 		return;
 	}
 
-	keepopen = nvc;
+	save_nvc = keepopen = nvc;
+	save_nva = nva;
 	while (nvc-- != 0 && (strcmp(class, FM_LIST_SUSPECT_CLASS) != 0 ||
 	    !fmd_case_uuclosed(hdl, uuid))) {
 		nvlist_t *nvl = *nva++;
@@ -455,6 +456,24 @@
 	}
 
 	/*
+	 * Run through again to catch any new faults in list.updated.
+	 */
+	while (save_nvc-- != 0 && (strcmp(class, FM_LIST_UPDATED_CLASS) == 0)) {
+		nvlist_t *nvl = *save_nva++;
+		const cma_subscriber_t *subr;
+		int has_fault;
+
+		if ((subr = nvl2subr(hdl, nvl, &asru)) == NULL)
+			continue;
+		if (subr->subr_func != NULL) {
+			has_fault = fmd_nvl_fmri_has_fault(hdl, asru,
+			    FMD_HAS_FAULT_ASRU, NULL);
+			if (has_fault == 1)
+				err = subr->subr_func(hdl, nvl, asru, uuid, 0);
+		}
+	}
+
+	/*
 	 * Do not close the case if we are handling cache faults.
 	 */
 	if (asru != NULL) {
--- a/usr/src/cmd/fm/modules/common/io-retire/rio_main.c	Sat Sep 26 05:04:14 2009 -0600
+++ b/usr/src/cmd/fm/modules/common/io-retire/rio_main.c	Sat Sep 26 09:41:57 2009 -0700
@@ -139,7 +139,8 @@
 	/*
 	 * If disabled, we don't do retire. We still do unretires though
 	 */
-	if (global_disable && strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) {
+	if (global_disable && (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
+	    strcmp(class, FM_LIST_UPDATED_CLASS) == 0)) {
 		fmd_hdl_debug(hdl, "rio_recv: retire disabled\n");
 		return;
 	}
@@ -226,6 +227,51 @@
 			}
 		}
 	}
+	/*
+	 * Run through again to handle new faults in a list.updated.
+	 */
+	for (f = 0; f < nfaults; f++) {
+		if (nvlist_lookup_boolean_value(faults[f], FM_SUSPECT_RETIRE,
+		    &rtr) == 0 && !rtr) {
+			fmd_hdl_debug(hdl, "rio_recv: retire suppressed");
+			continue;
+		}
+
+		if (nvlist_lookup_nvlist(faults[f], FM_FAULT_ASRU,
+		    &asru) != 0) {
+			fmd_hdl_debug(hdl, "rio_recv: no asru in fault");
+			continue;
+		}
+
+		scheme = NULL;
+		if (nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 ||
+		    strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) {
+			fmd_hdl_debug(hdl, "rio_recv: not \"dev\" scheme: %s",
+			    scheme ? scheme : "<NULL>");
+			continue;
+		}
+
+		if (fault_exception(hdl, faults[f]))
+			continue;
+
+		if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH,
+		    &path) != 0 || path[0] == '\0') {
+			fmd_hdl_debug(hdl, "rio_recv: no dev path in asru");
+			continue;
+		}
+
+		if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) {
+			if (fmd_nvl_fmri_has_fault(hdl, asru,
+			    FMD_HAS_FAULT_ASRU, NULL) == 1) {
+				error = di_retire_device(path, &drt, 0);
+				if (error != 0) {
+					fmd_hdl_debug(hdl, "rio_recv:"
+					    " di_retire_device failed:"
+					    " error: %d %s", error, path);
+				}
+			}
+		}
+	}
 
 	/*
 	 * Don't send uuclose or uuresolved unless at least one suspect
--- a/usr/src/uts/common/sys/fm/protocol.h	Sat Sep 26 05:04:14 2009 -0600
+++ b/usr/src/uts/common/sys/fm/protocol.h	Sat Sep 26 09:41:57 2009 -0700
@@ -122,6 +122,7 @@
 #define	FM_RSRC_ASRU_REPAIRED		"repaired"
 #define	FM_RSRC_ASRU_REPLACED		"replaced"
 #define	FM_RSRC_ASRU_ACQUITTED		"acquitted"
+#define	FM_RSRC_ASRU_RESOLVED		"resolved"
 #define	FM_RSRC_ASRU_UNUSABLE		"unusable"
 #define	FM_RSRC_ASRU_EVENT		"event"