Mercurial > illumos > onarm
diff usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.h @ 0:c9caec207d52 b86
Initial porting based on b86
author | Koji Uno <koji.uno@sun.com> |
---|---|
date | Tue, 02 Jun 2009 18:56:50 +0900 |
parents | |
children | 1a15d5aaf794 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.h Tue Jun 02 18:56:50 2009 +0900 @@ -0,0 +1,195 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _CMD_MEM_H +#define _CMD_MEM_H + +#pragma ident "@(#)cmd_mem.h 1.14 07/07/26 SMI" + +/* + * Support routines for managing state related to memory modules. + * + * Correctable errors generally cause changes to the DIMM-related state (see + * cmd_dimm.c), whereas uncorrectable errors tend to use the bank-related + * routines (see cmd_bank.c). The primary exception to this division (though + * it eventually devolves to one of the two) is the RxE/FRx pair emitted by + * UltraSPARC-IIIi processors. With these errors, a complete pair must be + * received and matched before we know whether we're dealing with a CE or a UE. 
+ */ + +#include <cmd.h> +#include <cmd_state.h> +#include <cmd_fmri.h> +#include <sys/errclassify.h> +#include <cmd_cpu.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define CMD_MEM_F_FAULTING 0x1 + +/* + * Used to store as-yet unmatched IOxEs, RxEs, and FRxs. When a new IOxE, + * RxE or FRx arrives, we traverse the cmd.cmd_iorxefrx list, looking for + * matching entries. Matching has a cpuid-based component, as well as a + * temporal one. We can compare the cpuids directly, using the cmd_iorxefrx_t + * and the newly-received event. Temporal comparison isn't performed directly. + * Instead, we ensure that entries in the iorxefrx list are removed when they + * expire by means of timers. This frees the matching code from the need to + * worry about time. + */ +typedef struct cmd_iorxefrx { + cmd_list_t rf_list; /* List of cmd_iorxefrx_t's */ + cmd_errcl_t rf_errcl; /* Error type (CMD_ERRCL_*) */ + uint_t rf_afsr_agentid; /* Remote Agent ID (from AFSR) */ + uint_t rf_det_agentid; /* Local Agent ID (from detector) */ + id_t rf_expid; /* Timer ID for entry expiration */ + uint64_t rf_afar; /* Valid for RxE only */ + uint8_t rf_afar_status; /* Valid for RxE only */ + ce_dispact_t rf_type; /* Valid for RxE only */ + uint16_t rf_synd; /* Valid for FRx only */ + uint8_t rf_synd_status; /* Valid for FRx only */ + uint64_t rf_afsr; /* Valid for FRx only */ + uint64_t rf_disp; /* Valid for RCE only */ +} cmd_iorxefrx_t; + +typedef struct cmd_dimm cmd_dimm_t; +typedef struct cmd_bank cmd_bank_t; +#ifdef sun4v +typedef struct cmd_branch cmd_branch_t; +#endif + +/* + * Correctable and Uncorrectable memory errors + * + * CEs of "Unknown" or "Intermittent" classification are not used in diagnosis. + * + * "Persistent" CEs are added to per-DIMM SERD engines. When the + * engine for a given DIMM fires, the page corresponding to the CE that + * caused the engine to fire is retired, and the SERD engine for that + * DIMM is reset. 
+ * + * "Possibly Persistent" CEs are at least Persistent and so are treated + * as "Persistent" errors above, being added to the same SERD engines. + * + * "Leaky" CEs and "Sticky" CEs trigger immediate page retirement. + * + * "Possibly Sticky" CEs to which no valid partner test has been applied + * are not used in diagnosis. Where a valid partner test has been applied + * but did not confirm "Sticky" status there is a _suggestion_ that the + * original cpu may be a bad reader or writer or suffering from other + * datapath issues. To avoid retiring pages for such non-DIMM problems + * these classifications are also not used in diagnosis. + * + * UEs immediately trigger page retirements, but do not affect the CE SERD + * engines. In addition, UEs are recorded in the UE caches of the detecting + * CPUs. When a page is to be retired, a fault.memory.page fault is + * generated. + * + */ + +typedef cmd_evdisp_t cmd_xe_handler_f(fmd_hdl_t *, fmd_event_t *, nvlist_t *, + const char *, uint64_t, uint8_t, uint16_t, uint8_t, ce_dispact_t, uint64_t, + nvlist_t *); + +extern ce_dispact_t cmd_mem_name2type(const char *, int); +extern int cmd_synd2upos(uint16_t); +extern int cmd_upos2dram(uint16_t); +extern cmd_evdisp_t cmd_ce(fmd_hdl_t *, fmd_event_t *, nvlist_t *, + const char *, cmd_errcl_t); +extern cmd_evdisp_t cmd_ue(fmd_hdl_t *, fmd_event_t *, nvlist_t *, + const char *, cmd_errcl_t); +extern cmd_evdisp_t cmd_ce_common(fmd_hdl_t *, fmd_event_t *, nvlist_t *, + const char *, uint64_t, uint8_t, uint16_t, uint8_t, + ce_dispact_t, uint64_t, nvlist_t *); +extern cmd_evdisp_t cmd_ue_common(fmd_hdl_t *, fmd_event_t *, nvlist_t *, + const char *, uint64_t, uint8_t, uint16_t, uint8_t, + ce_dispact_t, uint64_t, nvlist_t *); +extern cmd_evdisp_t cmd_mem_synd_check(fmd_hdl_t *, uint64_t, uint8_t, + uint16_t, uint8_t, cmd_cpu_t *); +extern void cmd_dimm_close(fmd_hdl_t *, void *); +extern void cmd_bank_close(fmd_hdl_t *, void *); +#ifdef sun4v +extern void cmd_branch_close(fmd_hdl_t 
*, void *); +extern cmd_evdisp_t cmd_fb(fmd_hdl_t *, fmd_event_t *, nvlist_t *, + const char *, cmd_errcl_t); +#endif + +/* + * US-IIIi I/O, Remote and Foreign Read memory errors + * + * When one processor or I/O bridge attempts to read memory local to + * another processor, one each of IOCE/IOUE/RCE/RUE and FRC/FRU will be + * generated, depending on the type of error. Both the IOxE/RxE and the FRx + * are needed, as each contains data necessary to the diagnosis of the error. + * Upon receipt of one of the errors, we wait until we receive the other. + * When the pair has been successfully received and matched, a CE or UE, + * as appropriate, is synthesized from the data in the matched ereports. + * The synthesized ereports are handled by the normal CE and UE mechanisms. + */ +extern cmd_evdisp_t cmd_frx(fmd_hdl_t *, fmd_event_t *, nvlist_t *, + const char *, cmd_errcl_t); +extern cmd_evdisp_t cmd_rxe(fmd_hdl_t *, fmd_event_t *, nvlist_t *, + const char *, cmd_errcl_t); +extern cmd_evdisp_t cmd_ioxe(fmd_hdl_t *, fmd_event_t *, nvlist_t *, + const char *, cmd_errcl_t); +extern cmd_evdisp_t cmd_ioxe_sec(fmd_hdl_t *, fmd_event_t *, nvlist_t *, + const char *, cmd_errcl_t); +extern cmd_evdisp_t cmd_rxefrx_common(fmd_hdl_t *hdl, fmd_event_t *ep, + nvlist_t *nvl, const char *class, cmd_errcl_t clcode, + cmd_errcl_t matchmask); + +/* + * A list of received IOxE/RxE/FRx ereports is maintained for correlation + * purposes (see above). These two routines manage the addition of new + * ereports, and the retrieval of existing ones. Pruning of the list is + * handled automatically. 
+ */ +extern void cmd_iorxefrx_queue(fmd_hdl_t *, cmd_iorxefrx_t *); +extern void cmd_iorxefrx_free(fmd_hdl_t *, cmd_iorxefrx_t *); + +extern const char *cmd_fmri_get_unum(nvlist_t *); +extern nvlist_t *cmd_mem_fmri_create(const char *); +extern nvlist_t *cmd_mem_fmri_derive(fmd_hdl_t *, uint64_t, uint64_t, uint16_t); + +extern void cmd_mem_case_restore(fmd_hdl_t *, cmd_case_t *, fmd_case_t *, + const char *, const char *); +extern char *cmd_mem_serdnm_create(fmd_hdl_t *, const char *, const char *); +extern char *cmd_page_serdnm_create(fmd_hdl_t *, const char *, uint64_t); +extern void cmd_mem_retirestat_create(fmd_hdl_t *, fmd_stat_t *, const char *, + uint64_t, const char *); +extern int cmd_mem_thresh_check(fmd_hdl_t *, uint_t); +extern ulong_t cmd_mem_get_phys_pages(fmd_hdl_t *); + +extern void cmd_mem_timeout(fmd_hdl_t *, id_t); +extern void cmd_mem_gc(fmd_hdl_t *); +extern void cmd_mem_fini(fmd_hdl_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _CMD_MEM_H */