Mercurial > illumos > onarm
annotate usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.h @ 4:1a15d5aaf794
synchronized with onnv_86 (6202) in onnv-gate
author | Koji Uno <koji.uno@sun.com> |
---|---|
date | Mon, 31 Aug 2009 14:38:03 +0900 |
parents | c9caec207d52 |
children |
rev | line source |
---|---|
0 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
5 * Common Development and Distribution License (the "License"). | |
6 * You may not use this file except in compliance with the License. | |
7 * | |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
21 /* | |
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. | |
23 * Use is subject to license terms. | |
24 */ | |
25 | |
26 #ifndef _CMD_MEM_H | |
27 #define _CMD_MEM_H | |
28 | |
4
1a15d5aaf794
synchronized with onnv_86 (6202) in onnv-gate
Koji Uno <koji.uno@sun.com>
parents:
0
diff
changeset
|
29 #pragma ident "%Z%%M% %I% %E% SMI" |
0 | 30 |
31 /* | |
32 * Support routines for managing state related to memory modules. | |
33 * | |
34 * Correctable errors generally cause changes to the DIMM-related state (see | |
35 * cmd_dimm.c), whereas uncorrectable errors tend to use the bank-related | |
36 * routines (see cmd_bank.c). The primary exception to this division (though | |
37 * it eventually devolves to one of the two) is the RxE/FRx pair emitted by | |
38 * UltraSPARC-IIIi processors. With these errors, a complete pair must be | |
39 * received and matched before we know whether we're dealing with a CE or a UE. | |
40 */ | |
41 | |
42 #include <cmd.h> | |
43 #include <cmd_state.h> | |
44 #include <cmd_fmri.h> | |
45 #include <sys/errclassify.h> | |
46 #include <cmd_cpu.h> | |
47 | |
48 #ifdef __cplusplus | |
49 extern "C" { | |
50 #endif | |
51 | |
52 #define CMD_MEM_F_FAULTING 0x1 | |
53 | |
54 /* | |
55 * Used to store as-yet unmatched IOxEs, RxEs, and FRxs. When a new IOxE, | |
56 * RxE or FRx arrives, we traverse the cmd.cmd_iorxefrx list, looking for | |
57 * matching entries. Matching has a cpuid-based component, as well as a | |
58 * temporal one. We can compare the cpuids directly, using the cmd_iorxefrx_t | |
59 * and the newly-received event. Temporal comparison isn't performed directly. | |
60 * Instead, we ensure that entries in the iorxefrx list are removed when they | |
61 * expire by means of timers. This frees the matching code from the need to | |
62 * worry about time. | |
63 */ | |
64 typedef struct cmd_iorxefrx { | |
65 cmd_list_t rf_list; /* List of cmd_iorxefrx_t's */ | |
66 cmd_errcl_t rf_errcl; /* Error type (CMD_ERRCL_*) */ | |
67 uint_t rf_afsr_agentid; /* Remote Agent ID (from AFSR) */ | |
68 uint_t rf_det_agentid; /* Local Agent ID (from detector) */ | |
69 id_t rf_expid; /* Timer ID for entry expiration */ | |
70 uint64_t rf_afar; /* Valid for RxE only */ | |
71 uint8_t rf_afar_status; /* Valid for RxE only */ | |
72 ce_dispact_t rf_type; /* Valid for RxE only */ | |
73 uint16_t rf_synd; /* Valid for FRx only */ | |
74 uint8_t rf_synd_status; /* Valid for FRx only */ | |
75 uint64_t rf_afsr; /* Valid for FRx only */ | |
76 uint64_t rf_disp; /* Valid for RCE only */ | |
77 } cmd_iorxefrx_t; | |
78 | |
79 typedef struct cmd_dimm cmd_dimm_t; | |
80 typedef struct cmd_bank cmd_bank_t; | |
81 #ifdef sun4v | |
82 typedef struct cmd_branch cmd_branch_t; | |
83 #endif | |
84 | |
85 /* | |
86 * Correctable and Uncorrectable memory errors | |
87 * | |
88 * CEs of "Unknown" or "Intermittent" classification are not used in diagnosis. | |
89 * | |
90 * "Persistent" CEs are added to per-DIMM SERD engines. When the | |
91 * engine for a given DIMM fires, the page corresponding to the CE that | |
92 * caused the engine to fire is retired, and the SERD engine for that | |
93 * DIMM is reset. | |
94 * | |
95 * "Possibly Persistent" CEs are at least Persistent and so are treated | |
96 * as "Persistent" errors above, being added to the same SERD engines. | |
97 * | |
98 * "Leaky" CEs and "Sticky" CEs trigger immediate page retirement. | |
99 * | |
100 * "Possibly Sticky" CEs to which no valid partner test has been applied | |
101 * are not used in diagnosis. Where a valid partner test has been applied | |
102 * but did not confirm "Sticky" status there is a _suggestion_ that the | |
103 * original cpu may be a bad reader or writer or suffering from other | |
104 * datapath issues. To avoid retiring pages for such non-DIMM problems | |
105 * these classifications are also not used in diagnosis. | |
106 * | |
107 * UEs immediately trigger page retirements, but do not affect the CE SERD | |
108 * engines. In addition, UEs are recorded in the UE caches of the detecting | |
109 * CPUs. When a page is to be retired, a fault.memory.page fault is | |
110 * generated. | |
111 * | |
112 */ | |
113 | |
114 typedef cmd_evdisp_t cmd_xe_handler_f(fmd_hdl_t *, fmd_event_t *, nvlist_t *, | |
115 const char *, uint64_t, uint8_t, uint16_t, uint8_t, ce_dispact_t, uint64_t, | |
116 nvlist_t *); | |
117 | |
118 extern ce_dispact_t cmd_mem_name2type(const char *, int); | |
119 extern int cmd_synd2upos(uint16_t); | |
120 extern int cmd_upos2dram(uint16_t); | |
121 extern cmd_evdisp_t cmd_ce(fmd_hdl_t *, fmd_event_t *, nvlist_t *, | |
122 const char *, cmd_errcl_t); | |
123 extern cmd_evdisp_t cmd_ue(fmd_hdl_t *, fmd_event_t *, nvlist_t *, | |
124 const char *, cmd_errcl_t); | |
125 extern cmd_evdisp_t cmd_ce_common(fmd_hdl_t *, fmd_event_t *, nvlist_t *, | |
126 const char *, uint64_t, uint8_t, uint16_t, uint8_t, | |
127 ce_dispact_t, uint64_t, nvlist_t *); | |
128 extern cmd_evdisp_t cmd_ue_common(fmd_hdl_t *, fmd_event_t *, nvlist_t *, | |
129 const char *, uint64_t, uint8_t, uint16_t, uint8_t, | |
130 ce_dispact_t, uint64_t, nvlist_t *); | |
131 extern cmd_evdisp_t cmd_mem_synd_check(fmd_hdl_t *, uint64_t, uint8_t, | |
132 uint16_t, uint8_t, cmd_cpu_t *); | |
133 extern void cmd_dimm_close(fmd_hdl_t *, void *); | |
134 extern void cmd_bank_close(fmd_hdl_t *, void *); | |
135 #ifdef sun4v | |
136 extern void cmd_branch_close(fmd_hdl_t *, void *); | |
137 extern cmd_evdisp_t cmd_fb(fmd_hdl_t *, fmd_event_t *, nvlist_t *, | |
138 const char *, cmd_errcl_t); | |
139 #endif | |
140 | |
141 /* | |
142 * US-IIIi I/O, Remote and Foreign Read memory errors | |
143 * | |
144 * When one processor or I/O bridge attempts to read memory local to | |
145 * another processor, one each of IOCE/IOUE/RCE/RUE and FRC/FRU will be | |
146 * generated, depending on the type of error. Both the IOxE/RxE and the FRx | |
147 * are needed, as each contains data necessary to the diagnosis of the error. | |
148 * Upon receipt of one of the errors, we wait until we receive the other. | |
149 * When the pair has been successfully received and matched, a CE or UE, | |
150 * as appropriate, is synthesized from the data in the matched ereports. | |
151 * The synthesized ereports are handled by the normal CE and UE mechanisms. | |
152 */ | |
153 extern cmd_evdisp_t cmd_frx(fmd_hdl_t *, fmd_event_t *, nvlist_t *, | |
154 const char *, cmd_errcl_t); | |
155 extern cmd_evdisp_t cmd_rxe(fmd_hdl_t *, fmd_event_t *, nvlist_t *, | |
156 const char *, cmd_errcl_t); | |
157 extern cmd_evdisp_t cmd_ioxe(fmd_hdl_t *, fmd_event_t *, nvlist_t *, | |
158 const char *, cmd_errcl_t); | |
159 extern cmd_evdisp_t cmd_ioxe_sec(fmd_hdl_t *, fmd_event_t *, nvlist_t *, | |
160 const char *, cmd_errcl_t); | |
161 extern cmd_evdisp_t cmd_rxefrx_common(fmd_hdl_t *hdl, fmd_event_t *ep, | |
162 nvlist_t *nvl, const char *class, cmd_errcl_t clcode, | |
163 cmd_errcl_t matchmask); | |
164 | |
165 /* | |
166 * A list of received IOxE/RxE/FRx ereports is maintained for correlation | |
167 * purposes (see above). These two routines manage the addition of new | |
168 * ereports, and the retrieval of existing ones. Pruning of the list is | |
169 * handled automatically. | |
170 */ | |
171 extern void cmd_iorxefrx_queue(fmd_hdl_t *, cmd_iorxefrx_t *); | |
172 extern void cmd_iorxefrx_free(fmd_hdl_t *, cmd_iorxefrx_t *); | |
173 | |
174 extern const char *cmd_fmri_get_unum(nvlist_t *); | |
175 extern nvlist_t *cmd_mem_fmri_create(const char *); | |
176 extern nvlist_t *cmd_mem_fmri_derive(fmd_hdl_t *, uint64_t, uint64_t, uint16_t); | |
177 | |
178 extern void cmd_mem_case_restore(fmd_hdl_t *, cmd_case_t *, fmd_case_t *, | |
179 const char *, const char *); | |
180 extern char *cmd_mem_serdnm_create(fmd_hdl_t *, const char *, const char *); | |
181 extern char *cmd_page_serdnm_create(fmd_hdl_t *, const char *, uint64_t); | |
182 extern void cmd_mem_retirestat_create(fmd_hdl_t *, fmd_stat_t *, const char *, | |
183 uint64_t, const char *); | |
184 extern int cmd_mem_thresh_check(fmd_hdl_t *, uint_t); | |
185 extern ulong_t cmd_mem_get_phys_pages(fmd_hdl_t *); | |
186 | |
187 extern void cmd_mem_timeout(fmd_hdl_t *, id_t); | |
188 extern void cmd_mem_gc(fmd_hdl_t *); | |
189 extern void cmd_mem_fini(fmd_hdl_t *); | |
190 | |
191 #ifdef __cplusplus | |
192 } | |
193 #endif | |
194 | |
195 #endif /* _CMD_MEM_H */ |