# HG changeset patch
# User rb144127
# Date 1189615510 25200
# Node ID 15cab3a79834034ee99e5eff54baaad0100e36ec
# Parent  ffbab30c82bd946fab9700150a6a65dc957de30a
6591930 domain ETM omits check for dup xid on ALERT msgs
6591931 domain ETM falsely reports etm_xport_open_fail stat
6594506 domain ETM and LDC likely deadlock when xmit queue full

diff -r ffbab30c82bd -r 15cab3a79834 usr/src/cmd/fm/modules/sun4v/etm/etm.c
--- a/usr/src/cmd/fm/modules/sun4v/etm/etm.c	Tue Sep 11 16:39:42 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4v/etm/etm.c	Wed Sep 12 09:45:10 2007 -0700
@@ -20,7 +20,7 @@
  */

 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */

@@ -39,11 +39,11 @@
 #include 
 #include 
-#include 
-#include 
 #include 
 #include 
 #include 
+#include 
+#include 

 #include "etm_xport_api.h"
 #include "etm_etm_proto.h"
@@ -63,7 +63,6 @@
 #include 
 #include 

-
 /*
  * ----------------------------- forward decls -------------------------------
  */
@@ -91,6 +90,7 @@
 	{ ETM_PROP_NM_CONSOLE, FMD_TYPE_BOOL, "false" },
 	{ ETM_PROP_NM_SYSLOGD, FMD_TYPE_BOOL, "true" },
 	{ ETM_PROP_NM_FACILITY, FMD_TYPE_STRING, "LOG_DAEMON" },
+	{ ETM_PROP_NM_MAX_RESP_Q_LEN, FMD_TYPE_INT32, "512" },
 	{ NULL, 0, NULL }
 };

@@ -130,6 +130,17 @@

 #define	ETM_XID_INC	(2)

+typedef struct etm_resp_q_ele {
+
+	etm_xport_conn_t	rqe_conn;	/* open connection to send on */
+	etm_proto_v1_pp_t	*rqe_hdrp;	/* ptr to ETM msg hdr */
+	size_t			rqe_hdr_sz;	/* sizeof ETM msg hdr */
+	int32_t			rqe_resp_code;	/* response code to send */
+
+	struct etm_resp_q_ele	*rqe_nextp;	/* PRIVATE - next ele ptr */
+
+} etm_resp_q_ele_t;	/* responder queue element */
+
 /*
  * ---------------------------- global data ----------------------------------
  */
@@ -149,6 +160,27 @@
 static pthread_t
 etm_svr_tid = NULL;	/* thread id of connection acceptance server */

+static pthread_t
+etm_resp_tid = NULL;	/* thread id of msg responder */
+
+static etm_resp_q_ele_t
+*etm_resp_q_head = NULL; /* ptr to cur head of responder queue */
+
+static etm_resp_q_ele_t
+*etm_resp_q_tail = NULL; /* ptr to cur tail of responder queue */
+
+static uint32_t
+etm_resp_q_cur_len = 0;	/* cur length (ele cnt) of responder queue */
+
+static uint32_t
+etm_resp_q_max_len = 0;	/* max length (ele cnt) of responder queue */
+
+static pthread_mutex_t
+etm_resp_q_lock = PTHREAD_MUTEX_INITIALIZER; /* protects responder queue */
+
+static pthread_cond_t
+etm_resp_q_cv = PTHREAD_COND_INITIALIZER; /* nudges msg responder */
+
 static volatile int
 etm_is_dying = 0;	/* bool for dying (killing self) */

@@ -164,6 +196,9 @@
 static uint32_t
 etm_xid_posted_ev = 0;	/* xid of last FMA_EVENT msg/event posted OK to FMD */

+static uint32_t
+etm_xid_posted_sa = 0;	/* xid of last ALERT msg/event posted OK to syslog */
+
 static uint8_t
 etm_resp_ver = ETM_PROTO_V1; /* proto ver [negotiated] for msg sends */

@@ -212,6 +247,12 @@
 	fmd_stat_t etm_wr_body_control;
 	fmd_stat_t etm_wr_body_response;

+	fmd_stat_t etm_rd_max_ev_per_msg;
+	fmd_stat_t etm_wr_max_ev_per_msg;
+
+	fmd_stat_t etm_resp_q_cur_len;
+	fmd_stat_t etm_resp_q_max_len;
+
 	/* ETM byte counters */

 	fmd_stat_t etm_wr_fmd_bytes;
@@ -232,6 +273,12 @@
 	fmd_stat_t etm_rd_dup_fmaevent;
 	fmd_stat_t etm_wr_dup_fmaevent;

+	fmd_stat_t etm_rd_dup_alert;
+	fmd_stat_t etm_wr_dup_alert;
+
+	fmd_stat_t etm_enq_drop_resp_q;
+	fmd_stat_t etm_deq_drop_resp_q;
+
 	/* ETM protocol failures */

 	fmd_stat_t etm_magic_bad;
@@ -278,6 +325,7 @@
 	fmd_stat_t etm_fmd_fini_badargs;

 	/* Alert logging errors */
+
 	fmd_stat_t etm_log_err;
 	fmd_stat_t etm_msg_err;
@@ -314,6 +362,16 @@
 	{ "etm_wr_body_response", FMD_TYPE_UINT64,
 		"ETM response msg bodies sent to xport" },

+	{ "etm_rd_max_ev_per_msg", FMD_TYPE_UINT64,
+		"max FMA events per ETM msg from xport" },
+	{ "etm_wr_max_ev_per_msg", FMD_TYPE_UINT64,
+		"max FMA events per ETM msg to xport" },
+
+	{ "etm_resp_q_cur_len", FMD_TYPE_UINT64,
+		"cur enqueued response msgs to xport" },
+	{ "etm_resp_q_max_len", FMD_TYPE_UINT64,
+		"max enqueable response msgs to xport" },
+
 	/* ETM byte counters */

 	{ "etm_wr_fmd_bytes", FMD_TYPE_UINT64,
@@ -341,9 +399,19 @@
 		"dropped FMA events to xport" },

 	{ "etm_rd_dup_fmaevent", FMD_TYPE_UINT64,
-		"duplicate FMA events from xport" },
+		"duplicate FMA events rcvd from xport" },
 	{ "etm_wr_dup_fmaevent", FMD_TYPE_UINT64,
-		"duplicate FMA events to xport" },
+		"duplicate FMA events sent to xport" },
+
+	{ "etm_rd_dup_alert", FMD_TYPE_UINT64,
+		"duplicate ALERTs rcvd from xport" },
+	{ "etm_wr_dup_alert", FMD_TYPE_UINT64,
+		"duplicate ALERTs sent to xport" },
+
+	{ "etm_enq_drop_resp_q", FMD_TYPE_UINT64,
+		"dropped response msgs on enq" },
+	{ "etm_deq_drop_resp_q", FMD_TYPE_UINT64,
+		"dropped response msgs on deq" },

 	/* ETM protocol failures */
@@ -418,6 +486,7 @@
 		"bad arguments from fmd_fini entry point" },

 	/* Alert logging errors */
+
 	{ "etm_log_err", FMD_TYPE_UINT64,
 		"failed to log message to log(7D)" },
 	{ "etm_msg_err", FMD_TYPE_UINT64,
@@ -534,7 +603,7 @@
 	if ((conn = etm_xport_open(hdl, addr)) == NULL) {
 		nev = (-errno);
 		fmd_hdl_error(hdl, "error: %s: errno %d\n",
-					err_substr, errno);
+		    err_substr, errno);
 		etm_stats.etm_xport_open_fail.fmds_value.ui64++;
 		return (nev);
 	} else {
@@ -559,7 +628,7 @@
 	if (etm_xport_close(hdl, conn) == NULL) {
 		nev = (-errno);
 		fmd_hdl_error(hdl, "warning: %s: errno %d\n",
-					err_substr, errno);
+		    err_substr, errno);
 		etm_stats.etm_xport_close_fail.fmds_value.ui64++;
 		return (nev);
 	} else {
@@ -587,7 +656,7 @@
 	uint8_t *datap;		/* ptr to data */
 	size_t mtu_sz;		/* MTU size in bytes */
 	int (*io_func_ptr)(fmd_hdl_t *, etm_xport_conn_t,
-							void *, size_t);
+	    void *, size_t);
 	size_t io_sz;		/* byte count for io_func_ptr */
 	int try_cnt;		/* number of tries done */
 	int sleep_sec;		/* exp backoff sleep period in sec */
@@ -636,7 +705,7 @@
 	/* when give up, return -errno value even if partly done */

 	while ((n = (*io_func_ptr)(hdl, conn, datap, io_sz)) ==
-								(-EAGAIN)) {
+	    (-EAGAIN)) {
 		try_cnt++;
 		if (try_cnt > ETM_TRY_MAX_CNT) {
 			rv = n;
@@ -651,12 +720,12 @@
 			goto func_ret;
 		}
 		sleep_sec = ((sleep_sec == 0) ? 1 :
-				(sleep_sec * ETM_TRY_BACKOFF_RATE));
+		    (sleep_sec * ETM_TRY_BACKOFF_RATE));
 		sleep_sec = MIN(sleep_sec, ETM_TRY_BACKOFF_CAP);
 		io_retry_stat.fmds_value.ui64++;
 		if (etm_debug_lvl >= 1) {
 			fmd_hdl_debug(hdl, "info: retrying io op %d "
-					"due to EAGAIN\n", io_op);
+			    "due to EAGAIN\n", io_op);
 		}
 	} /* while trying the io operation */
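The hunk above touches only continuation-line indentation, but the loop it reindents is the module's standard treatment of a busy transport: back off exponentially between retries, cap the sleep, and give up after a fixed number of tries. A minimal standalone sketch of that pattern follows; TRY_MAX_CNT, BACKOFF_RATE, BACKOFF_CAP, and try_io() are illustrative stand-ins, where the real code uses ETM_TRY_MAX_CNT, ETM_TRY_BACKOFF_RATE, ETM_TRY_BACKOFF_CAP, and etm_sleep().

#include <errno.h>
#include <unistd.h>

#define	TRY_MAX_CNT	10	/* give up after this many EAGAINs */
#define	BACKOFF_RATE	2	/* sleep doubles on each retry */
#define	BACKOFF_CAP	8	/* but never exceeds 8 seconds */

extern int try_io(void);	/* hypothetical; returns -EAGAIN when busy */

static int
io_with_backoff(void)
{
	int n;
	int try_cnt = 0;
	int sleep_sec = 0;

	while ((n = try_io()) == (-EAGAIN)) {
		if (++try_cnt > TRY_MAX_CNT)
			return (n);	/* out of patience; return -errno */
		/* sleeps of 1, 2, 4, 8, 8, ... seconds between attempts */
		sleep_sec = (sleep_sec == 0) ? 1 :
		    (sleep_sec * BACKOFF_RATE);
		if (sleep_sec > BACKOFF_CAP)
			sleep_sec = BACKOFF_CAP;
		(void) sleep(sleep_sec);
	}
	return (n);	/* success, or a non-retryable -errno */
}

The cap matters here: without it, a transport that stays busy would stretch the gap between retries unboundedly, while the retry limit bounds the total time a caller can be held in this loop.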
@@ -684,11 +753,11 @@
 	if (rv < 0) {
 		io_fail_stat.fmds_value.ui64++;
 		fmd_hdl_debug(hdl, "error: %s: errno %d\n",
-					err_substr, (int)(-rv));
+		    err_substr, (int)(-rv));
 	}
 	if (etm_debug_lvl >= 3) {
 		fmd_hdl_debug(hdl, "info: io op %d ret %d of %d\n",
-					io_op, (int)rv, (int)byte_cnt);
+		    io_op, (int)rv, (int)byte_cnt);
 	}
 	return (rv);
@@ -726,7 +795,7 @@

 	while (magic_num != ETM_PROTO_MAGIC_NUM) {
 		if ((n = etm_io_op(hdl, "bad io read on magic",
-					conn, &buf5[j], 1, ETM_IO_OP_RD)) < 0) {
+		    conn, &buf5[j], 1, ETM_IO_OP_RD)) < 0) {
 			rv = n;
 			goto func_ret;
 		}
@@ -752,13 +821,13 @@

 	if (byte_cnt != sizeof (magic_num)) {
 		fmd_hdl_debug(hdl, "warning: bad proto frame "
-				"implies corrupt/lost msg(s)\n");
+		    "implies corrupt/lost msg(s)\n");
 	}
 	if ((byte_cnt > sizeof (magic_num)) && (etm_debug_lvl >= 2)) {
 		i = MIN(byte_cnt - sizeof (magic_num), sizeof (drop_buf));
 		fmd_hdl_debug(hdl, "info: magic drop hexdump "
-				"first %d of %d bytes:\n",
-				i, byte_cnt - sizeof (magic_num));
+		    "first %d of %d bytes:\n", i,
+		    byte_cnt - sizeof (magic_num));
 		etm_hexdump(hdl, drop_buf, i);
 	}
@@ -806,9 +875,8 @@
 	/* read the rest of the protocol preamble all at once */

 	if ((n = etm_io_op(hdl, "bad io read on preamble",
-				conn, &pp.pp_proto_ver,
-				sizeof (pp) - sizeof (pp.pp_magic_num),
-				ETM_IO_OP_RD)) < 0) {
+	    conn, &pp.pp_proto_ver, sizeof (pp) - sizeof (pp.pp_magic_num),
+	    ETM_IO_OP_RD)) < 0) {
 		errno = (-n);
 		return (NULL);
 	}
@@ -829,7 +897,7 @@
 	if ((pp.pp_proto_ver < ETM_PROTO_V1) ||
 	    (pp.pp_proto_ver > ETM_PROTO_V3)) {
 		fmd_hdl_error(hdl, "error: bad proto ver %d\n",
-					(int)pp.pp_proto_ver);
+		    (int)pp.pp_proto_ver);
 		errno = EPROTO;
 		etm_stats.etm_ver_bad.fmds_value.ui64++;
 		return (NULL);
@@ -868,14 +936,13 @@
 	do {
 		i++; lenp++;
 		if ((sizeof (*ev_hdrp) + (i * sizeof (*lenp))) >=
-						ETM_MISC_BUF_SZ) {
+		    ETM_MISC_BUF_SZ) {
 			errno = E2BIG;	/* ridiculous size */
 			etm_stats.etm_evlens_bad.fmds_value.ui64++;
 			return (NULL);
 		}
 		if ((n = etm_io_op(hdl, "bad io read on event len",
-					conn, lenp, sizeof (*lenp),
-					ETM_IO_OP_RD)) < 0) {
+		    conn, lenp, sizeof (*lenp), ETM_IO_OP_RD)) < 0) {
 			errno = (-n);
 			return (NULL);
 		}
@@ -898,7 +965,7 @@
 	if ((ctl_hdrp->ctl_pp.pp_sub_type <= ETM_CTL_SEL_TOO_LOW) ||
 	    (ctl_hdrp->ctl_pp.pp_sub_type >= ETM_CTL_SEL_TOO_BIG)) {
 		fmd_hdl_error(hdl, "error: bad ctl sub type %d\n",
-				(int)ctl_hdrp->ctl_pp.pp_sub_type);
+		    (int)ctl_hdrp->ctl_pp.pp_sub_type);
 		errno = EBADMSG;
 		etm_stats.etm_subtype_bad.fmds_value.ui64++;
 		return (NULL);
@@ -907,9 +974,8 @@
 	/* get the control length */

 	if ((n = etm_io_op(hdl, "bad io read on ctl len",
-				conn, &ctl_hdrp->ctl_len,
-				sizeof (ctl_hdrp->ctl_len),
-				ETM_IO_OP_RD)) < 0) {
+	    conn, &ctl_hdrp->ctl_len, sizeof (ctl_hdrp->ctl_len),
+	    ETM_IO_OP_RD)) < 0) {
 		errno = (-n);
 		return (NULL);
 	}
@@ -927,7 +993,7 @@
 	/* sanity check the header's timeout */

 	if (resp_hdrp->resp_pp.pp_timeout !=
-					ETM_PROTO_V1_TIMEOUT_NONE) {
+	    ETM_PROTO_V1_TIMEOUT_NONE) {
 		errno = ETIME;
 		etm_stats.etm_timeout_bad.fmds_value.ui64++;
 		return (NULL);
@@ -936,10 +1002,10 @@
 	/* get the response code and length */

 	if ((n = etm_io_op(hdl, "bad io read on resp code+len",
-				conn, &resp_hdrp->resp_code,
-				sizeof (resp_hdrp->resp_code) +
-				sizeof (resp_hdrp->resp_len),
-				ETM_IO_OP_RD)) < 0) {
+	    conn, &resp_hdrp->resp_code,
+	    sizeof (resp_hdrp->resp_code) +
+	    sizeof (resp_hdrp->resp_len),
+	    ETM_IO_OP_RD)) < 0) {
 		errno = (-n);
 		return (NULL);
 	}
@@ -966,10 +1032,10 @@
 	/* get the priority and length */

 	if ((n = etm_io_op(hdl, "bad io read on sa priority+len",
-				conn, &sa_hdrp->sa_priority,
-				sizeof (sa_hdrp->sa_priority) +
-				sizeof (sa_hdrp->sa_len),
-				ETM_IO_OP_RD)) < 0) {
+	    conn, &sa_hdrp->sa_priority,
+	    sizeof (sa_hdrp->sa_priority) +
+	    sizeof (sa_hdrp->sa_len),
+	    ETM_IO_OP_RD)) < 0) {
 		errno = (-n);
 		return (NULL);
 	}
@@ -992,8 +1058,7 @@
 	(void) memcpy(hdrp, misc_buf, hdr_sz);

 	if (etm_debug_lvl >= 3) {
-		fmd_hdl_debug(hdl, "info: msg hdr hexdump %d bytes:\n",
-								hdr_sz);
+		fmd_hdl_debug(hdl, "info: msg hdr hexdump %d bytes:\n", hdr_sz);
 		etm_hexdump(hdl, hdrp, hdr_sz);
 	}
 	*szp = hdr_sz;
@@ -1054,6 +1119,8 @@

 	/* indicate 1 FMA event, network encode its length, and 0-terminate */

+	etm_stats.etm_wr_max_ev_per_msg.fmds_value.ui64 = 1;
+
 	*lenp = evsz; *lenp = htonl(*lenp); lenp++;
 	*lenp = 0; *lenp = htonl(*lenp); lenp++;
@@ -1063,7 +1130,7 @@
 	 */

 	if ((n = etm_io_op(hdl, "bad io write on event hdr",
-				conn, hdrp, hdr_sz, ETM_IO_OP_WR)) < 0) {
+	    conn, hdrp, hdr_sz, ETM_IO_OP_WR)) < 0) {
 		errno = (-n);
 		fmd_hdl_free(hdl, hdrp, hdr_sz);
 		return (NULL);
@@ -1231,26 +1298,24 @@

 	if ((addrv = etm_xport_get_ev_addrv(hdl, NULL)) == NULL) {
 		fmd_hdl_error(hdl,
-			"error: bad ctl dst addrs errno %d\n", errno);
+		    "error: bad ctl dst addrs errno %d\n", errno);
 		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
 		goto func_ret;
 	}

 	for (i = 0; addrv[i] != NULL; i++) {
-		etm_stats.etm_xport_open_fail.fmds_value.ui64++;
 		if (etm_conn_open(hdl, "bad conn open during ver negot",
-					addrv[i], &conn) < 0) {
+		    addrv[i], &conn) < 0) {
 			continue;
 		}
 		if (etm_io_op(hdl, "bad io write on ctl hdr+body",
-				conn, ctl_hdrp, hdr_sz + body_sz,
-				ETM_IO_OP_WR) >= 0) {
+		    conn, ctl_hdrp, hdr_sz + body_sz, ETM_IO_OP_WR) >= 0) {
 			etm_stats.etm_wr_hdr_control.fmds_value.ui64++;
 			etm_stats.etm_wr_body_control.fmds_value.ui64++;
 		}
 		(void) etm_conn_close(hdl, "bad conn close during ver negot",
-					conn);
+		    conn);

 	} /* foreach dst addr */
@@ -1264,39 +1329,92 @@

 } /* etm_req_ver_negot() */

 /*
- * Design_Note: We rely on the fact that all message types have
- *		a common protocol preamble; if this fact should
- *		ever change it may break the code below. We also
- *		rely on the fact that FMA_EVENT and CONTROL headers
- *		returned will be sized large enough to reuse them
- *		as RESPONSE headers if the remote endpt asked
- *		for a response via the pp_timeout field.
+ * Design_Note: For all etm_resp_q_*() functions and etm_resp_q_* globals,
+ *		the mutex etm_resp_q_lock must be held by the caller.
  */

 /*
- * etm_maybe_send_response - check the given message header to see
+ * etm_resp_q_enq - add element to tail of ETM responder queue
+ * etm_resp_q_deq - del element from head of ETM responder queue
+ *
+ * return >0 for success, or -errno value
+ */
+
+static int
+etm_resp_q_enq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep)
+{
+	etm_resp_q_ele_t	*newp;	/* ptr to new resp q ele */
+
+	if (etm_resp_q_cur_len >= etm_resp_q_max_len) {
+		fmd_hdl_debug(hdl, "warning: enq to full responder queue\n");
+		etm_stats.etm_enq_drop_resp_q.fmds_value.ui64++;
+		return (-E2BIG);
+	}
+
+	newp = fmd_hdl_zalloc(hdl, sizeof (*newp), FMD_SLEEP);
+	(void) memcpy(newp, rqep, sizeof (*newp));
+	newp->rqe_nextp = NULL;
+
+	if (etm_resp_q_cur_len == 0) {
+		etm_resp_q_head = newp;
+	} else {
+		etm_resp_q_tail->rqe_nextp = newp;
+	}
+	etm_resp_q_tail = newp;
+	etm_resp_q_cur_len++;
+	etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len;
+
+	return (1);
+
+} /* etm_resp_q_enq() */
+
+static int
+etm_resp_q_deq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep)
+{
+	etm_resp_q_ele_t	*oldp;	/* ptr to old resp q ele */
+
+	if (etm_resp_q_cur_len == 0) {
+		fmd_hdl_debug(hdl, "warning: deq from empty responder queue\n");
+		etm_stats.etm_deq_drop_resp_q.fmds_value.ui64++;
+		return (-ENOENT);
+	}
+
+	(void) memcpy(rqep, etm_resp_q_head, sizeof (*rqep));
+	rqep->rqe_nextp = NULL;
+
+	oldp = etm_resp_q_head;
+	etm_resp_q_head = etm_resp_q_head->rqe_nextp;
+	fmd_hdl_free(hdl, oldp, sizeof (*oldp));
+
+	etm_resp_q_cur_len--;
+	etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len;
+	if (etm_resp_q_cur_len == 0) {
+		etm_resp_q_tail = NULL;
+	}
+
+	return (1);
+
+} /* etm_resp_q_deq() */
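Per the Design_Note, etm_resp_q_enq() and etm_resp_q_deq() assume the caller already holds etm_resp_q_lock; they also copy elements by value, so a consumer may keep using a dequeued element after dropping the lock. A minimal sketch of the intended locking protocol around them, with generic names standing in for the real callers (etm_maybe_enq_response() and etm_responder(), later in this patch):

#include <pthread.h>
#include <stdint.h>

extern pthread_mutex_t q_lock;	/* stands in for etm_resp_q_lock */
extern pthread_cond_t q_cv;	/* stands in for etm_resp_q_cv */
extern uint32_t q_len;		/* stands in for etm_resp_q_cur_len */

extern int q_enq(void *ele);	/* caller must hold q_lock */
extern int q_deq(void *ele);	/* caller must hold q_lock; copies out */

void
producer(void *ele)
{
	(void) pthread_mutex_lock(&q_lock);
	(void) q_enq(ele);
	if (q_len == 1)			/* queue was empty; wake consumer */
		(void) pthread_cond_signal(&q_cv);
	(void) pthread_mutex_unlock(&q_lock);
}

void
consumer(void *ele)
{
	(void) pthread_mutex_lock(&q_lock);
	while (q_len == 0)		/* re-check; wakeups can be spurious */
		(void) pthread_cond_wait(&q_cv, &q_lock);
	(void) q_deq(ele);		/* copies element out of the queue */
	(void) pthread_mutex_unlock(&q_lock);
	/* safe to use *ele here; it no longer aliases queue memory */
}

Signaling only on the empty-to-nonempty transition is a small optimization: if the queue already had elements, the consumer is either awake and draining or will find them when it next checks, so no extra wakeup is needed.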
+
+/*
+ * etm_maybe_enq_response - check the given message header to see
  *				whether a response has been requested,
- *				if so then send an appropriate response
- *				back on the given connection using the
- *				given response code,
- *				return 0 or -errno value
+ *				if so then enqueue the given connection
+ *				and header for later transport by the
+ *				responder thread as an ETM response msg,
+ *				return 0 for nop, >0 success, or -errno value
  */

 static ssize_t
-etm_maybe_send_response(fmd_hdl_t *hdl, etm_xport_conn_t conn,
-			void *hdrp, int32_t resp_code)
+etm_maybe_enq_response(fmd_hdl_t *hdl, etm_xport_conn_t conn,
+    void *hdrp, uint32_t hdr_sz, int32_t resp_code)
 {
 	ssize_t rv;				/* ret val */
 	etm_proto_v1_pp_t *ppp;			/* protocol preamble ptr */
-	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
-	uint8_t resp_body[4];			/* response body if needed */
-	uint8_t *resp_msg;			/* response hdr+body */
-	size_t hdr_sz;				/* sizeof response hdr */
 	uint8_t orig_msg_type;			/* orig hdr's message type */
 	uint32_t orig_timeout;			/* orig hdr's timeout */
-	ssize_t n;				/* gen use */
-
-	rv = 0;	/* default is success */
+	etm_resp_q_ele_t rqe;			/* responder queue ele */
+
 	ppp = hdrp;
 	orig_msg_type = ppp->pp_msg_type;
 	orig_timeout = ppp->pp_timeout;
@@ -1310,15 +1428,69 @@
 	if ((orig_msg_type != ETM_MSG_TYPE_FMA_EVENT) &&
 	    (orig_msg_type != ETM_MSG_TYPE_ALERT) &&
 	    (orig_msg_type != ETM_MSG_TYPE_CONTROL)) {
+		fmd_hdl_debug(hdl, "warning: bad msg type 0x%x\n",
+		    orig_msg_type);
 		return (-EINVAL);
 	} /* if inappropriate hdr for a response msg */

-	/* reuse the given header as a response header */
+	/*
+	 * enqueue the msg hdr and nudge the responder thread
+	 * if the responder queue was previously empty
+	 */
+
+	rqe.rqe_conn = conn;
+	rqe.rqe_hdrp = hdrp;
+	rqe.rqe_hdr_sz = hdr_sz;
+	rqe.rqe_resp_code = resp_code;
+
+	(void) pthread_mutex_lock(&etm_resp_q_lock);
+	rv = etm_resp_q_enq(hdl, &rqe);
+	if (etm_resp_q_cur_len == 1)
+		(void) pthread_cond_signal(&etm_resp_q_cv);
+	(void) pthread_mutex_unlock(&etm_resp_q_lock);
+
+	return (rv);
+
+} /* etm_maybe_enq_response() */
+
+/*
+ * Design_Note: We rely on the fact that all message types have
+ *		a common protocol preamble; if this fact should
+ *		ever change it may break the code below. We also
+ *		rely on the fact that FMA_EVENT and CONTROL headers
+ *		returned by etm_hdr_read() will be sized large enough
+ *		to reuse them as RESPONSE headers if the remote endpt
+ *		asked for a response via the pp_timeout field.
+ */
+
+/*
+ * etm_send_response - use the given message header and response code
+ *			to construct an appropriate response message,
+ *			and send it back on the given connection,
+ *			return >0 for success, or -errno value
+ */
+
+static ssize_t
+etm_send_response(fmd_hdl_t *hdl, etm_xport_conn_t conn,
+    void *hdrp, int32_t resp_code)
+{
+	ssize_t rv;				/* ret val */
+	etm_proto_v1_pp_t *ppp;			/* protocol preamble ptr */
+	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
+	uint8_t resp_body[4];			/* response body if needed */
+	uint8_t *resp_msg;			/* response hdr+body */
+	size_t hdr_sz;				/* sizeof response hdr */
+	uint8_t orig_msg_type;			/* orig hdr's message type */
+
+	ppp = hdrp;
+	orig_msg_type = ppp->pp_msg_type;

 	if (etm_debug_lvl >= 2) {
 		etm_show_time(hdl, "ante resp send");
 	}

+	/* reuse the given header as a response header */
+
 	resp_hdrp = hdrp;
 	resp_hdrp->resp_code = resp_code;
 	resp_hdrp->resp_len = 0;		/* default is empty body */
@@ -1340,10 +1512,7 @@
 	/*
 	 * send the whole response msg in one write, header and body;
 	 * avoid the alloc-and-copy if we can reuse the hdr as the msg,
-	 * ie, if the body is empty
-	 *
-	 * update stats and note the xid associated with last ACKed FMA_EVENT
-	 * known to be successfully posted to FMD to aid duplicate filtering
+	 * ie, if the body is empty. update the response stats.
 	 */

 	hdr_sz = sizeof (etm_proto_v1_resp_hdr_t);
@@ -1358,22 +1527,16 @@
 	}

 	(void) pthread_mutex_lock(&etm_write_lock);
-	if ((n = etm_io_op(hdl, "bad io write on resp msg", conn,
-	    resp_msg, hdr_sz + resp_hdrp->resp_len, ETM_IO_OP_WR)) < 0) {
-		(void) pthread_mutex_unlock(&etm_write_lock);
-		rv = n;
+	rv = etm_io_op(hdl, "bad io write on resp msg", conn,
+	    resp_msg, hdr_sz + resp_hdrp->resp_len, ETM_IO_OP_WR);
+	(void) pthread_mutex_unlock(&etm_write_lock);
+	if (rv < 0) {
 		goto func_ret;
 	}
-	(void) pthread_mutex_unlock(&etm_write_lock);

 	etm_stats.etm_wr_hdr_response.fmds_value.ui64++;
 	etm_stats.etm_wr_body_response.fmds_value.ui64++;

-	if ((orig_msg_type == ETM_MSG_TYPE_FMA_EVENT) &&
-	    (resp_code >= 0)) {
-		etm_xid_posted_ev = resp_hdrp->resp_pp.pp_xid;
-	}
-
 	fmd_hdl_debug(hdl, "info: sent V%u RESPONSE msg to xport "
 	    "xid 0x%x code %d len %u\n",
 	    (unsigned int)resp_hdrp->resp_pp.pp_proto_ver,
@@ -1389,7 +1552,7 @@
 	}
 	return (rv);

-} /* etm_maybe_send_response() */
+} /* etm_send_response() */

 /*
  * etm_handle_new_conn - receive an ETM message sent from the other end via
@@ -1407,6 +1570,7 @@
 	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
 	etm_proto_v3_sa_hdr_t *sa_hdrp;		/* for ALERT msg */
 	int32_t resp_code;			/* response code */
+	ssize_t enq_rv;				/* resp_q enqueue status */
 	size_t hdr_sz;				/* sizeof header */
 	uint8_t *body_buf;			/* msg body buffer */
 	uint32_t body_sz;			/* sizeof body_buf */
@@ -1429,13 +1593,14 @@
 	class = NULL;
 	evp = NULL;
 	resp_code = 0;	/* default is success */
+	enq_rv = 0;	/* default is nop, ie, did not enqueue */

 	/* read a network decoded message header from the connection */

 	if ((ev_hdrp = etm_hdr_read(hdl, conn, &hdr_sz)) == NULL) {
 		/* errno assumed set by above call */
 		fmd_hdl_debug(hdl, "error: FMA event dropped: "
-				"bad hdr read errno %d\n", errno);
+		    "bad hdr read errno %d\n", errno);
 		etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
 		goto func_ret;
 	}
@@ -1449,25 +1614,14 @@

 		fmd_hdl_debug(hdl, "info: rcvd FMA_EVENT msg from xport\n");

-		/*
-		 * check for dup msg/xid against last good response sent,
-		 * if a dup then resend response but skip repost to FMD
-		 */
-
-		if (ev_hdrp->ev_pp.pp_xid == etm_xid_posted_ev) {
-			(void) etm_maybe_send_response(hdl, conn, ev_hdrp, 0);
-			fmd_hdl_debug(hdl, "info: skipping dup FMA event post "
-			    "xid 0x%x\n", etm_xid_posted_ev);
-			etm_stats.etm_rd_dup_fmaevent.fmds_value.ui64++;
-			goto func_ret;
-		}
-
 		/* allocate buf large enough for whole body / all FMA events */

 		body_sz = 0;
 		for (i = 0; ev_hdrp->ev_lens[i] != 0; i++) {
 			body_sz += ev_hdrp->ev_lens[i];
 		} /* for summing sizes of all FMA events */
+		if (i > etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64)
+			etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64 = i;
 		ev_cnt = i;

 		if (etm_debug_lvl >= 1) {
@@ -1480,9 +1634,8 @@
 		/* read all the FMA events at once */

 		if ((n = etm_io_op(hdl, "FMA event dropped: "
-					"bad io read on event bodies",
-					conn, body_buf, body_sz,
-					ETM_IO_OP_RD)) < 0) {
+		    "bad io read on event bodies", conn, body_buf, body_sz,
+		    ETM_IO_OP_RD)) < 0) {
 			etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
 			goto func_ret;
 		}
@@ -1490,45 +1643,63 @@
 		etm_stats.etm_rd_xport_bytes.fmds_value.ui64 += body_sz;
 		etm_stats.etm_rd_body_fmaevent.fmds_value.ui64 += ev_cnt;

+		/*
+		 * now that we've read the entire ETM msg from the conn,
+		 * which avoids later ETM protocol framing errors if we didn't,
+		 * check for dup msg/xid against last good FMD posting,
+		 * if a dup then resend response but skip repost to FMD
+		 */
+
+		if (ev_hdrp->ev_pp.pp_xid == etm_xid_posted_ev) {
+			enq_rv = etm_maybe_enq_response(hdl, conn,
+			    ev_hdrp, hdr_sz, 0);
+			fmd_hdl_debug(hdl, "info: skipping dup FMA event post "
+			    "xid 0x%x\n", etm_xid_posted_ev);
+			etm_stats.etm_rd_dup_fmaevent.fmds_value.ui64++;
+			goto func_ret;
+		}
+
 		/* unpack each FMA event and post it to FMD */

 		bp = body_buf;
 		for (i = 0; i < ev_cnt; i++) {
 			if ((n = nvlist_unpack((char *)bp,
-					ev_hdrp->ev_lens[i], &evp, 0)) != 0) {
+			    ev_hdrp->ev_lens[i], &evp, 0)) != 0) {
 				resp_code = (-n);
-				(void) etm_maybe_send_response(hdl, conn,
-							ev_hdrp, resp_code);
+				enq_rv = etm_maybe_enq_response(hdl, conn,
+				    ev_hdrp, hdr_sz, resp_code);
 				fmd_hdl_error(hdl, "error: FMA event dropped: "
-						"bad event body unpack "
-						"errno %d\n", n);
+				    "bad event body unpack errno %d\n", n);
 				if (etm_debug_lvl >= 2) {
 					fmd_hdl_debug(hdl, "info: FMA event "
-						"hexdump %d bytes:\n",
-						ev_hdrp->ev_lens[i]);
+					    "hexdump %d bytes:\n",
+					    ev_hdrp->ev_lens[i]);
 					etm_hexdump(hdl, bp,
-						ev_hdrp->ev_lens[i]);
+					    ev_hdrp->ev_lens[i]);
 				}
 				etm_stats.etm_os_nvlist_unpack_fail.fmds_value.
-					ui64++;
+				    ui64++;
 				etm_stats.etm_rd_drop_fmaevent.fmds_value.
-					ui64++;
+				    ui64++;
 				bp += ev_hdrp->ev_lens[i];
 				continue;
 			}
 			if (etm_debug_lvl >= 1) {
 				(void) nvlist_lookup_string(evp, FM_CLASS,
-								&class);
+				    &class);
 				if (class == NULL) {
 					class = "NULL";
 				}
 				fmd_hdl_debug(hdl, "info: FMA event %p "
-						"class %s\n", evp, class);
+				    "class %s\n", evp, class);
 			}
 			resp_code = etm_post_to_fmd(hdl, evp);
+			if (resp_code >= 0) {
+				etm_xid_posted_ev = ev_hdrp->ev_pp.pp_xid;
+			}
 			evp = NULL;
-			(void) etm_maybe_send_response(hdl, conn,
-							ev_hdrp, resp_code);
+			enq_rv = etm_maybe_enq_response(hdl, conn,
+			    ev_hdrp, hdr_sz, resp_code);
 			bp += ev_hdrp->ev_lens[i];
 		} /* foreach FMA event in the body buffer */
@@ -1539,8 +1710,8 @@
 		fmd_hdl_debug(hdl, "info: rcvd CONTROL msg from xport\n");
 		if (etm_debug_lvl >= 1) {
 			fmd_hdl_debug(hdl, "info: ctl sel %d xid 0x%x\n",
-					(int)ctl_hdrp->ctl_pp.pp_sub_type,
-					ctl_hdrp->ctl_pp.pp_xid);
+			    (int)ctl_hdrp->ctl_pp.pp_sub_type,
+			    ctl_hdrp->ctl_pp.pp_xid);
 		}

 		/*
@@ -1557,8 +1728,7 @@
 		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);

 		if ((n = etm_io_op(hdl, "bad io read on ctl body",
-					conn, body_buf, body_sz,
-					ETM_IO_OP_RD)) < 0) {
+		    conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
 			goto func_ret;
 		}
@@ -1580,7 +1750,8 @@

 		etm_stats.etm_rd_body_control.fmds_value.ui64++;

-		(void) etm_maybe_send_response(hdl, conn, ctl_hdrp, resp_code);
+		enq_rv = etm_maybe_enq_response(hdl, conn,
+		    ctl_hdrp, hdr_sz, resp_code);

 	} else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) {
@@ -1589,14 +1760,14 @@
 		fmd_hdl_debug(hdl, "info: rcvd RESPONSE msg from xport\n");
 		if (etm_debug_lvl >= 1) {
 			fmd_hdl_debug(hdl, "info: resp xid 0x%x\n",
-						(int)resp_hdrp->resp_pp.pp_xid);
+			    (int)resp_hdrp->resp_pp.pp_xid);
 		}

 		body_sz = resp_hdrp->resp_len;
 		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);

 		if ((n = etm_io_op(hdl, "bad io read on resp len",
-				conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
+		    conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
 			goto func_ret;
 		}
@@ -1631,35 +1802,63 @@
 		fmd_hdl_debug(hdl, "info: rcvd ALERT msg from xport\n");
 		if (etm_debug_lvl >= 1) {
 			fmd_hdl_debug(hdl, "info: sa sel %d xid 0x%x\n",
-					(int)sa_hdrp->sa_pp.pp_sub_type,
-					sa_hdrp->sa_pp.pp_xid);
+			    (int)sa_hdrp->sa_pp.pp_sub_type,
+			    sa_hdrp->sa_pp.pp_xid);
 		}

 		body_sz = sa_hdrp->sa_len;
 		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);

 		if ((n = etm_io_op(hdl, "bad io read on sa body",
-					conn, body_buf, body_sz,
-					ETM_IO_OP_RD)) < 0) {
+		    conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
 			goto func_ret;
 		}
 		etm_stats.etm_rd_body_alert.fmds_value.ui64++;

+		/*
+		 * now that we've read the entire ETM msg from the conn,
+		 * which avoids later ETM protocol framing errors if we didn't,
+		 * check for dup msg/xid against last good syslog posting,
+		 * if a dup then resend response but skip repost to syslog
+		 */
+
+		if (sa_hdrp->sa_pp.pp_xid == etm_xid_posted_sa) {
+			enq_rv = etm_maybe_enq_response(hdl, conn,
+			    sa_hdrp, hdr_sz, 0);
+			fmd_hdl_debug(hdl, "info: skipping dup ALERT post "
+			    "xid 0x%x\n", etm_xid_posted_sa);
+			etm_stats.etm_rd_dup_alert.fmds_value.ui64++;
+			goto func_ret;
+		}
+
 		resp_code = etm_post_to_syslog(hdl, sa_hdrp->sa_priority,
 		    body_sz, body_buf);
-		(void) etm_maybe_send_response(hdl, conn, sa_hdrp, resp_code);
+		if (resp_code >= 0) {
+			etm_xid_posted_sa = sa_hdrp->sa_pp.pp_xid;
+		}
+		enq_rv = etm_maybe_enq_response(hdl, conn,
+		    sa_hdrp, hdr_sz, resp_code);

 	} /* whether we have a FMA_EVENT, CONTROL, RESPONSE or ALERT msg */

 func_ret:

-	(void) etm_conn_close(hdl, "bad conn close after msg recv", conn);
-
 	if (etm_debug_lvl >= 2) {
 		etm_show_time(hdl, "post conn handle");
 	}
-	if (ev_hdrp != NULL) {
-		fmd_hdl_free(hdl, ev_hdrp, hdr_sz);
+
+	/*
+	 * if no responder ele was enqueued, close the conn now
+	 * and free the ETM msg hdr; the ETM msg body is not needed
+	 * by the responder thread and should always be freed here
+	 */
+
+	if (enq_rv <= 0) {
+		(void) etm_conn_close(hdl, "bad conn close after msg recv",
+		    conn);
+		if (ev_hdrp != NULL) {
+			fmd_hdl_free(hdl, ev_hdrp, hdr_sz);
+		}
 	}
 	if (body_buf != NULL) {
 		fmd_hdl_free(hdl, body_buf, body_sz);
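With this hunk, ALERT msgs get the same duplicate-xid filter that FMA_EVENT msgs already had (bug 6591930), and in both paths the remembered xid is now recorded only after a successful post rather than when the ACK is sent. Reduced to a sketch with hypothetical names (the real code keys off etm_xid_posted_ev and etm_xid_posted_sa, and enqueues rather than sends the response directly):

#include <stdint.h>

static uint32_t last_posted_xid;	/* xid of last successful post */

extern int post_msg(void *msg);		/* hypothetical; >= 0 on success */
extern void send_response(uint32_t xid, int code);

void
receive_msg(uint32_t xid, void *msg)
{
	int code;

	if (xid == last_posted_xid) {
		/* dup: ack it again, but do not post it a second time */
		send_response(xid, 0);
		return;
	}
	code = post_msg(msg);
	if (code >= 0)
		last_posted_xid = xid;	/* only a good post arms the filter */
	send_response(xid, code);
}

The ack-on-dup branch is what stops the sender's retransmit loop; recording the xid only after a good post is what keeps a failed post from being silently swallowed on the retry.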
@@ -1693,24 +1892,14 @@
 			break;
 		}
 		fmd_hdl_debug(hdl,
-			"error: bad conn accept errno %d\n", n);
+		    "error: bad conn accept errno %d\n", n);
 		etm_stats.etm_xport_accept_fail.fmds_value.ui64++;
 		/* avoid spinning CPU */
 		(void) etm_sleep(ETM_SLEEP_SLOW);
 		continue;
 	}

-	/*
-	 * Design_Note: etm_handle_new_conn() will close the
-	 *		accepted connection when done. In early designs
-	 *		etm_handle_new_conn() was spawned as a
-	 *		separate thread via pthread_create();
-	 *		however fmd_thr_create() constrains thread
-	 *		creation to prevent spawned threads from
-	 *		spawning others (ie, no grandchildren).
-	 *		Hence etm_handle_new_conn() is now called
-	 *		as a simple function [w/ multiple args].
-	 */
+	/* handle the new message/connection, closing it when done */

 	etm_handle_new_conn(hdl, conn);

@@ -1718,10 +1907,109 @@

 	/* ETM is dying (probably due to "fmadm unload etm") */

-	if (etm_debug_lvl >= 1) {
-		fmd_hdl_debug(hdl, "info: connection server is dying\n");
+	fmd_hdl_debug(hdl, "info: connection server is dying\n");
+
+} /* etm_server() */
+
+/*
+ * etm_responder - loop forever waiting for new responder queue elements
+ *		to be enqueued, for each one constructing and sending
+ *		an ETM response msg to the other side, and closing its
+ *		associated connection when appropriate
+ *
+ *		this thread exists to ensure that the etm_server() thread
+ *		never pends indefinitely waiting on the xport write lock,
+ *		and is hence always available to accept new connections
+ *		and handle incoming messages
+ *
+ *		this design relies on the fact that each connection accepted
+ *		and returned by the ETM xport layer is unique, and each can
+ *		be closed independently of the others while multiple
+ *		connections are outstanding
+ */
+
+static void
+etm_responder(void *arg)
+{
+	ssize_t n;		/* gen use */
+	fmd_hdl_t *hdl;		/* FMD handle */
+	etm_resp_q_ele_t rqe;	/* responder queue ele */
+
+	hdl = arg;
+
+	fmd_hdl_debug(hdl, "info: responder server starting\n");
+
+	while (!etm_is_dying) {
+
+		(void) pthread_mutex_lock(&etm_resp_q_lock);
+
+		while (etm_resp_q_cur_len == 0) {
+			(void) pthread_cond_wait(&etm_resp_q_cv,
+			    &etm_resp_q_lock);
+			if (etm_is_dying) {
+				(void) pthread_mutex_unlock(&etm_resp_q_lock);
+				goto func_ret;
+			}
+		} /* while the responder queue is empty, wait to be nudged */
+
+		/*
+		 * for every responder ele that has been enqueued,
+		 * dequeue and send it as an ETM response msg,
+		 * closing its associated conn and freeing its hdr
+		 *
+		 * enter the queue draining loop holding the responder
+		 * queue lock, but do not hold the lock indefinitely
+		 * (the actual send may pend us indefinitely),
+		 * so that other threads will never pend for long
+		 * trying to enqueue a new element
+		 */
+
+		while (etm_resp_q_cur_len > 0) {
+
+			(void) etm_resp_q_deq(hdl, &rqe);
+			(void) pthread_mutex_unlock(&etm_resp_q_lock);
+
+			if ((n = etm_send_response(hdl, rqe.rqe_conn,
+			    rqe.rqe_hdrp, rqe.rqe_resp_code)) < 0) {
+				fmd_hdl_error(hdl, "error: bad resp send "
+				    "errno %d\n", (-n));
+			}
+
+			(void) etm_conn_close(hdl, "bad conn close after resp",
+			    rqe.rqe_conn);
+			fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz);
+
+			if (etm_is_dying) {
+				goto func_ret;
+			}
+			(void) pthread_mutex_lock(&etm_resp_q_lock);
+
+		} /* while draining the responder queue */
+
+		(void) pthread_mutex_unlock(&etm_resp_q_lock);
+
+	} /* while awaiting and sending resp msgs until ETM dies */
+
+func_ret:
+
+	/* ETM is dying (probably due to "fmadm unload etm") */
+
+	fmd_hdl_debug(hdl, "info: responder server is dying\n");
+
+	(void) pthread_mutex_lock(&etm_resp_q_lock);
+	if (etm_resp_q_cur_len > 0) {
+		fmd_hdl_error(hdl, "warning: %d response msgs dropped\n",
+		    (int)etm_resp_q_cur_len);
+		while (etm_resp_q_cur_len > 0) {
+			(void) etm_resp_q_deq(hdl, &rqe);
+			(void) etm_conn_close(hdl, "bad conn close after deq",
+			    rqe.rqe_conn);
+			fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz);
+		}
 	}
-} /* etm_server() */
+	(void) pthread_mutex_unlock(&etm_resp_q_lock);
+
+} /* etm_responder() */

 static void *
 etm_init_alloc(size_t size)
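The drain loop in etm_responder() above deliberately drops etm_resp_q_lock around each send, because etm_send_response() can pend indefinitely on a full transmit queue (the deadlock of bug 6594506); enqueuers therefore never block behind the transport. That lock scope, reduced to a sketch with the same generic names as the earlier queue sketch:

#include <pthread.h>
#include <stdint.h>

extern pthread_mutex_t q_lock;		/* stands in for etm_resp_q_lock */
extern uint32_t q_len;			/* stands in for etm_resp_q_cur_len */
extern int q_deq(void *ele);		/* caller must hold q_lock */
extern void blocking_send(void *ele);	/* may pend indefinitely */

void
drain_queue(void)
{
	char ele[64];	/* stand-in for an etm_resp_q_ele_t copy */

	(void) pthread_mutex_lock(&q_lock);
	while (q_len > 0) {
		(void) q_deq(ele);	/* copy out while still locked */
		/* drop the lock before the send that can pend forever */
		(void) pthread_mutex_unlock(&q_lock);
		blocking_send(ele);
		(void) pthread_mutex_lock(&q_lock);
	}
	(void) pthread_mutex_unlock(&q_lock);
}

Copying the element out under the lock is what makes the unlock safe: the consumer's copy cannot be freed or overwritten by a concurrent enqueue while the send is in flight.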
@@ -1779,15 +2067,17 @@
 	/* setup statistics and properties from FMD */

 	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
-			sizeof (etm_stats) / sizeof (fmd_stat_t),
-			(fmd_stat_t *)&etm_stats);
+	    sizeof (etm_stats) / sizeof (fmd_stat_t), (fmd_stat_t *)&etm_stats);

 	etm_debug_lvl = fmd_prop_get_int32(hdl, ETM_PROP_NM_DEBUG_LVL);
 	etm_debug_max_ev_cnt = fmd_prop_get_int32(hdl,
-						ETM_PROP_NM_DEBUG_MAX_EV_CNT);
+	    ETM_PROP_NM_DEBUG_MAX_EV_CNT);
 	fmd_hdl_debug(hdl, "info: etm_debug_lvl %d "
-			"etm_debug_max_ev_cnt %d\n",
-			etm_debug_lvl, etm_debug_max_ev_cnt);
+	    "etm_debug_max_ev_cnt %d\n", etm_debug_lvl, etm_debug_max_ev_cnt);
+
+	etm_resp_q_max_len = fmd_prop_get_int32(hdl,
+	    ETM_PROP_NM_MAX_RESP_Q_LEN);
+	etm_stats.etm_resp_q_max_len.fmds_value.ui64 = etm_resp_q_max_len;

 	/* obtain an FMD transport handle so we can post FMA events later */
@@ -1799,11 +2089,7 @@
 	etm_xid_cur = (uint32_t)((tmv.tv_sec << 10) |
 	    ((unsigned long)tmv.tv_usec >> 10));

-	/*
-	 * init the transport,
-	 * start the connection acceptance server, and
-	 * request protocol version be negotiated
-	 */
+	/* init the ETM transport */

 	if ((n = etm_xport_init(hdl)) != 0) {
 		fmd_hdl_error(hdl, "error: bad xport init errno %d\n", (-n));
@@ -1854,14 +2140,16 @@
 		fmd_prop_free_string(hdl, facname);
 	}

+	/*
+	 * start the message responder and the connection acceptance server;
+	 * request protocol version be negotiated after waiting a second
+	 * for the receiver to be ready to start handshaking
+	 */
+
+	etm_resp_tid = fmd_thr_create(hdl, etm_responder, hdl);
 	etm_svr_tid = fmd_thr_create(hdl, etm_server, hdl);

-	/*
-	 * Wait a second for the receiver to be ready before start handshaking
-	 * with the SP.
-	 */
 	(void) etm_sleep(ETM_SLEEP_QUIK);
-
 	etm_req_ver_negot(hdl);

 	fmd_hdl_debug(hdl, "info: module initialized ok\n");
@@ -1886,13 +2174,20 @@
 	uint8_t *buf;		/* tmp buffer for packed FMA event */

 	buflen = 0;
-	(void) nvlist_size(evp, &buflen, NV_ENCODE_XDR);
-	etm_stats.etm_rd_fmd_bytes.fmds_value.ui64 += buflen;
-	etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64++;
+	if ((n = nvlist_size(evp, &buflen, NV_ENCODE_XDR)) != 0) {
+		fmd_hdl_error(hdl, "error: FMA event dropped: "
+		    "event size errno %d class %s\n", n, class);
+		etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++;
+		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
+		return;
+	}

 	fmd_hdl_debug(hdl, "info: rcvd event %p from FMD\n", evp);
 	fmd_hdl_debug(hdl, "info: cnt %llu class %s\n",
-			etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64, class);
+	    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64, class);
+
+	etm_stats.etm_rd_fmd_bytes.fmds_value.ui64 += buflen;
+	etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64++;

 	/*
 	 * if the debug limit has been set, avoid excessive traffic,
@@ -1900,12 +2195,12 @@
 	 */

 	if ((etm_debug_max_ev_cnt >= 0) &&
-		(etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64 >
-						etm_debug_max_ev_cnt)) {
+	    (etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64 >
+	    etm_debug_max_ev_cnt)) {
 		fmd_hdl_debug(hdl, "warning: FMA event dropped: "
-			"event %p cnt %llu > debug max %d\n", evp,
-			etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64,
-			etm_debug_max_ev_cnt);
+		    "event %p cnt %llu > debug max %d\n", evp,
+		    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64,
+		    etm_debug_max_ev_cnt);
 		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
 		return;
 	}
@@ -1915,9 +2210,9 @@
 	buf = fmd_hdl_zalloc(hdl, buflen, FMD_SLEEP);

 	if ((n = nvlist_pack(evp, (char **)&buf, &buflen,
-					NV_ENCODE_XDR, 0)) != 0) {
+	    NV_ENCODE_XDR, 0)) != 0) {
 		fmd_hdl_error(hdl, "error: FMA event dropped: "
-				"event pack errno %d\n", n);
+		    "event pack errno %d class %s\n", n, class);
 		etm_stats.etm_os_nvlist_pack_fail.fmds_value.ui64++;
 		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
 		fmd_hdl_free(hdl, buf, buflen);
@@ -1928,7 +2223,7 @@

 	if ((addrv = etm_xport_get_ev_addrv(hdl, evp)) == NULL) {
 		fmd_hdl_error(hdl, "error: FMA event dropped: "
-				"bad event dst addrs errno %d\n", errno);
+		    "bad event dst addrs errno %d\n", errno);
 		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
 		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
 		fmd_hdl_free(hdl, buf, buflen);
@@ -1940,8 +2235,7 @@
 		/* open a new connection to this dst addr */

 		if ((n = etm_conn_open(hdl, "FMA event dropped: "
-				"bad conn open on new ev",
-				addrv[i], &conn)) < 0) {
+		    "bad conn open on new ev", addrv[i], &conn)) < 0) {
 			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
 			continue;
 		}
@@ -1951,12 +2245,12 @@
 		/* write the ETM message header */

 		if ((hdrp = etm_hdr_write(hdl, conn, evp, NV_ENCODE_XDR,
-								&sz)) == NULL) {
+		    &sz)) == NULL) {
 			(void) pthread_mutex_unlock(&etm_write_lock);
 			fmd_hdl_error(hdl, "error: FMA event dropped: "
-					"bad hdr write errno %d\n", errno);
+			    "bad hdr write errno %d\n", errno);
 			(void) etm_conn_close(hdl,
-				"bad conn close per bad hdr wr", conn);
+			    "bad conn close per bad hdr wr", conn);
 			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
 			continue;
 		}
@@ -1964,16 +2258,16 @@
 		fmd_hdl_free(hdl, hdrp, sz);	/* header not needed */
 		etm_stats.etm_wr_hdr_fmaevent.fmds_value.ui64++;
 		fmd_hdl_debug(hdl, "info: hdr xport write ok for event %p\n",
-								evp);
+		    evp);

 		/* write the ETM message body, ie, the packed nvlist */

 		if ((n = etm_io_op(hdl, "FMA event dropped: "
-					"bad io write on event", conn,
-					buf, buflen, ETM_IO_OP_WR)) < 0) {
+		    "bad io write on event", conn,
+		    buf, buflen, ETM_IO_OP_WR)) < 0) {
 			(void) pthread_mutex_unlock(&etm_write_lock);
 			(void) etm_conn_close(hdl,
-				"bad conn close per bad body wr", conn);
+			    "bad conn close per bad body wr", conn);
 			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
 			continue;
 		}
@@ -1983,12 +2277,12 @@
 		etm_stats.etm_wr_body_fmaevent.fmds_value.ui64++;
 		etm_stats.etm_wr_xport_bytes.fmds_value.ui64 += buflen;
 		fmd_hdl_debug(hdl, "info: body xport write ok for event %p\n",
-								evp);
+		    evp);

 		/* close the connection */

 		(void) etm_conn_close(hdl, "bad conn close after event send",
-								conn);
+		    conn);
 	} /* foreach dst addr in the vector */

 	etm_xport_free_addrv(hdl, addrv);
@@ -2005,9 +2299,9 @@
 {
 	ssize_t n;	/* gen use */

-	fmd_hdl_debug(hdl, "info: module finializing\n");
-
-	/* kill the connection server ; wait for it to die */
+	fmd_hdl_debug(hdl, "info: module finalizing\n");
+
+	/* kill the connection server and responder ; wait for them to die */

 	etm_is_dying = 1;
@@ -2017,7 +2311,13 @@
 		etm_svr_tid = NULL;
 	} /* if server thread was successfully created */

-	/* teardown the transport */
+	if (etm_resp_tid != NULL) {
+		fmd_thr_signal(hdl, etm_resp_tid);
+		fmd_thr_destroy(hdl, etm_resp_tid);
+		etm_resp_tid = NULL;
+	} /* if responder thread was successfully created */
+
+	/* teardown the transport and cleanup syslogging */

 	if ((n = etm_xport_fini(hdl)) != 0) {
 		fmd_hdl_error(hdl, "warning: xport fini errno %d\n", (-n));
diff -r ffbab30c82bd -r 15cab3a79834 usr/src/cmd/fm/modules/sun4v/etm/etm_impl.h
--- a/usr/src/cmd/fm/modules/sun4v/etm/etm_impl.h	Tue Sep 11 16:39:42 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4v/etm/etm_impl.h	Wed Sep 12 09:45:10 2007 -0700
@@ -20,7 +20,7 @@
  */

 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */

@@ -92,6 +92,8 @@
 #define	ETM_PROP_NM_SYSLOGD	"etm_alert_syslog"
 #define	ETM_PROP_NM_FACILITY	"etm_alert_facility"

+#define	ETM_PROP_NM_MAX_RESP_Q_LEN	"etm_resp_q_max_len"
+
 /*
  * --------------------------------- prolog ----------------------------------
 */
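ETM_PROP_NM_MAX_RESP_Q_LEN is the name under which the new responder-queue bound is exposed as an ordinary fmd module property (default "512" per the property table added in etm.c above). Assuming the standard fmd plugin configuration layout, an administrator could presumably override the default with a setprop line in the module's .conf file; the path and value below are hypothetical:

#
# hypothetical override in /usr/lib/fm/fmd/plugins/etm.conf:
# raise the cap on queued-but-unsent response msgs from 512 to 1024
#
setprop etm_resp_q_max_len 1024

Responses that arrive while the queue is at the cap are counted in etm_enq_drop_resp_q and dropped, so the bound trades retransmissions from the peer against memory held by a wedged transport.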
diff -r ffbab30c82bd -r 15cab3a79834 usr/src/cmd/fm/modules/sun4v/etm/etm_xport_api_dd.c
--- a/usr/src/cmd/fm/modules/sun4v/etm/etm_xport_api_dd.c	Tue Sep 11 16:39:42 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4v/etm/etm_xport_api_dd.c	Wed Sep 12 09:45:10 2007 -0700
@@ -20,7 +20,7 @@
  */

 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */

@@ -42,7 +42,6 @@
 #include 
 #include 

-
 #include 
 #include 

@@ -218,28 +217,6 @@
  * -------------------------- private variables ------------------------------
  */

-/*
- * Design_Note:
- *
- * Access to the transport for receiving is serialized so that if two threads
- * exist, one for accepting new connections and one for reading on an
- * accepted connection, they don't race with each other. A pingpong access
- * pattern is enforced/ensured using a mutex etm_xport_ser_lock. To avoid
- * deadlocks caused by locking the mutex inside accept() and open(), only
- * accept() is covered with an approrpriate unlock inside close() using
- * etm_xport_ser_conn to notice the proper connection and when to unlock.
- *
- * This could've been done within ETM [inside the pertinent threads]
- * more easily; but because it's platform specific it's being done here
- * within the ETM-to-Transport API.
- */
-
-static pthread_mutex_t
-etm_xport_ser_lock;	/* xport access serialization lock */
-
-static _etm_xport_conn_t *
-etm_xport_ser_conn = NULL; /* serialization connection handle */
-
 static _etm_xport_conn_t *
 etm_xport_vldc_conn = NULL; /* single connection handle for VLDC */

@@ -301,7 +278,7 @@
 		}
 		if (stat_buf.st_size > 0) {
 			n = MIN(peek_ctl_ptr->pk_buflen,
-						stat_buf.st_size);
+			    stat_buf.st_size);
 			peek_ctl_ptr->pk_buflen = n;
 			/* return bogus data assuming content unused */
 			(void) memset(peek_ctl_ptr->pk_buf, 0xA5, n);
@@ -356,7 +333,7 @@
 		return (fn_wr);
 	}
 	if (((io_op == ETM_IO_OP_RD) || (io_op == ETM_IO_OP_PK)) &&
-						(fn_rd[0] != '\0')) {
+	    (fn_rd[0] != '\0')) {
 		return (fn_rd);
 	}

@@ -365,7 +342,7 @@
 	prop_str = fmd_prop_get_string(hdl, ETM_PROP_NM_XPORT_ADDRS);
 	if (etm_xport_debug_lvl >= 2) {
 		fmd_hdl_debug(hdl, "info: etm_xport_get_fn prop_str %s\n",
-				prop_str);
+		    prop_str);
 	}

 	if (strlen(prop_str) == 0) {
@@ -411,7 +388,7 @@

 	if (etm_xport_debug_lvl >= 2) {
 		fmd_hdl_debug(hdl, "info: etm_xport_get_fn fn_wr %s fn_rd %s\n",
-				fn_wr, fn_rd);
+		    fn_wr, fn_rd);
 	}
 	fmd_prop_free_string(hdl, prop_str);
 	return (rv);
@@ -521,7 +498,6 @@
  *		into the caller's given buffer,
  *		return how many bytes actually peeked
  *		or -errno value
- *
  * caveats:
  *		peeked data is NOT guaranteed by all platform transports
  *		to remain enqueued if this process/thread crashes;
@@ -561,7 +537,7 @@

 	if (etm_xport_should_fake_dd) {
 		n = etm_fake_ioctl(_conn->fd, ETM_XPORT_IOCTL_DATA_PEEK,
-					&peek_ctl);
+		    &peek_ctl);
 	} else {
 		n = ioctl(_conn->fd, ETM_XPORT_IOCTL_DATA_PEEK, &peek_ctl);
 	}
@@ -575,7 +551,7 @@

 	if (etm_xport_debug_lvl >= 3) {
 		fmd_hdl_debug(hdl, "info: [fake] ioctl(_PEEK) ~= %d bytes\n",
-				rv);
+		    rv);
 	}
 	return (rv);
@@ -633,7 +609,7 @@

 	if (etm_xport_irb_mtu_sz == 0) {
 		if ((n = etm_xport_get_opt(hdl, _conn,
-					ETM_XPORT_OPT_MTU_SZ)) < 0) {
+		    ETM_XPORT_OPT_MTU_SZ)) < 0) {
 			etm_xport_irb_mtu_sz = ETM_XPORT_MTU_SZ_DEF;
 		} else {
 			etm_xport_irb_mtu_sz = n;
@@ -641,7 +617,7 @@
 	}
 	if (etm_xport_irb_area == NULL) {
 		etm_xport_irb_area = fmd_hdl_zalloc(hdl,
-				2 * etm_xport_irb_mtu_sz, FMD_SLEEP);
+		    2 * etm_xport_irb_mtu_sz, FMD_SLEEP);
 		etm_xport_irb_head = etm_xport_irb_area;
 		etm_xport_irb_tail = etm_xport_irb_head;
 	}
@@ -660,7 +636,7 @@
 		etm_xport_irb_head += byte_cnt;
 		if (etm_xport_debug_lvl >= 2) {
 			fmd_hdl_debug(hdl, "info: quik buffered read == %d\n",
-					byte_cnt);
+			    byte_cnt);
 		}
 		return (byte_cnt);
 	}
@@ -694,7 +670,7 @@
 		i = etm_xport_irb_mtu_sz;
 	} else {
 		if ((i = etm_xport_raw_peek(hdl, _conn, etm_xport_irb_tail,
-						etm_xport_irb_mtu_sz)) < 0) {
+		    etm_xport_irb_mtu_sz)) < 0) {
 			return (i);
 		}
 	}
@@ -751,13 +727,13 @@
 	/* setup statistics and properties from FMD */

 	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
-			sizeof (etm_xport_stats) / sizeof (fmd_stat_t),
-			(fmd_stat_t *)&etm_xport_stats);
+	    sizeof (etm_xport_stats) / sizeof (fmd_stat_t),
+	    (fmd_stat_t *)&etm_xport_stats);

 	etm_xport_debug_lvl = fmd_prop_get_int32(hdl, ETM_PROP_NM_DEBUG_LVL);
 	etm_xport_addrs = fmd_prop_get_string(hdl, ETM_PROP_NM_XPORT_ADDRS);
 	fmd_hdl_debug(hdl, "info: etm_xport_debug_lvl %d\n",
-			etm_xport_debug_lvl);
+	    etm_xport_debug_lvl);
 	fmd_hdl_debug(hdl, "info: etm_xport_addrs %s\n", etm_xport_addrs);

 	/* decide whether to fake [some of] the device driver behavior */
@@ -768,7 +744,7 @@
 	if (stat(fn, &stat_buf) < 0) {
 		/* errno assumed set by above call */
 		fmd_hdl_error(hdl, "error: bad device node %s errno %d\n",
-				fn, errno);
+		    fn, errno);
 		rv = (-errno);
 		goto func_ret;
 	}
@@ -776,7 +752,7 @@
 		etm_xport_should_fake_dd = 1;	/* not a char driver */
 	}
 	fmd_hdl_debug(hdl, "info: etm_xport_should_fake_dd %d\n",
-				etm_xport_should_fake_dd);
+	    etm_xport_should_fake_dd);

 	/* validate each default dst transport address */
@@ -789,17 +765,12 @@
 	for (i = 0; _addrv[i] != NULL; i++) {
 		if ((n = etm_xport_valid_addr(_addrv[i])) < 0) {
 			fmd_hdl_error(hdl, "error: bad xport addr %p\n",
-					_addrv[i]);
+			    _addrv[i]);
 			rv = n;
 			goto func_ret;
 		}
 	} /* foreach dst addr */

-	/* create mutex for xport access serialization */
-
-	(void) pthread_mutex_init(&etm_xport_ser_lock, NULL);
-	etm_xport_ser_conn = NULL;
-
 	if (use_vldc) {
 		etm_xport_vldc_conn = etm_xport_open(hdl, _addrv[0]);
 		if (etm_xport_vldc_conn == NULL) {
@@ -852,11 +823,12 @@

 	if (use_vldc == 0 || etm_xport_vldc_conn == NULL) {
 		if ((_conn->fd = open(_addr->fn,
-				ETM_XPORT_OPEN_FLAGS, 0)) == -1) {
+		    ETM_XPORT_OPEN_FLAGS, 0)) == -1) {
 			/* errno assumed set by above call */
 			etm_xport_free_addr(hdl, _addr);
 			fmd_hdl_free(hdl, _conn, sizeof (_etm_xport_conn_t));
 			etm_xport_stats.xport_os_open_fail.fmds_value.ui64++;
+			(void) pthread_mutex_unlock(&etm_xport_vldc_lock);
 			return (NULL);
 		}
 	}
@@ -875,6 +847,7 @@
 		etm_xport_free_addr(hdl, _addr);
 		fmd_hdl_free(hdl, _conn, sizeof (_etm_xport_conn_t));
 		etm_xport_stats.xport_os_open_fail.fmds_value.ui64++;
+		(void) pthread_mutex_unlock(&etm_xport_vldc_lock);
 		return (NULL);
 	}
@@ -965,15 +938,6 @@
 		goto func_ret;
 	}

-	/*
-	 * lock mutex to avoid race condition between handler for a new
-	 * connection and the top level connection acceptance server;
-	 * remember the connection handle for close
-	 */
-
-	(void) pthread_mutex_lock(&etm_xport_ser_lock);
-	etm_xport_ser_conn = _conn;
-
 	if (etm_xport_should_fake_dd) {
 		(void) nanosleep(&tms, NULL);	/* delay [for resp capture] */
 		(void) ftruncate(_conn->fd, 0); /* act like socket/queue/pipe */
@@ -984,6 +948,7 @@
 	 * behavior; this will pend until some ETM message is written
 	 * from the other end
 	 */
+
 	if (use_vldc) {
 		pollfd_t pollfd;
@@ -1009,10 +974,6 @@

 	/* cleanup the connection if failed */
 	if (rv == NULL) {
-		if (etm_xport_ser_conn != NULL) {
-			(void) pthread_mutex_unlock(&etm_xport_ser_lock);
-			etm_xport_ser_conn = NULL;
-		}
 		if (_conn != NULL) {
 			(void) etm_xport_close(hdl, _conn);
 		}
@@ -1030,7 +991,7 @@

 	if (etm_xport_debug_lvl >= 2) {
 		fmd_hdl_debug(hdl, "info: accept conn %p w/ *addrp %p\n",
-				rv, (addrp != NULL ? *addrp : NULL));
+		    rv, (addrp != NULL ? *addrp : NULL));
 	}
 	return (rv);
@@ -1077,16 +1038,6 @@

 	(void) pthread_mutex_unlock(&etm_xport_vldc_lock);

-	/*
-	 * unlock the mutex after the device node is closed
-	 * if this is the appropriate connection handle
-	 */
-
-	if (_conn == etm_xport_ser_conn) {
-		etm_xport_ser_conn = NULL;
-		(void) pthread_mutex_unlock(&etm_xport_ser_lock);
-	}
-
 func_ret:

 	/* cleanup the connection */
@@ -1135,7 +1086,7 @@
 		 */

 		_addr = fmd_hdl_zalloc(hdl, sizeof (_etm_xport_addr_t),
-							FMD_SLEEP);
+		    FMD_SLEEP);

 	} else {

@@ -1145,13 +1096,13 @@
 		 */

 		_addr = fmd_hdl_zalloc(hdl, sizeof (_etm_xport_addr_t),
-							FMD_SLEEP);
+		    FMD_SLEEP);

 	} /* whether caller passed in a FMA event */

 	/* allocate vector with 1 non-NULL transport addr */

 	_addrv = fmd_hdl_zalloc(hdl, 2 * sizeof (_etm_xport_addr_t *),
-							FMD_SLEEP);
+	    FMD_SLEEP);

 	_addr->fn = etm_xport_get_fn(hdl, ETM_IO_OP_WR);
 	_addr->magic_num = ETM_XPORT_DD_MAGIC_ADDR;
@@ -1222,19 +1173,19 @@
 	n = 0;
 	for (i = 0; _connv[i] != NULL; i++) {
 		if ((_connv[i]->addr == _addr) ||
-			((_connv[i]->addr != NULL) &&
-			(_connv[i]->addr->fn == _addr->fn))) {
+		    ((_connv[i]->addr != NULL) &&
+		    (_connv[i]->addr->fn == _addr->fn))) {
 			n++;
 		}
 	} /* for counting how many addresses match */

 	_mcv = fmd_hdl_zalloc(hdl, (n + 1) * sizeof (_etm_xport_conn_t *),
-								FMD_SLEEP);
+	    FMD_SLEEP);
 	n = 0;
 	for (i = 0; _connv[i] != NULL; i++) {
 		if ((_connv[i]->addr == _addr) ||
-			((_connv[i]->addr != NULL) &&
-			(_connv[i]->addr->fn == _addr->fn))) {
+		    ((_connv[i]->addr != NULL) &&
+		    (_connv[i]->addr->fn == _addr->fn))) {
 			_mcv[n] = _connv[i];
 			n++;
 		}
@@ -1291,11 +1242,6 @@
 {
 	fmd_hdl_debug(hdl, "info: xport finalizing\n");

-	/* destroy the xport access serialization lock */
-
-	(void) pthread_mutex_destroy(&etm_xport_ser_lock);
-	etm_xport_ser_conn = NULL;
-
 	if (use_vldc && (etm_xport_vldc_conn != NULL)) {
 		(void) etm_xport_close(hdl, etm_xport_vldc_conn);
 		etm_xport_vldc_conn = NULL;
@@ -1309,7 +1255,7 @@

 	if (etm_xport_irb_tail != etm_xport_irb_head) {
 		fmd_hdl_debug(hdl, "warning: xport %d bytes stale data\n",
-				(int)(etm_xport_irb_tail - etm_xport_irb_head));
+		    (int)(etm_xport_irb_tail - etm_xport_irb_head));
 	}
 	fmd_hdl_free(hdl, etm_xport_irb_area, 2 * etm_xport_irb_mtu_sz);
 	etm_xport_irb_area = NULL;
@@ -1408,8 +1354,7 @@
 	op_ctl.oo_opt = opt;

 	if (etm_xport_should_fake_dd) {
-		n = etm_fake_ioctl(_conn->fd, ETM_XPORT_IOCTL_OPT_OP,
-					&op_ctl);
+		n = etm_fake_ioctl(_conn->fd, ETM_XPORT_IOCTL_OPT_OP, &op_ctl);
 	} else if (use_vldc) {
 		if (opt == ETM_XPORT_OPT_MTU_SZ) {
 			vldc_opt_op_t operation;
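Besides deleting the old serialization lock outright, the etm_xport_open() hunks above add pthread_mutex_unlock() calls on the two open-failure returns, closing paths that previously leaked etm_xport_vldc_lock and could wedge every subsequent open. That discipline, as a sketch with hypothetical names (the real lock is etm_xport_vldc_lock):

#include <pthread.h>

extern pthread_mutex_t dev_lock;	/* stands in for etm_xport_vldc_lock */
extern int open_device(void);		/* hypothetical; -1 on failure */

int
open_conn(void)
{
	(void) pthread_mutex_lock(&dev_lock);
	if (open_device() == -1) {
		/* the fix: unlock on the error return, not only on success */
		(void) pthread_mutex_unlock(&dev_lock);
		return (-1);
	}
	/* ... record the new connection state here ... */
	(void) pthread_mutex_unlock(&dev_lock);
	return (0);
}

Every early return taken while a mutex is held must release it; a lock whose owner has already returned can never be unlocked again, which is the same failure mode the deleted etm_xport_ser_lock design was prone to.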