Mercurial > illumos > illumos-gate
changeset 13066:feaeaa778d1c
6962670 MSG_EOR is set when a message is not completely received
6973505 (sctp) com/sun/nio/sctp/SctpMultiChannel/SocketOptionTests.java crashes a system
author | Kacheong Poon <Kacheong.Poon@Sun.COM> |
---|---|
date | Mon, 09 Aug 2010 20:07:20 -0700 |
parents | 9377d65d657e |
children | 5b1463e9bb94 |
files | usr/src/cmd/mdb/common/modules/sctp/sctp.c usr/src/uts/common/fs/sockfs/sockcommon.h usr/src/uts/common/fs/sockfs/sockcommon_subr.c usr/src/uts/common/fs/sockfs/sockfilter.c usr/src/uts/common/inet/sctp/sctp.c usr/src/uts/common/inet/sctp/sctp_conn.c usr/src/uts/common/inet/sctp/sctp_impl.h usr/src/uts/common/inet/sctp/sctp_input.c usr/src/uts/common/inet/sctp/sctp_opt_data.c usr/src/uts/common/inet/sctp/sctp_output.c usr/src/uts/common/inet/sockmods/socksctp.c usr/src/uts/common/inet/sockmods/socksctp.h usr/src/uts/common/inet/sockmods/socksctpsubr.c |
diffstat | 13 files changed, 203 insertions(+), 183 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/cmd/mdb/common/modules/sctp/sctp.c Mon Aug 09 19:07:25 2010 -0700 +++ b/usr/src/cmd/mdb/common/modules/sctp/sctp.c Mon Aug 09 20:07:20 2010 -0700 @@ -830,10 +830,10 @@ mdb_printf("%<b>Flow Control%</b>\n"); mdb_printf("tconn_sndbuf\t%?d\n" "conn_sndlowat\t%?d\tfrwnd\t\t%?u\n" - "rwnd\t\t%?u\tinitial rwnd\t%?u\n" + "rwnd\t\t%?u\tlast advertised rwnd\t%?u\n" "rxqueued\t%?u\tcwnd_max\t%?u\n", connp->conn_sndbuf, connp->conn_sndlowat, sctp->sctp_frwnd, - sctp->sctp_rwnd, sctp->sctp_irwnd, sctp->sctp_rxqueued, + sctp->sctp_rwnd, sctp->sctp_arwnd, sctp->sctp_rxqueued, sctp->sctp_cwnd_max); }
--- a/usr/src/uts/common/fs/sockfs/sockcommon.h Mon Aug 09 19:07:25 2010 -0700 +++ b/usr/src/uts/common/fs/sockfs/sockcommon.h Mon Aug 09 20:07:20 2010 -0700 @@ -186,7 +186,7 @@ rval_t *, int); extern void so_enqueue_msg(struct sonode *, mblk_t *, size_t); extern void so_process_new_message(struct sonode *, mblk_t *, mblk_t *); -extern void so_check_flow_control(struct sonode *); +extern boolean_t so_check_flow_control(struct sonode *); extern mblk_t *socopyinuio(uio_t *, ssize_t, size_t, ssize_t, size_t, int *); extern mblk_t *socopyoutuio(mblk_t *, struct uio *, ssize_t, int *);
--- a/usr/src/uts/common/fs/sockfs/sockcommon_subr.c Mon Aug 09 19:07:25 2010 -0700 +++ b/usr/src/uts/common/fs/sockfs/sockcommon_subr.c Mon Aug 09 20:07:20 2010 -0700 @@ -613,9 +613,10 @@ /* * Check flow control on a given sonode. Must have so_lock held, and - * this function will release the hold. + * this function will release the hold. Return true if flow control + * is cleared. */ -void +boolean_t so_check_flow_control(struct sonode *so) { ASSERT(MUTEX_HELD(&so->so_lock)); @@ -635,8 +636,10 @@ } /* filters can start injecting data */ sof_sonode_notify_filters(so, SOF_EV_INJECT_DATA_IN_OK, 0); + return (B_TRUE); } else { mutex_exit(&so->so_lock); + return (B_FALSE); } } @@ -709,7 +712,7 @@ so_process_new_message(so, new_msg_head, new_msg_last_head); } savemp = savemptail = NULL; - rvalp->r_val1 = 0; + rvalp->r_vals = 0; error = 0; mp = so->so_rcv_q_head; @@ -822,7 +825,7 @@ * so_check_flow_control() will drop * so->so_lock. */ - so_check_flow_control(so); + rvalp->r_val2 = so_check_flow_control(so); } } if (mp != NULL) { /* more data blocks in msg */ @@ -840,7 +843,8 @@ * so_check_flow_control() will drop * so->so_lock. */ - so_check_flow_control(so); + rvalp->r_val2 = + so_check_flow_control(so); } } else if (partial_read && !somsghasdata(mp)) { /*
--- a/usr/src/uts/common/fs/sockfs/sockfilter.c Mon Aug 09 19:07:25 2010 -0700 +++ b/usr/src/uts/common/fs/sockfs/sockfilter.c Mon Aug 09 20:07:20 2010 -0700 @@ -1344,7 +1344,7 @@ mutex_enter(&so->so_lock); so->so_rcv_queued += diff; /* so_check_flow_control drops so_lock */ - so_check_flow_control(so); + (void) so_check_flow_control(so); } return (retmp); @@ -1612,7 +1612,7 @@ } so->so_state &= ~SS_FIL_RCV_FLOWCTRL; /* so_check_flow_control drops so_lock */ - so_check_flow_control(so); + (void) so_check_flow_control(so); } ASSERT(MUTEX_NOT_HELD(&so->so_lock)); }
--- a/usr/src/uts/common/inet/sctp/sctp.c Mon Aug 09 19:07:25 2010 -0700 +++ b/usr/src/uts/common/inet/sctp/sctp.c Mon Aug 09 20:07:20 2010 -0700 @@ -336,11 +336,10 @@ } /* - * In there is unread data, send an ABORT and terminate the + * If there is unread data, send an ABORT and terminate the * association. */ - if (sctp->sctp_rxqueued > 0 || sctp->sctp_irwnd > - sctp->sctp_rwnd) { + if (sctp->sctp_rxqueued > 0 || sctp->sctp_ulp_rxqueued > 0) { sctp_user_abort(sctp, NULL); WAKE_SCTP(sctp); return (error); @@ -807,7 +806,8 @@ sctp->sctp_mtu_probe_intvl = sctps->sctps_mtu_probe_interval; sctp->sctp_sack_gaps = 0; - sctp->sctp_sack_toggle = 2; + /* So we will not delay sending the first SACK. */ + sctp->sctp_sack_toggle = sctps->sctps_deferred_acks_max; /* Only need to do the allocation if there is no "cached" one. */ if (sctp->sctp_pad_mp == NULL) { @@ -833,11 +833,13 @@ if (err != 0) goto failure; + sctp->sctp_upcalls = psctp->sctp_upcalls; + sctp->sctp_cookie_lifetime = psctp->sctp_cookie_lifetime; sctp->sctp_cwnd_max = psctp->sctp_cwnd_max; sctp->sctp_rwnd = psctp->sctp_rwnd; - sctp->sctp_irwnd = psctp->sctp_rwnd; + sctp->sctp_arwnd = psctp->sctp_arwnd; sctp->sctp_pd_point = psctp->sctp_pd_point; sctp->sctp_rto_max = psctp->sctp_rto_max; sctp->sctp_rto_max_init = psctp->sctp_rto_max_init; @@ -878,7 +880,7 @@ sctp->sctp_cwnd_max = sctps->sctps_cwnd_max_; sctp->sctp_rwnd = connp->conn_rcvbuf; - sctp->sctp_irwnd = sctp->sctp_rwnd; + sctp->sctp_arwnd = connp->conn_rcvbuf; sctp->sctp_pd_point = sctp->sctp_rwnd; sctp->sctp_rto_max = MSEC_TO_TICK(sctps->sctps_rto_maxg); sctp->sctp_rto_max_init = sctp->sctp_rto_max; @@ -1661,6 +1663,13 @@ int thrs; int max_tasks; + mutex_enter(&sctps->sctps_g_lock); + /* Someone may have beaten us in creating the taskqs. 
*/ + if (sctps->sctps_recvq_tq_list_cur_sz > 0) { + mutex_exit(&sctps->sctps_g_lock); + return; + } + thrs = MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, MAX(ncpus, boot_ncpus))); /* @@ -1688,6 +1697,8 @@ sctps->sctps_recvq_tq_list[0] = taskq_create(tq_name, thrs, minclsyspri, sctp_recvq_tq_task_min, max_tasks, TASKQ_PREPOPULATE); mutex_init(&sctps->sctps_rq_tq_lock, NULL, MUTEX_DEFAULT, NULL); + + mutex_exit(&sctps->sctps_g_lock); } static void
--- a/usr/src/uts/common/inet/sctp/sctp_conn.c Mon Aug 09 19:07:25 2010 -0700 +++ b/usr/src/uts/common/inet/sctp/sctp_conn.c Mon Aug 09 20:07:20 2010 -0700 @@ -128,16 +128,6 @@ SCTP_BIND_HASH(ntohs(aconnp->conn_lport))], acceptor, 0); SCTP_ASSOC_EST(sctps, acceptor); - - /* - * listener->sctp_rwnd should be the default window size or a - * window size changed via SO_RCVBUF option. - */ - acceptor->sctp_rwnd = listener->sctp_rwnd; - acceptor->sctp_irwnd = acceptor->sctp_rwnd; - acceptor->sctp_pd_point = acceptor->sctp_rwnd; - acceptor->sctp_upcalls = listener->sctp_upcalls; - return (0); } @@ -151,7 +141,6 @@ int err; conn_t *connp, *econnp; sctp_stack_t *sctps; - struct sock_proto_props sopp; cred_t *cr; pid_t cpid; in6_addr_t faddr, laddr; @@ -348,17 +337,6 @@ } ASSERT(SCTP_IS_DETACHED(eager)); eager->sctp_detached = B_FALSE; - bzero(&sopp, sizeof (sopp)); - sopp.sopp_flags = SOCKOPT_MAXBLK|SOCKOPT_WROFF; - sopp.sopp_maxblk = strmsgsz; - if (econnp->conn_family == AF_INET) { - sopp.sopp_wroff = sctps->sctps_wroff_xtra + - sizeof (sctp_data_hdr_t) + sctp->sctp_hdr_len; - } else { - sopp.sopp_wroff = sctps->sctps_wroff_xtra + - sizeof (sctp_data_hdr_t) + sctp->sctp_hdr6_len; - } - eager->sctp_ulp_prop(eager->sctp_ulpd, &sopp); return (eager); }
--- a/usr/src/uts/common/inet/sctp/sctp_impl.h Mon Aug 09 19:07:25 2010 -0700 +++ b/usr/src/uts/common/inet/sctp/sctp_impl.h Mon Aug 09 20:07:20 2010 -0700 @@ -660,7 +660,7 @@ #define sctp_ulp_disconnected sctp_upcalls->su_disconnected #define sctp_ulp_opctl sctp_upcalls->su_opctl #define sctp_ulp_recv sctp_upcalls->su_recv -#define sctp_ulp_xmitted sctp_upcalls->su_txq_full +#define sctp_ulp_txq_full sctp_upcalls->su_txq_full #define sctp_ulp_prop sctp_upcalls->su_set_proto_props int32_t sctp_state; @@ -739,8 +739,9 @@ /* Inbound flow control */ int32_t sctp_rwnd; /* Current receive window */ - int32_t sctp_irwnd; /* Initial receive window */ + int32_t sctp_arwnd; /* Last advertised window */ int32_t sctp_rxqueued; /* No. of bytes in RX q's */ + int32_t sctp_ulp_rxqueued; /* Data in ULP */ /* Pre-initialized composite headers */ uchar_t *sctp_iphc; /* v4 sctp/ip hdr template buffer */ @@ -800,7 +801,8 @@ sctp_txq_full : 1, /* the tx queue is full */ sctp_ulp_discon_done : 1, /* ulp_disconnecting done */ - sctp_dummy : 6; + sctp_flowctrld : 1, /* upper layer flow controlled */ + sctp_dummy : 5; } sctp_bits; struct { uint32_t @@ -838,6 +840,7 @@ #define sctp_zero_win_probe sctp_bits.sctp_zero_win_probe #define sctp_txq_full sctp_bits.sctp_txq_full #define sctp_ulp_discon_done sctp_bits.sctp_ulp_discon_done +#define sctp_flowctrld sctp_bits.sctp_flowctrld #define sctp_recvsndrcvinfo sctp_events.sctp_recvsndrcvinfo #define sctp_recvassocevnt sctp_events.sctp_recvassocevnt @@ -960,7 +963,7 @@ if ((sctp)->sctp_txq_full && SCTP_TXQ_LEN(sctp) <= \ (sctp)->sctp_connp->conn_sndlowat) { \ (sctp)->sctp_txq_full = 0; \ - (sctp)->sctp_ulp_xmitted((sctp)->sctp_ulpd, \ + (sctp)->sctp_ulp_txq_full((sctp)->sctp_ulpd, \ B_FALSE); \ }
--- a/usr/src/uts/common/inet/sctp/sctp_input.c Mon Aug 09 19:07:25 2010 -0700 +++ b/usr/src/uts/common/inet/sctp/sctp_input.c Mon Aug 09 20:07:20 2010 -0700 @@ -1295,7 +1295,6 @@ uint32_t tsn; int dlen; boolean_t tpfinished = B_TRUE; - int32_t new_rwnd; sctp_stack_t *sctps = sctp->sctp_sctps; int error; @@ -1542,31 +1541,27 @@ sctp->sctp_rxqueued -= dlen; if (can_deliver) { - /* step past header to the payload */ dmp->b_rptr = (uchar_t *)(dc + 1); if (sctp_input_add_ancillary(sctp, &dmp, dc, fp, ipp, ira) == 0) { dprint(1, ("sctp_data_chunk: delivering %lu bytes\n", msgdsize(dmp))); - sctp->sctp_rwnd -= dlen; /* * We overload the meaning of b_flag for SCTP sockfs * internal use, to advise sockfs of partial delivery * semantics. */ dmp->b_flag = tpfinished ? 0 : SCTP_PARTIAL_DATA; - new_rwnd = sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp, - msgdsize(dmp), 0, &error, NULL); - /* - * Since we always deliver the next TSN data chunk, - * we may buffer a little more than allowed. In - * that case, just mark the window as 0. - */ - if (new_rwnd < 0) - sctp->sctp_rwnd = 0; - else if (new_rwnd > sctp->sctp_rwnd) - sctp->sctp_rwnd = new_rwnd; + if (sctp->sctp_flowctrld) { + sctp->sctp_rwnd -= dlen; + if (sctp->sctp_rwnd < 0) + sctp->sctp_rwnd = 0; + } + if (sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp, + msgdsize(dmp), 0, &error, NULL) <= 0) { + sctp->sctp_flowctrld = B_TRUE; + } SCTP_ACK_IT(sctp, tsn); } else { /* No memory don't ack, the peer will retransmit. */ @@ -1689,7 +1684,6 @@ ipp, ira) == 0) { dprint(1, ("sctp_data_chunk: delivering %lu " "bytes\n", msgdsize(dmp))); - sctp->sctp_rwnd -= dlen; /* * Meaning of b_flag overloaded for SCTP sockfs * internal use, advise sockfs of partial @@ -1697,12 +1691,15 @@ */ dmp->b_flag = tpfinished ? 
0 : SCTP_PARTIAL_DATA; - new_rwnd = sctp->sctp_ulp_recv(sctp->sctp_ulpd, - dmp, msgdsize(dmp), 0, &error, NULL); - if (new_rwnd < 0) - sctp->sctp_rwnd = 0; - else if (new_rwnd > sctp->sctp_rwnd) - sctp->sctp_rwnd = new_rwnd; + if (sctp->sctp_flowctrld) { + sctp->sctp_rwnd -= dlen; + if (sctp->sctp_rwnd < 0) + sctp->sctp_rwnd = 0; + } + if (sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp, + msgdsize(dmp), 0, &error, NULL) <= 0) { + sctp->sctp_flowctrld = B_TRUE; + } SCTP_ACK_IT(sctp, tsn); } else { /* don't ack, the peer will retransmit */ @@ -1772,6 +1769,8 @@ } else { sc->ssc_a_rwnd = 0; } + /* Remember the last window sent to peer. */ + sctp->sctp_arwnd = sc->ssc_a_rwnd; sc->ssc_numfrags = htons(num_gaps); sc->ssc_numdups = 0; @@ -2359,7 +2358,6 @@ dlen += MBLKL(pmp); } if (can_deliver) { - int32_t nrwnd; int error; dmp->b_rptr = (uchar_t *)(dc + 1); @@ -2368,20 +2366,22 @@ if (sctp_input_add_ancillary(sctp, &dmp, dc, fp, ipp, ira) == 0) { sctp->sctp_rxqueued -= dlen; - sctp->sctp_rwnd -= dlen; /* * Override b_flag for SCTP sockfs * internal use */ dmp->b_flag = 0; - nrwnd = sctp->sctp_ulp_recv( + if (sctp->sctp_flowctrld) { + sctp->sctp_rwnd -= dlen; + if (sctp->sctp_rwnd < 0) + sctp->sctp_rwnd = 0; + } + if (sctp->sctp_ulp_recv( sctp->sctp_ulpd, dmp, msgdsize(dmp), - 0, &error, NULL); - if (nrwnd < 0) - sctp->sctp_rwnd = 0; - else if (nrwnd > sctp->sctp_rwnd) - sctp->sctp_rwnd = nrwnd; + 0, &error, NULL) <= 0) { + sctp->sctp_flowctrld = B_TRUE; + } } else { /* * We will resume processing when @@ -4409,33 +4409,30 @@ } /* - * Some amount of data got removed from rx q. - * Check if we should send a window update. - * - * Due to way sctp_rwnd updates are made, ULP can give reports out-of-order. - * To keep from dropping incoming data due to this, we only update - * sctp_rwnd when if it's larger than what we've reported to peer earlier. + * Some amount of data got removed from ULP's receive queue and we can + * push messages up if we are flow controlled before. 
Reset the receive + * window to full capacity (conn_rcvbuf) and check if we should send a + * window update. */ void sctp_recvd(sctp_t *sctp, int len) { - int32_t old, new; sctp_stack_t *sctps = sctp->sctp_sctps; + conn_t *connp = sctp->sctp_connp; + boolean_t send_sack = B_FALSE; ASSERT(sctp != NULL); RUN_SCTP(sctp); - if (len < sctp->sctp_rwnd) { - WAKE_SCTP(sctp); - return; - } - - old = sctp->sctp_rwnd - sctp->sctp_rxqueued; - new = len - sctp->sctp_rxqueued; - sctp->sctp_rwnd = len; - - if (sctp->sctp_state >= SCTPS_ESTABLISHED && - ((old <= new >> 1) || (old < sctp->sctp_mss))) { + sctp->sctp_flowctrld = B_FALSE; + /* This is the amount of data queued in ULP. */ + sctp->sctp_ulp_rxqueued = connp->conn_rcvbuf - len; + + if (connp->conn_rcvbuf - sctp->sctp_arwnd >= sctp->sctp_mss) + send_sack = B_TRUE; + sctp->sctp_rwnd = connp->conn_rcvbuf; + + if (sctp->sctp_state >= SCTPS_ESTABLISHED && send_sack) { sctp->sctp_force_sack = 1; SCTPS_BUMP_MIB(sctps, sctpOutWinUpdate); (void) sctp_sack(sctp, NULL);
--- a/usr/src/uts/common/inet/sctp/sctp_opt_data.c Mon Aug 09 19:07:25 2010 -0700 +++ b/usr/src/uts/common/inet/sctp/sctp_opt_data.c Mon Aug 09 20:07:20 2010 -0700 @@ -1119,7 +1119,7 @@ * protocol and here we just whack it. */ connp->conn_rcvbuf = sctp->sctp_rwnd = *i1; - sctp->sctp_irwnd = sctp->sctp_rwnd; + sctp->sctp_arwnd = sctp->sctp_rwnd; sctp->sctp_pd_point = sctp->sctp_rwnd; sopp.sopp_flags = SOCKOPT_RCVHIWAT;
--- a/usr/src/uts/common/inet/sctp/sctp_output.c Mon Aug 09 19:07:25 2010 -0700 +++ b/usr/src/uts/common/inet/sctp/sctp_output.c Mon Aug 09 20:07:20 2010 -0700 @@ -295,7 +295,7 @@ */ if (SCTP_TXQ_LEN(sctp) >= connp->conn_sndbuf) { sctp->sctp_txq_full = 1; - sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, B_TRUE); + sctp->sctp_ulp_txq_full(sctp->sctp_ulpd, B_TRUE); } if (sctp->sctp_state == SCTPS_ESTABLISHED) sctp_output(sctp, UINT_MAX);
--- a/usr/src/uts/common/inet/sockmods/socksctp.c Mon Aug 09 19:07:25 2010 -0700 +++ b/usr/src/uts/common/inet/sockmods/socksctp.c Mon Aug 09 20:07:20 2010 -0700 @@ -145,6 +145,7 @@ sosctp_close, /* sop_close */ }; +/* All the upcalls expect the upper handle to be sonode. */ sock_upcalls_t sosctp_sock_upcalls = { so_newconn, so_connected, @@ -156,6 +157,7 @@ NULL, /* su_signal_oob */ }; +/* All the upcalls expect the upper handle to be sctp_sonode/sctp_soassoc. */ sock_upcalls_t sosctp_assoc_upcalls = { sctp_assoc_newconn, sctp_assoc_connected, @@ -175,7 +177,6 @@ struct sctp_sonode *ss; struct sctp_sonode *pss; sctp_sockbuf_limits_t sbl; - sock_upcalls_t *upcalls; int err; ss = SOTOSSO(so); @@ -200,19 +201,21 @@ return (0); } + if ((err = secpolicy_basic_net_access(cr)) != 0) + return (err); + if (so->so_type == SOCK_STREAM) { - upcalls = &sosctp_sock_upcalls; + so->so_proto_handle = (sock_lower_handle_t)sctp_create(so, + NULL, so->so_family, so->so_type, SCTP_CAN_BLOCK, + &sosctp_sock_upcalls, &sbl, cr); so->so_mode = SM_CONNREQUIRED; } else { ASSERT(so->so_type == SOCK_SEQPACKET); - upcalls = &sosctp_assoc_upcalls; + so->so_proto_handle = (sock_lower_handle_t)sctp_create(ss, + NULL, so->so_family, so->so_type, SCTP_CAN_BLOCK, + &sosctp_assoc_upcalls, &sbl, cr); } - if ((err = secpolicy_basic_net_access(cr)) != 0) - return (err); - - so->so_proto_handle = (sock_lower_handle_t)sctp_create(so, NULL, - so->so_family, so->so_type, SCTP_CAN_BLOCK, upcalls, &sbl, cr); if (so->so_proto_handle == NULL) return (ENOMEM); @@ -482,7 +485,7 @@ int flags, error = 0; struct T_unitdata_ind *tind; ssize_t orig_resid = uiop->uio_resid; - int len, count, readcnt = 0, rxqueued; + int len, count, readcnt = 0; socklen_t controllen, namelen; void *opt; mblk_t *mp; @@ -591,8 +594,10 @@ msg->msg_flags |= MSG_NOTIFICATION; } - if (!(mp->b_flag & SCTP_PARTIAL_DATA)) + if (!(mp->b_flag & SCTP_PARTIAL_DATA) && + !(rval.r_val1 & MOREDATA)) { msg->msg_flags |= MSG_EOR; + } freemsg(mp); } done: 
@@ -606,7 +611,6 @@ */ if (ssa == NULL) { mutex_enter(&so->so_lock); - rxqueued = so->so_rcv_queued; count = so->so_rcvbuf - so->so_rcv_queued; ASSERT(so->so_rcv_q_head != NULL || @@ -614,16 +618,17 @@ so->so_rcv_queued == 0); so_unlock_read(so); - mutex_exit(&so->so_lock); - if (readcnt > 0 && (((count > 0) && - ((rxqueued + readcnt) >= so->so_rcvlowat)) || - (rxqueued == 0))) { - /* - * If amount of queued data is higher than watermark, - * updata SCTP's idea of available buffer space. - */ + /* + * so_dequeue_msg() sets r_val2 to true if flow control was + * cleared and we need to update SCTP. so_flowctrld was + * cleared in so_dequeue_msg() via so_check_flow_control(). + */ + if (rval.r_val2) { + mutex_exit(&so->so_lock); sctp_recvd((struct sctp_s *)so->so_proto_handle, count); + } else { + mutex_exit(&so->so_lock); } } else { /* @@ -634,26 +639,23 @@ * done in so_dequeue_msg(). */ mutex_enter(&so->so_lock); - rxqueued = ssa->ssa_rcv_queued; - - ssa->ssa_rcv_queued = rxqueued - readcnt; + ssa->ssa_rcv_queued -= readcnt; count = so->so_rcvbuf - ssa->ssa_rcv_queued; so_unlock_read(so); - if (readcnt > 0 && - (((count > 0) && (rxqueued >= so->so_rcvlowat)) || - (ssa->ssa_rcv_queued == 0))) { + if (readcnt > 0 && ssa->ssa_flowctrld && + ssa->ssa_rcv_queued < so->so_rcvlowat) { /* - * If amount of queued data is higher than watermark, - * updata SCTP's idea of available buffer space. + * Need to clear ssa_flowctrld, different from 1-1 + * style. */ + ssa->ssa_flowctrld = B_FALSE; mutex_exit(&so->so_lock); - - sctp_recvd((struct sctp_s *)ssa->ssa_conn, count); - + sctp_recvd(ssa->ssa_conn, count); mutex_enter(&so->so_lock); } + /* * MOREDATA flag is set if all data could not be copied */ @@ -723,7 +725,6 @@ sosctp_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, struct cred *cr) { - struct sctp_sonode *ss = SOTOSSO(so); mblk_t *mctl; struct cmsghdr *cmsg; struct sctp_sndrcvinfo *sinfo; @@ -891,8 +892,8 @@ } /* Copy in the message. 
*/ - if ((error = sosctp_uiomove(mctl, count, ss->ss_wrsize, ss->ss_wroff, - uiop, flags)) != 0) { + if ((error = sosctp_uiomove(mctl, count, so->so_proto_props.sopp_maxblk, + so->so_proto_props.sopp_wroff, uiop, flags)) != 0) { goto error_ret; } error = sctp_sendmsg((struct sctp_s *)so->so_proto_handle, mctl, 0); @@ -1031,9 +1032,8 @@ } else { mutex_exit(&so->so_lock); ssa->ssa_state |= SS_ISDISCONNECTING; - sctp_recvd((struct sctp_s *)ssa->ssa_conn, - so->so_rcvbuf); - error = sctp_disconnect((struct sctp_s *)ssa->ssa_conn); + sctp_recvd(ssa->ssa_conn, so->so_rcvbuf); + error = sctp_disconnect(ssa->ssa_conn); mutex_enter(&so->so_lock); } goto refrele; @@ -1825,8 +1825,8 @@ ss = SOTOSSO(so); /* - * Initiate connection shutdown. Update SCTP's receive - * window. + * Initiate connection shutdown. Tell SCTP if there is any data + * left unread. */ sctp_recvd((struct sctp_s *)so->so_proto_handle, so->so_rcvbuf - so->so_rcv_queued); @@ -1845,9 +1845,9 @@ sosctp_assoc_isdisconnected(ssa, 0); mutex_exit(&so->so_lock); - sctp_recvd((struct sctp_s *)ssa->ssa_conn, - so->so_rcvbuf - ssa->ssa_rcv_queued); - (void) sctp_disconnect((struct sctp_s *)ssa->ssa_conn); + sctp_recvd(ssa->ssa_conn, so->so_rcvbuf - + ssa->ssa_rcv_queued); + (void) sctp_disconnect(ssa->ssa_conn); mutex_enter(&so->so_lock); SSA_REFRELE(ss, ssa); @@ -1879,8 +1879,6 @@ /* We are the sole owner of so now */ mutex_enter(&so->so_lock); - so_rcv_flush(so); - /* Free all pending connections */ so_acceptq_flush(so, B_TRUE); @@ -1908,6 +1906,15 @@ sctp_close((struct sctp_s *)so->so_proto_handle); so->so_proto_handle = NULL; + /* + * Note until sctp_close() is called, SCTP can still send up + * messages, such as event notifications. So we should flush + * the receive buffer after calling sctp_close(). 
+ */ + mutex_enter(&so->so_lock); + so_rcv_flush(so); + mutex_exit(&so->so_lock); + sonode_fini(so); } @@ -1929,8 +1936,8 @@ sock_lower_handle_t connind, sock_downcalls_t *dc, struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **ucp) { - struct sonode *lso = (struct sonode *)parenthandle; - struct sctp_sonode *lss = SOTOSSO(lso); + struct sctp_sonode *lss = (struct sctp_sonode *)parenthandle; + struct sonode *lso = &lss->ss_so; struct sctp_soassoc *ssa; sctp_assoc_t id; @@ -2144,6 +2151,9 @@ ssa->ssa_rcv_queued += len; space_available = so->so_rcvbuf - ssa->ssa_rcv_queued; + if (space_available <= 0) + ssa->ssa_flowctrld = B_TRUE; + so_enqueue_msg(so, mp, len); /* so_notify_data drops so_lock */ @@ -2179,32 +2189,44 @@ struct sock_proto_props *soppp) { struct sctp_soassoc *ssa = (struct sctp_soassoc *)handle; - struct sctp_sonode *ss; + struct sonode *so; if (ssa->ssa_type == SOSCTP_ASSOC) { - ss = ssa->ssa_sonode; - mutex_enter(&ss->ss_so.so_lock); + so = &ssa->ssa_sonode->ss_so; + + mutex_enter(&so->so_lock); - /* - * Only change them if they're set. - */ - if (soppp->sopp_wroff != 0) { + /* Per assoc_id properties. 
*/ + if (soppp->sopp_flags & SOCKOPT_WROFF) ssa->ssa_wroff = soppp->sopp_wroff; - } - if (soppp->sopp_maxblk != 0) { + if (soppp->sopp_flags & SOCKOPT_MAXBLK) ssa->ssa_wrsize = soppp->sopp_maxblk; - } } else { - ss = (struct sctp_sonode *)handle; - mutex_enter(&ss->ss_so.so_lock); + so = &((struct sctp_sonode *)handle)->ss_so; + mutex_enter(&so->so_lock); + + if (soppp->sopp_flags & SOCKOPT_WROFF) + so->so_proto_props.sopp_wroff = soppp->sopp_wroff; + if (soppp->sopp_flags & SOCKOPT_MAXBLK) + so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk; + if (soppp->sopp_flags & SOCKOPT_RCVHIWAT) { + ssize_t lowat; - if (soppp->sopp_wroff != 0) { - ss->ss_wroff = soppp->sopp_wroff; - } - if (soppp->sopp_maxblk != 0) { - ss->ss_wrsize = soppp->sopp_maxblk; + so->so_rcvbuf = soppp->sopp_rxhiwat; + /* + * The low water mark should be adjusted properly + * if the high water mark is changed. It should + * not be bigger than 1/4 of high water mark. + */ + lowat = soppp->sopp_rxhiwat >> 2; + if (so->so_rcvlowat > lowat) { + /* Sanity check... */ + if (lowat == 0) + so->so_rcvlowat = soppp->sopp_rxhiwat; + else + so->so_rcvlowat = lowat; + } } } - - mutex_exit(&ss->ss_so.so_lock); + mutex_exit(&so->so_lock); }
--- a/usr/src/uts/common/inet/sockmods/socksctp.h Mon Aug 09 19:07:25 2010 -0700 +++ b/usr/src/uts/common/inet/sockmods/socksctp.h Mon Aug 09 20:07:20 2010 -0700 @@ -18,9 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SOCKSCTP_H_ @@ -59,9 +59,10 @@ uint_t ssa_state; /* same as so_state */ int ssa_error; /* same as so_error */ boolean_t ssa_snd_qfull; - int ssa_wroff; - size_t ssa_wrsize; + ushort_t ssa_wroff; + ssize_t ssa_wrsize; int ssa_rcv_queued; /* queued rx bytes/# of conn */ + boolean_t ssa_flowctrld; /* receive flow controlled */ }; /* 1-N socket association cache defined in socksctp.c */
--- a/usr/src/uts/common/inet/sockmods/socksctpsubr.c Mon Aug 09 19:07:25 2010 -0700 +++ b/usr/src/uts/common/inet/sockmods/socksctpsubr.c Mon Aug 09 20:07:20 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/types.h> @@ -177,6 +176,7 @@ ssa->ssa_error = 0; ssa->ssa_snd_qfull = 0; ssa->ssa_rcv_queued = 0; + ssa->ssa_flowctrld = B_FALSE; } dprint(2, ("sosctp_assoc_create %p %p\n", (void *)ss, (void *)ssa)); return (ssa); @@ -515,32 +515,37 @@ { mblk_t *mp, **nmp, *last_mp; struct sctp_soassoc *tmp; + struct sonode *nso, *sso; sosctp_so_inherit(ss, nss); - nss->ss_so.so_state |= (ss->ss_so.so_state & (SS_NDELAY|SS_NONBLOCK)); - nss->ss_so.so_state |= + sso = &ss->ss_so; + nso = &nss->ss_so; + + nso->so_state |= (sso->so_state & (SS_NDELAY|SS_NONBLOCK)); + nso->so_state |= (ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING| SS_ISDISCONNECTING|SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ISBOUND)); - nss->ss_so.so_error = ssa->ssa_error; - nss->ss_so.so_snd_qfull = ssa->ssa_snd_qfull; - nss->ss_wroff = ssa->ssa_wroff; - nss->ss_wrsize = ssa->ssa_wrsize; - nss->ss_so.so_rcv_queued = ssa->ssa_rcv_queued; - nss->ss_so.so_proto_handle = (sock_lower_handle_t)ssa->ssa_conn; + nso->so_error = ssa->ssa_error; + nso->so_snd_qfull = ssa->ssa_snd_qfull; + nso->so_proto_props.sopp_wroff = ssa->ssa_wroff; + nso->so_proto_props.sopp_maxblk = ssa->ssa_wrsize; + nso->so_rcv_queued = ssa->ssa_rcv_queued; + nso->so_flowctrld = ssa->ssa_flowctrld; + nso->so_proto_handle = (sock_lower_handle_t)ssa->ssa_conn; /* The peeled off socket is connection oriented */ - nss->ss_so.so_mode |= SM_CONNREQUIRED; + nso->so_mode |= SM_CONNREQUIRED; /* Consolidate all data on a single rcv list */ - if (ss->ss_so.so_rcv_head != NULL) { - so_process_new_message(&ss->ss_so, ss->ss_so.so_rcv_head, - ss->ss_so.so_rcv_last_head); - 
ss->ss_so.so_rcv_head = NULL; - ss->ss_so.so_rcv_last_head = NULL; + if (sso->so_rcv_head != NULL) { + so_process_new_message(&ss->ss_so, sso->so_rcv_head, + sso->so_rcv_last_head); + sso->so_rcv_head = NULL; + sso->so_rcv_last_head = NULL; } - if (nss->ss_so.so_rcv_queued > 0) { - nmp = &ss->ss_so.so_rcv_q_head; + if (nso->so_rcv_queued > 0) { + nmp = &sso->so_rcv_q_head; last_mp = NULL; while ((mp = *nmp) != NULL) { tmp = *(struct sctp_soassoc **)DB_BASE(mp); @@ -560,13 +565,12 @@ if (tmp == ssa) { *nmp = mp->b_next; ASSERT(DB_TYPE(mp) != M_DATA); - if (nss->ss_so.so_rcv_q_last_head == NULL) { - nss->ss_so.so_rcv_q_head = mp; + if (nso->so_rcv_q_last_head == NULL) { + nso->so_rcv_q_head = mp; } else { - nss->ss_so.so_rcv_q_last_head->b_next = - mp; + nso->so_rcv_q_last_head->b_next = mp; } - nss->ss_so.so_rcv_q_last_head = mp; + nso->so_rcv_q_last_head = mp; mp->b_next = NULL; } else { nmp = &mp->b_next; @@ -574,7 +578,7 @@ } } - ss->ss_so.so_rcv_q_last_head = last_mp; + sso->so_rcv_q_last_head = last_mp; } }