changeset 18419:d4b857e15a17

OS-4213 lxbrand should be able to set TCP_DEFER_ACCEPT after other socket operations
author Jerry Jelinek <jerry.jelinek@joyent.com>
date Thu, 23 Apr 2015 12:28:14 +0000
parents 2e4343a78c78
children 2100f199142e
files usr/src/uts/common/fs/sockfs/sockcommon_sops.c usr/src/uts/common/fs/sockfs/sockfilter.c usr/src/uts/common/fs/sockfs/sockfilter_impl.h usr/src/uts/common/inet/sockmods/datafilt.c usr/src/uts/common/sys/socketvar.h usr/src/uts/common/sys/sockfilter.h
diffstat 6 files changed, 69 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/fs/sockfs/sockcommon_sops.c	Wed Apr 22 22:12:34 2015 -0700
+++ b/usr/src/uts/common/fs/sockfs/sockcommon_sops.c	Thu Apr 23 12:28:14 2015 +0000
@@ -128,7 +128,7 @@
 {
 	int error;
 
-	SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));
+	SO_BLOCK_FALLBACK_SAFE(so, SOP_BIND(so, name, namelen, flags, cr));
 
 	ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);
 
@@ -648,7 +648,7 @@
 {
 	int error;
 
-	SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
+	SO_BLOCK_FALLBACK_SAFE(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
 
 	if (so->so_filter_active == 0 ||
 	    (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
@@ -697,7 +697,7 @@
 	if (level == SOL_FILTER)
 		return (sof_getsockopt(so, option_name, optval, optlenp, cr));
 
-	SO_BLOCK_FALLBACK(so,
+	SO_BLOCK_FALLBACK_SAFE(so,
 	    SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));
 
 	if ((so->so_filter_active == 0 ||
@@ -786,7 +786,7 @@
 	if (level == SOL_FILTER)
 		return (sof_setsockopt(so, option_name, optval, optlen, cr));
 
-	SO_BLOCK_FALLBACK(so,
+	SO_BLOCK_FALLBACK_SAFE(so,
 	    SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
 
 	/* X/Open requires this check */
--- a/usr/src/uts/common/fs/sockfs/sockfilter.c	Wed Apr 22 22:12:34 2015 -0700
+++ b/usr/src/uts/common/fs/sockfs/sockfilter.c	Thu Apr 23 12:28:14 2015 +0000
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
  */
 
 #include <sys/systm.h>
@@ -246,6 +247,18 @@
 
 		/* Module loaded OK, so there must be an ops vector */
 		ASSERT(ent->sofe_mod != NULL);
+
+		/*
+		 * Check again to confirm ATTACH is ok. Reject the attach if
+		 * the module is not SOF_ATT_SAFE and an unsafe operation has
+		 * already taken place on this socket.
+		 */
+		if ((ent->sofe_mod->sofm_flags & SOF_ATT_SAFE) == 0 &&
+		    so->so_state & SS_FILOP_UNSF) {
+			sof_instance_destroy(inst);
+			return (EINVAL);
+		}
+
 		inst->sofi_ops = &ent->sofe_mod->sofm_ops;
 
 		SOF_STAT_ADD(inst, tot_active_attach, 1);
@@ -1444,7 +1457,13 @@
  * sof_register(version, name, ops, flags)
  *
  * Register a socket filter identified by name `name' and which should use
- * the ops vector `ops' for event notification. `flags' should be set to 0.
+ * the ops vector `ops' for event notification. `flags' should be set to 0
+ * by default for "unsafe" modules or SOF_ATT_SAFE for "safe" modules. An
+ * unsafe filter is one that cannot be attached after any socket operation has
+ * occurred. This is the legacy default. A "safe" filter can be attached even
+ * after some basic initial socket operations have taken place. This set is
+ * currently bind, getsockname, getsockopt and setsockopt. The order in which
+ * a "safe" filter can be attached is more relaxed, and thus more flexible.
  * On success 0 is returned, otherwise an errno is returned.
  */
 int
@@ -1452,14 +1471,13 @@
 {
 	sof_module_t *mod;
 
-	_NOTE(ARGUNUSED(flags));
-
 	if (version != SOF_VERSION)
 		return (EINVAL);
 
 	mod = kmem_zalloc(sizeof (sof_module_t), KM_SLEEP);
 	mod->sofm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
 	(void) strcpy(mod->sofm_name, name);
+	mod->sofm_flags = flags;
 	mod->sofm_ops = *ops;
 
 	mutex_enter(&sof_module_lock);
--- a/usr/src/uts/common/fs/sockfs/sockfilter_impl.h	Wed Apr 22 22:12:34 2015 -0700
+++ b/usr/src/uts/common/fs/sockfs/sockfilter_impl.h	Thu Apr 23 12:28:14 2015 +0000
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
  */
 
 #ifndef	_SOCKFS_SOCKFILTER_H
@@ -51,6 +52,7 @@
 
 struct sof_module {
 	char		*sofm_name;
+	int		sofm_flags;
 	sof_ops_t	sofm_ops;
 	uint_t		sofm_refcnt;
 	list_node_t	sofm_node;
--- a/usr/src/uts/common/inet/sockmods/datafilt.c	Wed Apr 22 22:12:34 2015 -0700
+++ b/usr/src/uts/common/inet/sockmods/datafilt.c	Thu Apr 23 12:28:14 2015 +0000
@@ -85,10 +85,15 @@
 {
 	int error;
 
-	if ((error = sof_register(SOF_VERSION, DATAFILT_MODULE, &dataf_ops, 0))
-	    != 0)
-		return (error);
+	/*
+	 * This module is safe to attach even after some preliminary socket
+	 * setup calls have taken place. See the comment for SOF_ATT_SAFE.
+	 */
+	error = sof_register(SOF_VERSION, DATAFILT_MODULE, &dataf_ops,
+	    SOF_ATT_SAFE);
+	if (error != 0)
+		return (error);
 	if ((error = mod_install(&dataf_modlinkage)) != 0)
 		(void) sof_unregister(DATAFILT_MODULE);
 
 	return (error);
--- a/usr/src/uts/common/sys/socketvar.h	Wed Apr 22 22:12:34 2015 -0700
+++ b/usr/src/uts/common/sys/socketvar.h	Thu Apr 23 12:28:14 2015 +0000
@@ -297,15 +297,16 @@
 #define	SS_OOBPEND		0x00002000 /* OOB pending or present - poll */
 #define	SS_HAVEOOBDATA		0x00004000 /* OOB data present */
 #define	SS_HADOOBDATA		0x00008000 /* OOB data consumed */
+
 #define	SS_CLOSING		0x00010000 /* in process of closing */
-
 #define	SS_FIL_DEFER		0x00020000 /* filter deferred notification */
 #define	SS_FILOP_OK		0x00040000 /* socket can attach filters */
 #define	SS_FIL_RCV_FLOWCTRL	0x00080000 /* filter asserted rcv flow ctrl */
+
 #define	SS_FIL_SND_FLOWCTRL	0x00100000 /* filter asserted snd flow ctrl */
 #define	SS_FIL_STOP		0x00200000 /* no more filter actions */
-
 #define	SS_SODIRECT		0x00400000 /* transport supports sodirect */
+#define	SS_FILOP_UNSF		0x00800000 /* block attaching unsafe filters */
 
 #define	SS_SENTLASTREADSIG	0x01000000 /* last rx signal has been sent */
 #define	SS_SENTLASTWRITESIG	0x02000000 /* last tx signal has been sent */
@@ -321,7 +322,8 @@
 
 /*
  * Sockets that can fall back to TPI must ensure that fall back is not
- * initiated while a thread is using a socket.
+ * initiated while a thread is using a socket. Otherwise this disables all
+ * future filter attachment.
  */
 #define	SO_BLOCK_FALLBACK(so, fn)				\
 	ASSERT(MUTEX_NOT_HELD(&(so)->so_lock));			\
@@ -337,6 +339,24 @@
 		}						\
 	}
 
+/*
+ * Take so_fallback_rwlock as reader so that fall back to TPI cannot be
+ * initiated while a thread is using the socket. If fall back has already
+ * completed, drop the lock and return fn. Otherwise set SS_FILOP_UNSF, which
+ * disables future unsafe filter attachment; safe filters can still attach.
+ */
+#define	SO_BLOCK_FALLBACK_SAFE(so, fn)				\
+	ASSERT(MUTEX_NOT_HELD(&(so)->so_lock));			\
+	rw_enter(&(so)->so_fallback_rwlock, RW_READER);		\
+	if ((so)->so_state & SS_FALLBACK_COMP) {		\
+		rw_exit(&(so)->so_fallback_rwlock);		\
+		return (fn);					\
+	} else if (((so)->so_state & SS_FILOP_UNSF) == 0) {	\
+		mutex_enter(&(so)->so_lock);			\
+		(so)->so_state |= SS_FILOP_UNSF;		\
+		mutex_exit(&(so)->so_lock);			\
+	}
+
 #define	SO_UNBLOCK_FALLBACK(so)	{			\
 	rw_exit(&(so)->so_fallback_rwlock);		\
 }
--- a/usr/src/uts/common/sys/sockfilter.h	Wed Apr 22 22:12:34 2015 -0700
+++ b/usr/src/uts/common/sys/sockfilter.h	Thu Apr 23 12:28:14 2015 +0000
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
  */
 
 #ifndef	_SYS_SOCKFILTER_H
@@ -129,6 +130,15 @@
 
 #define	SOF_VERSION	1
 
+/*
+ * Flag indicating that the filter module is safe to attach after bind,
+ * getsockname, getsockopt or setsockopt calls. By default filters are unsafe
+ * so may not be attached after any socket operation. However, a safe filter
+ * can still be attached after one of the above calls. This makes attaching
+ * the filter less dependent on the initial socket setup order.
+ */
+#define	SOF_ATT_SAFE	0x1
+
 extern int	sof_register(int, const char *, const sof_ops_t *, int);
 extern int	sof_unregister(const char *);