changeset 1033:00b446a56cb9 draft

WIP: synch: recursive lock class locking

Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
author Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
date Sat, 14 Jan 2023 13:44:48 -0500
parents 111936b70fcc
children
files include/jeffpc/synch.h synch.c
diffstat 2 files changed, 173 insertions(+), 13 deletions(-)
line diff
--- a/include/jeffpc/synch.h	Sat Jan 14 13:44:43 2023 -0500
+++ b/include/jeffpc/synch.h	Sat Jan 14 13:44:48 2023 -0500
@@ -29,6 +29,11 @@
 
 #include <jeffpc/config.h>
 
+enum lock_policy {
+	LOCK_POL_NONE,		/* non-recursive */
+	LOCK_POL_ADDR_ASC,	/* ascending by address */
+};
+
 struct lock_class {
 	const char *name;
 #ifdef JEFFPC_LOCK_TRACKING
@@ -92,14 +97,16 @@
 				}; \
 				mxdestroy(&mx_ctx, (l)); \
 			} while (0)
-#define MXLOCK(l)	do { \
+#define MXLOCK_RECURSIVE(l, pol) \
+			do { \
 				struct lock_context mx_ctx = { \
 					.lockname = #l, \
 					.file = __FILE__, \
 					.line = __LINE__, \
 				}; \
-				mxlock(&mx_ctx, (l)); \
+				mxlock(&mx_ctx, (l), (pol)); \
 			} while (0)
+#define MXLOCK(l)	MXLOCK_RECURSIVE((l), LOCK_POL_NONE)
 #define MXUNLOCK(l)	do { \
 				struct lock_context mx_ctx = { \
 					.lockname = #l, \
@@ -212,7 +219,8 @@
 extern void mxinit(const struct lock_context *where, struct lock *m,
 		   struct lock_class *lc);
 extern void mxdestroy(const struct lock_context *where, struct lock *m);
-extern void mxlock(const struct lock_context *where, struct lock *m);
+extern void mxlock(const struct lock_context *where, struct lock *m,
+		   enum lock_policy pol);
 extern void mxunlock(const struct lock_context *where, struct lock *m);
 
 extern void rwinit(const struct lock_context *where, struct rwlock *l,
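
For context, a minimal caller sketch of the new interface (the helper name, the stdint.h include, and the pre-initialized mutexes are assumptions, not part of this changeset): under LOCK_POL_ADDR_ASC two mutexes of the same lock class have to be acquired in ascending address order, so the caller orders the pointers first.

	#include <stdint.h>

	#include <jeffpc/synch.h>

	/* hypothetical helper: take two same-class mutexes back to back */
	static void lock_pair(struct lock *a, struct lock *b)
	{
		/* LOCK_POL_ADDR_ASC requires ascending acquisition order */
		if ((uintptr_t) a > (uintptr_t) b) {
			struct lock *tmp = a;

			a = b;
			b = tmp;
		}

		MXLOCK_RECURSIVE(a, LOCK_POL_ADDR_ASC);
		MXLOCK_RECURSIVE(b, LOCK_POL_ADDR_ASC);
	}

Both acquisitions have to use the same policy; mixing LOCK_POL_NONE and LOCK_POL_ADDR_ASC on locks of one class trips error_policy_violation() in synch.c below.
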
--- a/synch.c	Sat Jan 14 13:44:43 2023 -0500
+++ b/synch.c	Sat Jan 14 13:44:48 2023 -0500
@@ -75,6 +75,7 @@
 	struct lock_info *info;
 	struct lock_context where;
 	enum synch_type type;
+	enum lock_policy policy;
 	bool rwlock_wr:1;
 };
 
@@ -137,7 +138,7 @@
 	return buf;
 }
 
-static inline char *get_held_chars(struct held_lock *held, char buf[2])
+static inline char *get_held_chars(struct held_lock *held, char buf[3])
 {
 	char *ptr = buf;
 
@@ -152,6 +153,17 @@
 			break;
 	}
 
+	switch (held->policy) {
+		case LOCK_POL_NONE:
+			*ptr = '.';
+			ptr++;
+			break;
+		case LOCK_POL_ADDR_ASC:
+			*ptr = '+';
+			ptr++;
+			break;
+	}
+
 	/* null terminate for good measure */
 	*ptr = '\0';
 
@@ -214,9 +226,9 @@
 		struct lock_info *info = cur->info;
 		struct lock *lock = container_of(info, struct lock, info);
 		char synch_chars[2];
-		char held_chars[2];
+		char held_chars[3];
 
-		cmn_err(CE_CRIT, "lockdep:  %s #%zd: %s (%p) %s <%s%s> acquired at %s:%d",
+		cmn_err(CE_CRIT, "lockdep:  %s #%zd: %s (%p) %s <%s|%s> acquired at %s:%d",
 			(cur == highlight) ? "->" : "  ",
 			i, info->name, lock,
 			synch_type_str(info->type),
@@ -239,6 +251,31 @@
 	panic("lockdep: Aborting - destroying held %s", type);
 }
 
+static void error_policy_violation(struct held_lock *held, struct lock_info *new,
+				   const struct lock_context *where,
+				   enum lock_policy pol)
+{
+	const bool policy_mismatch = pol != held->policy;
+
+	cmn_err(CE_CRIT, "lockdep: recursive locking policy violation detected");
+
+	cmn_err(CE_CRIT, "lockdep: thread is trying to acquire:");
+	print_synch_as(new, where, new->type);
+
+	if (policy_mismatch)
+		cmn_err(CE_CRIT, "lockdep: but the thread is already "
+			"holding a %s of the same class but a different "
+			"policy:", synch_type_str(held->type));
+	else
+		cmn_err(CE_CRIT, "lockdep: but the thread is already "
+			"holding a %s of the same class which violates the "
+			"recursive policy:", synch_type_str(held->type));
+
+	print_held_locks(held);
+
+	atomic_set(&lockdep_on, 0);
+}
+
 static void error_lock(struct held_lock *held, struct lock_info *new,
 		       const struct lock_context *where)
 {
@@ -359,6 +396,29 @@
  * dependency tracking
  */
 #ifdef JEFFPC_LOCK_TRACKING
+static bool check_recursive_policy(struct held_lock *held,
+				   struct lock_info *info)
+{
+	switch (held->policy) {
+		case LOCK_POL_NONE:
+			/* this should never be called with the none policy */
+			panic("%s called with LOCK_POL_NONE", __func__);
+			break;
+		case LOCK_POL_ADDR_ASC: {
+			uintptr_t last, this;
+
+			last = (uintptr_t) held->info;
+			this = (uintptr_t) info;
+
+			if (last > this)
+				return false;
+			break;
+		}
+	}
+
+	return true;
+}
+
 /*
  * Returns a negative int on error, 0 if there was no change to the graph,
  * and a positive int if a new dependency was added.
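
check_recursive_policy() compares the addresses of the embedded lock_info structures; since info sits at a fixed offset inside struct lock, ordering by struct lock address gives the same order. A hypothetical comparator (not part of this changeset) that a caller could hand to qsort() to pre-sort an array of same-class lock pointers before taking them:

	#include <stdint.h>

	#include <jeffpc/synch.h>

	/* hypothetical: sort struct lock pointers by ascending address */
	static int lock_addr_cmp(const void *a, const void *b)
	{
		uintptr_t la = (uintptr_t) *(struct lock * const *) a;
		uintptr_t lb = (uintptr_t) *(struct lock * const *) b;

		if (la < lb)
			return -1;
		if (la > lb)
			return 1;
		return 0;
	}
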
@@ -494,7 +554,7 @@
 
 static void check_unheld_for_lock(struct lock_info *info,
 				  const struct lock_context *where,
-				  bool rwlock_wr)
+				  bool rwlock_wr, enum lock_policy pol)
 {
 #ifdef JEFFPC_LOCK_TRACKING
 	struct held_lock *held;
@@ -507,11 +567,88 @@
 	if (!held)
 		goto out; /* nothing held, nothing to check */
 
+	/*
+	 * Check recursive locking policy
+	 *
+	 * Apply it only if the last acquired lock (1) is not the same lock
+	 * as the one being acquired now, and (2) has the same lock class.
+	 *
+	 * If either of these is not true, we let the deadlock and circular
+	 * dependency checkers do their thing.  This means that if we do:
+	 *
+	 *	MXLOCK(a); // class A
+	 *	MXLOCK(b); // class B
+	 *	MXLOCK(c); // class A again
+	 *
+	 * we will *not* perform recursion policy checks.  Instead we will
+	 * end up with a circular dependency error which will make the
+	 * ordering issue easier to understand.
+	 *
+	 * Note that this doesn't prevent the acquisition and release of
+	 * other locks between recursive acquisitions.  E.g., this is fine:
+	 *
+	 *	MXLOCK(a); // class A
+	 *	MXLOCK(b); // class B
+	 *	MXUNLOCK(b);
+	 *	MXLOCK(c); // class A again
+	 */
+	if (held && (held->info != info) && (held->info->lc == info->lc)) {
+		if (pol != held->policy) {
+			error_policy_violation(held, info, where, pol);
+			return;
+		}
+
+		if (pol != LOCK_POL_NONE) {
+			if (!check_recursive_policy(held, info)) {
+				error_policy_violation(held, info, where, pol);
+				return;
+			}
+
+			/*
+			 * We are recursively locking but it isn't a
+			 * self-deadlock (it is a different lock), the
+			 * policy matches for all currently held locks of
+			 * this class (by induction), and it isn't against
+			 * the policy.
+			 *
+			 * As a result we know that this (1) is *not* a
+			 * deadlock, (2) is *not* a circular dependency
+			 * issue.  (The second point is guaranteed by not
+			 * allowing other locks to be held between the
+			 * recursive calls.  In other words, all the locks
+			 * held for the current lock class are bunched up at
+			 * the top of the held-stack.)
+			 *
+			 * Therefore, we can skip the rest of the checks and
+			 * just push a new entry to the held stack.
+			 */
+
+			goto out;
+		}
+
+		/*
+		 * No policy was specified, let the deadlock detector below
+		 * deal with it.
+		 */
+	}
+
 	/* check for deadlocks & recursive locking */
 	for_each_held_lock(i, held) {
+		/*
+		 * We are dealing with recursion if held points to the same
+		 * lock as what was passed in, or if it points to a
+		 * different lock of the same lock class.  All else is safe.
+		 */
 		if ((held->info != info) && (held->info->lc != info->lc))
 			continue;
 
+		/*
+		 * Recursive locking detected.
+		 *
+		 * All the safe situations have been handled just before
+		 * this loop.
+		 */
+
 		error_lock(held, info, where);
 		return;
 	}
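
The comment above boils down to: all currently held locks of a class sit together at the top of the held-stack and, under LOCK_POL_ADDR_ASC, were taken in ascending address order. A conforming caller sketch, assuming an already-initialized array of same-class locks (the helper names are illustrative, not part of this changeset):

	#include <stddef.h>

	#include <jeffpc/synch.h>

	/* take every lock in the array; array order is address order */
	static void lock_all(struct lock *locks, size_t n)
	{
		size_t i;

		for (i = 0; i < n; i++)
			MXLOCK_RECURSIVE(&locks[i], LOCK_POL_ADDR_ASC);
	}

	/* release them again, last acquired first */
	static void unlock_all(struct lock *locks, size_t n)
	{
		size_t i;

		for (i = n; i > 0; i--)
			MXUNLOCK(&locks[i - 1]);
	}

Interleaving a lock of a different class between two of these acquisitions would, as the comment above explains, fall through to the regular deadlock and circular-dependency checks instead.
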
@@ -530,6 +667,7 @@
 	held->info = info;
 	held->where = *where;
 	held->type = info->type;
+	held->policy = pol;
 	held->rwlock_wr = rwlock_wr;
 #endif
 }
@@ -574,6 +712,18 @@
 #endif
 }
 
+/* return true if the passed-in policy is known */
+static bool verify_lock_policy(enum lock_policy pol)
+{
+	switch (pol) {
+		case LOCK_POL_NONE:
+		case LOCK_POL_ADDR_ASC:
+			return true;
+	}
+
+	return false;
+}
+
 static void verify_lock_init(const struct lock_context *where, struct lock *l,
 			     struct lock_class *lc)
 {
@@ -601,13 +751,14 @@
 	/* keep the synch type set to aid debugging */
 }
 
-static void verify_lock_lock(const struct lock_context *where, struct lock *l)
+static void verify_lock_lock(const struct lock_context *where, struct lock *l,
+			     enum lock_policy pol)
 {
-	if (!l)
+	if (!l || !verify_lock_policy(pol))
 		print_invalid_call("MXLOCK", where);
 
 	check_magic(&l->info, "acquire", where, SYNCH_TYPE_MUTEX);
-	check_unheld_for_lock(&l->info, where, false);
+	check_unheld_for_lock(&l->info, where, false, pol);
 }
 
 static void verify_lock_unlock(const struct lock_context *where, struct lock *l)
@@ -653,7 +804,7 @@
 		print_invalid_call("RWLOCK", where);
 
 	check_magic(&l->info, "acquire", where, SYNCH_TYPE_RW);
-	check_unheld_for_lock(&l->info, where, wr);
+	check_unheld_for_lock(&l->info, where, wr, LOCK_POL_NONE);
 }
 
 static void verify_rw_unlock(const struct lock_context *where, struct rwlock *l)
@@ -797,11 +948,12 @@
 		      where->file, where->line, strerror(ret));
 }
 
-void mxlock(const struct lock_context *where, struct lock *l)
+void mxlock(const struct lock_context *where, struct lock *l,
+	    enum lock_policy pol)
 {
 	int ret;
 
-	verify_lock_lock(where, l);
+	verify_lock_lock(where, l, pol);
 
 	ret = pthread_mutex_lock(&l->lock);
 	if (ret)