changeset 20062:3757e6a94e62

13082 pageout needs a deadman Reviewed by: Andy Fiddaman <andy@omniosce.org> Reviewed by: Jason King <jason.brian.king@gmail.com> Reviewed by: Peter Tribble <peter.tribble@gmail.com> Approved by: Robert Mustacchi <rm@fingolfin.org>
author Joshua M. Clulow <josh@sysmgr.org>
date Mon, 21 Sep 2020 21:58:47 -0700
parents c27fa0abd0d8
children af8fee8a8d70
files usr/src/uts/common/os/clock.c usr/src/uts/common/os/vm_pageout.c usr/src/uts/common/vm/vm_page.c
diffstat 3 files changed, 94 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/os/clock.c	Fri Sep 11 22:06:27 2020 -0700
+++ b/usr/src/uts/common/os/clock.c	Mon Sep 21 21:58:47 2020 -0700
@@ -318,7 +318,9 @@
 cyclic_id_t clock_cyclic;	/* clock()'s cyclic_id */
 cyclic_id_t deadman_cyclic;	/* deadman()'s cyclic_id */
 
-extern void	clock_tick_schedule(int);
+extern void clock_tick_schedule(int);
+extern void set_freemem(void);
+extern void pageout_deadman(void);
 
 static int lgrp_ticks;		/* counter to schedule lgrp load calcs */
 
@@ -400,7 +402,6 @@
 	uint_t	w_io;
 	cpu_t	*cp;
 	cpupart_t *cpupart;
-	extern	void	set_freemem();
 	void	(*funcp)();
 	int32_t ltemp;
 	int64_t lltemp;
@@ -477,6 +478,7 @@
 	if (one_sec) {
 		loadavg_update();
 		deadman_counter++;
+		pageout_deadman();
 	}
 
 	/*
--- a/usr/src/uts/common/os/vm_pageout.c	Fri Sep 11 22:06:27 2020 -0700
+++ b/usr/src/uts/common/os/vm_pageout.c	Mon Sep 21 21:58:47 2020 -0700
@@ -18,13 +18,18 @@
  *
  * CDDL HEADER END
  */
+
+/*
+ * Copyright 2020 Oxide Computer Company
+ */
+
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
-/*	  All Rights Reserved  	*/
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved */
 
 /*
  * University Copyright- Copyright (c) 1982, 1986, 1988
@@ -58,6 +63,7 @@
 #include <sys/tnf_probe.h>
 #include <sys/mem_cage.h>
 #include <sys/time.h>
+#include <sys/stdbool.h>
 
 #include <vm/hat.h>
 #include <vm/as.h>
@@ -504,6 +510,23 @@
 static kmutex_t push_lock;		/* protects req pool */
 static kcondvar_t push_cv;
 
+/*
+ * If pageout() is stuck on a single push for this many seconds,
+ * pageout_deadman() will assume the system has hit a memory deadlock.  If set
+ * to 0, the deadman will have no effect.
+ *
+ * Note that we are only looking for stalls in the calls that pageout() makes
+ * to VOP_PUTPAGE().  These calls are merely asynchronous requests for paging
+ * I/O, which should not take long unless the underlying strategy call blocks
+ * indefinitely for memory.  The actual I/O request happens (or fails) later.
+ */
+uint_t pageout_deadman_seconds = 90;
+
+static uint_t pageout_stucktime = 0;	/* deadman checks seen stuck */
+static bool pageout_pushing = false;	/* set around VOP_PUTPAGE() call */
+static uint64_t pageout_pushcount = 0;	/* number of finished pushes */
+static uint64_t pageout_pushcount_seen = 0; /* pushcount at last reset */
+
 static int async_list_size = 256;	/* number of async request structs */
 
 static void pageout_scanner(void);
@@ -718,6 +741,7 @@
 		}
 		push_list = arg->a_next;
 		arg->a_next = NULL;
+		pageout_pushing = true;
 		mutex_exit(&push_lock);
 
 		if (VOP_PUTPAGE(arg->a_vp, (offset_t)arg->a_off,
@@ -729,6 +753,8 @@
 		VN_RELE(arg->a_vp);
 
 		mutex_enter(&push_lock);
+		pageout_pushing = false;
+		pageout_pushcount++;
 		arg->a_next = req_freelist;	/* back on freelist */
 		req_freelist = arg;
 		push_list_size--;
@@ -928,6 +954,58 @@
 }
 
 /*
+ * The pageout deadman is run once per second by clock().
+ */
+void
+pageout_deadman(void)
+{
+	if (panicstr != NULL) {
+		/*
+		 * There is no pageout after panic.
+		 */
+		return;
+	}
+
+	if (pageout_deadman_seconds == 0) {
+		/*
+		 * The deadman is not enabled (tunable set to zero).
+		 */
+		return;
+	}
+
+	if (!pageout_pushing) {
+		goto reset;
+	}
+
+	/*
+	 * A push is in progress.  The push count only advances when a push
+	 * completes, so an unchanged count means it is the same push:
+	 */
+	if (pageout_pushcount != pageout_pushcount_seen) {
+		/*
+		 * It is a different call from the last check, so we are not
+		 * stuck.
+		 */
+		goto reset;
+	}
+
+	if (++pageout_stucktime >= pageout_deadman_seconds) {
+		panic("pageout_deadman: stuck pushing the same page for %u "
+		    "seconds (freemem is %lu)", pageout_deadman_seconds,
+		    freemem);
+	}
+
+	return;
+
+reset:
+	/*
+	 * Not stuck: zero the stall timer and remember the current count.
+	 */
+	pageout_stucktime = 0;
+	pageout_pushcount_seen = pageout_pushcount;
+}
+
+/*
  * Look at the page at hand.  If it is locked (e.g., for physical i/o),
  * system (u., page table) or free, then leave it alone.  Otherwise,
  * if we are running the front hand, turn off the page's reference bit.
@@ -950,7 +1028,7 @@
 
 	/*
 	 * Skip pages:
-	 * 	- associated with the kernel vnode since
+	 *	- associated with the kernel vnode since
 	 *	    they are always "exclusively" locked.
 	 *	- that are free
 	 *	- that are shared more than po_share'd times
--- a/usr/src/uts/common/vm/vm_page.c	Fri Sep 11 22:06:27 2020 -0700
+++ b/usr/src/uts/common/vm/vm_page.c	Mon Sep 21 21:58:47 2020 -0700
@@ -25,8 +25,8 @@
  * Copyright 2018 Joyent, Inc.
  */
 
-/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989  AT&T	*/
-/*	  All Rights Reserved  	*/
+/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989  AT&T */
+/* All Rights Reserved */
 
 /*
  * University Copyright- Copyright (c) 1982, 1986, 1988
@@ -173,8 +173,8 @@
 	kmutex_t	pcf_lock;	/* protects the structure */
 	uint_t		pcf_count;	/* page count */
 	uint_t		pcf_wait;	/* number of waiters */
-	uint_t		pcf_block; 	/* pcgs flag to page_free() */
-	uint_t		pcf_reserve; 	/* pages freed after pcf_block set */
+	uint_t		pcf_block;	/* pcgs flag to page_free() */
+	uint_t		pcf_reserve;	/* pages freed after pcf_block set */
 	uint_t		pcf_fill[10];	/* to line up on the caches */
 };
 
@@ -1353,7 +1353,7 @@
  * clock() on each TICK.
  */
 void
-set_freemem()
+set_freemem(void)
 {
 	struct pcf	*p;
 	ulong_t		t;
@@ -3920,8 +3920,8 @@
 
 /*
  * This routine reserves availrmem for npages;
- * 	flags: KM_NOSLEEP or KM_SLEEP
- * 	returns 1 on success or 0 on failure
+ *	flags: KM_NOSLEEP or KM_SLEEP
+ *	returns 1 on success or 0 on failure
  */
 int
 page_resv(pgcnt_t npages, uint_t flags)
@@ -3978,7 +3978,7 @@
 page_pp_useclaim(
 	page_t *opp,		/* original page frame losing lock */
 	page_t *npp,		/* new page frame gaining lock */
-	uint_t	write_perm) 	/* set if vpage has PROT_WRITE */
+	uint_t write_perm)	/* set if vpage has PROT_WRITE */
 {
 	int payback = 0;
 	int nidx, oidx;
@@ -4732,7 +4732,7 @@
 
 /*
  * returns
- * 0 		: on success and *nrelocp is number of relocated PAGESIZE pages
+ * 0		: on success and *nrelocp is number of relocated PAGESIZE pages
  * ERANGE	: this is not a base page
  * EBUSY	: failure to get locks on the page/pages
  * ENOMEM	: failure to obtain replacement pages