changeset 11171:5f22130fc712

6821270 NFSv4 open state id table exhausted
6854700 NFSv4 reaper_thread only runs at 5 minute intervals
6886843 spurious NFS4ERR_SERVERFAULT with 1M nodes in rfs4 db
author Rick Mesta <rick.mesta@sun.com>
date Mon, 23 Nov 2009 18:12:24 -0600
parents 349270d482cf
children a792f425ae2e
files usr/src/uts/common/fs/nfs/nfs4_db.c usr/src/uts/common/fs/nfs/nfs4_srv.c usr/src/uts/common/nfs/nfs4_db_impl.h
diffstat 3 files changed, 82 insertions(+), 38 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/fs/nfs/nfs4_db.c	Mon Nov 23 16:04:12 2009 -0800
+++ b/usr/src/uts/common/fs/nfs/nfs4_db.c	Mon Nov 23 18:12:24 2009 -0600
@@ -32,6 +32,7 @@
 #include <rpc/rpc.h>
 #include <nfs/nfs4.h>
 #include <nfs/nfs4_db_impl.h>
+#include <sys/sdt.h>
 
 static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
 
@@ -40,6 +41,17 @@
 static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
 static void rfs4_start_reaper(rfs4_table_t *);
 
+/*
+ * Reaper tunables, all integer percentages. t_lowat/t_hiwat (of table
+ * entries) are read when a table is set up, so use /etc/system only.
+ * t_lreap/t_hreap (of the table's reap interval) are re-read at each
+ * interval adjustment, so they may be set via mdb or /etc/system.
+ */
+uint32_t	t_lowat = 50;	/* reap at t_lreap when id's in use hit 50% */
+uint32_t	t_hiwat = 75;	/* reap at t_hreap when id's in use hit 75% */
+time_t		t_lreap = 50;	/* default to 50% of table's reap interval */
+time_t		t_hreap = 10;	/* default to 10% of table's reap interval */
+
 id_t
 rfs4_dbe_getid(rfs4_dbe_t *entry)
 {
@@ -246,10 +258,10 @@
     uint32_t size, uint32_t hashsize,
     uint32_t maxentries, id_t start)
 {
-	rfs4_table_t *table;
-	int len;
-	char *cache_name;
-	char *id_name;
+	rfs4_table_t	*table;
+	int		 len;
+	char		*cache_name;
+	char		*id_name;
 
 	table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP);
 	table->dbt_db = db;
@@ -283,6 +295,11 @@
 		    maxentries + start);
 		kmem_free(id_name, len + 10);
 	}
+	ASSERT(t_lowat != 0);
+	table->dbt_id_lwat = (maxentries * t_lowat) / 100;
+	ASSERT(t_hiwat != 0);
+	table->dbt_id_hwat = (maxentries * t_hiwat) / 100;
+	table->dbt_id_reap = MIN(rfs4_reap_interval, max_cache_time);
 	table->dbt_maxentries = maxentries;
 	table->dbt_create = create;
 	table->dbt_destroy = destroy;
@@ -500,18 +517,48 @@
 	return (entry);
 }
 
+static void
+rfs4_dbe_tabreap_adjust(rfs4_table_t *table)
+{
+	clock_t		tabreap;
+	clock_t		reap_int;
+	uint32_t	in_use;
+
+	/*
+	 * Scale the table's reap interval by the number of id's in use;
+	 * the default applies again once usage subsides. A scaled value
+	 * is floored at 1 sec so the reaper never gets a zero timeout.
+	 */
+	ASSERT(MUTEX_HELD(&table->dbt_reaper_cv_lock));
+	tabreap = MIN(rfs4_reap_interval, table->dbt_max_cache_time);
+
+	in_use = table->dbt_count + 1;	/* see rfs4_dbe_create */
+	if (in_use >= table->dbt_id_hwat) {
+		ASSERT(t_hreap != 0);
+		reap_int = (tabreap * t_hreap) / 100;
+	} else if (in_use >= table->dbt_id_lwat) {
+		ASSERT(t_lreap != 0);
+		reap_int = (tabreap * t_lreap) / 100;
+	} else {
+		reap_int = tabreap;
+	}
+	table->dbt_id_reap = (reap_int < 1 && tabreap >= 1) ? 1 : reap_int;
+	DTRACE_PROBE2(table__reap__interval, char *,
+	    table->dbt_name, time_t, table->dbt_id_reap);
+}
+
 rfs4_entry_t
 rfs4_dbsearch(rfs4_index_t *idx, void *key, bool_t *create, void *arg,
     rfs4_dbsearch_type_t dbsearch_type)
 {
-	int already_done;
-	uint32_t i;
-	rfs4_table_t *table = idx->dbi_table;
-	rfs4_index_t *ip;
-	rfs4_bucket_t *bp;
-	rfs4_link_t *l;
-	rfs4_dbe_t *entry;
-	id_t id = -1;
+	int		 already_done;
+	uint32_t	 i;
+	rfs4_table_t	*table = idx->dbi_table;
+	rfs4_index_t	*ip;
+	rfs4_bucket_t	*bp;
+	rfs4_link_t	*l;
+	rfs4_dbe_t	*entry;
+	id_t		 id = -1;
 
 	i = HASH(idx, key);
 	bp = &idx->dbi_buckets[i];
@@ -565,17 +612,18 @@
 	}
 
 	if (table->dbt_id_space && id == -1) {
-		/* get an id but don't sleep for it */
-		id = id_alloc_nosleep(table->dbt_id_space);
-		if (id == -1) {
-			rw_exit(bp->dbk_lock);
+		rw_exit(bp->dbk_lock);
+
+		/* get an id, ok to sleep for it here */
+		id = id_alloc(table->dbt_id_space);
+		ASSERT(id != -1);
 
-			/* get an id, ok to sleep for it here */
-			id = id_alloc(table->dbt_id_space);
+		mutex_enter(&table->dbt_reaper_cv_lock);
+		rfs4_dbe_tabreap_adjust(table);
+		mutex_exit(&table->dbt_reaper_cv_lock);
 
-			rw_enter(bp->dbk_lock, RW_WRITER);
-			goto retry;
-		}
+		rw_enter(bp->dbk_lock, RW_WRITER);
+		goto retry;
 	}
 
 	/* get an exclusive lock on the bucket */
@@ -799,12 +847,11 @@
 	    count, cache_time, table->dbt_name));
 }
 
-
 static void
 reaper_thread(caddr_t *arg)
 {
-	rfs4_table_t *table = (rfs4_table_t *)arg;
-	clock_t rc, time, wakeup;
+	rfs4_table_t	*table = (rfs4_table_t *)arg;
+	clock_t		 rc;
 
 	NFS4_DEBUG(table->dbt_debug,
 	    (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
@@ -812,14 +859,12 @@
 	CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
 	    callb_generic_cpr, "nfsv4Reaper");
 
-	time = MIN(rfs4_reap_interval, table->dbt_max_cache_time);
-	wakeup = SEC_TO_TICK(time);
-
 	mutex_enter(&table->dbt_reaper_cv_lock);
 	do {
 		CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info);
 		rc = cv_reltimedwait_sig(&table->dbt_reaper_wait,
-		    &table->dbt_reaper_cv_lock, wakeup, TR_CLOCK_TICK);
+		    &table->dbt_reaper_cv_lock,
+		    SEC_TO_TICK(table->dbt_id_reap), TR_CLOCK_TICK);
 		CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info,
 		    &table->dbt_reaper_cv_lock);
 		rfs4_dbe_reap(table, table->dbt_max_cache_time, 0);
--- a/usr/src/uts/common/fs/nfs/nfs4_srv.c	Mon Nov 23 16:04:12 2009 -0800
+++ b/usr/src/uts/common/fs/nfs/nfs4_srv.c	Mon Nov 23 18:12:24 2009 -0600
@@ -6600,17 +6600,15 @@
 	/* get the file struct and hold a lock on it during initial open */
 	fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
 	if (fp == NULL) {
-		NFS4_DEBUG(rfs4_debug,
-		    (CE_NOTE, "rfs4_do_open: can't find file"));
-		resp->status = NFS4ERR_SERVERFAULT;
+		resp->status = NFS4ERR_RESOURCE;
+		DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
 		return;
 	}
 
 	sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
 	if (sp == NULL) {
-		NFS4_DEBUG(rfs4_debug,
-		    (CE_NOTE, "rfs4_do_open: can't find state"));
 		resp->status = NFS4ERR_RESOURCE;
+		DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
 		/* No need to keep any reference */
 		rw_exit(&fp->rf_file_rwlock);
 		rfs4_file_rele(fp);
@@ -7002,17 +7000,15 @@
 	/* get the file struct and hold a lock on it during initial open */
 	fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
 	if (fp == NULL) {
-		NFS4_DEBUG(rfs4_debug,
-		    (CE_NOTE, "rfs4_do_opendelprev: can't find file"));
-		resp->status = NFS4ERR_SERVERFAULT;
+		resp->status = NFS4ERR_RESOURCE;
+		DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
 		return;
 	}
 
 	sp = rfs4_findstate_by_owner_file(oo, fp, &create);
 	if (sp == NULL) {
-		NFS4_DEBUG(rfs4_debug,
-		    (CE_NOTE, "rfs4_do_opendelprev: can't find state"));
 		resp->status = NFS4ERR_SERVERFAULT;
+		DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
 		rw_exit(&fp->rf_file_rwlock);
 		rfs4_file_rele(fp);
 		return;
--- a/usr/src/uts/common/nfs/nfs4_db_impl.h	Mon Nov 23 16:04:12 2009 -0800
+++ b/usr/src/uts/common/nfs/nfs4_db_impl.h	Mon Nov 23 18:12:24 2009 -0600
@@ -104,6 +104,9 @@
 	uint32_t	dbt_idxcnt;		/* # of indices in table */
 	uint32_t	dbt_maxcnt;		/* max # of indices */
 	uint32_t	dbt_ccnt;		/* # of creatable entries */
+	uint32_t	dbt_id_lwat;		/* lo wtrmrk; 50% ids in use */
+	uint32_t	dbt_id_hwat;		/* hi wtrmrk; 75% ids in use */
+	time_t		dbt_id_reap;		/* table's reap interval */
 	rfs4_index_t	*dbt_indices;		/* list of indices */
 	/* Given entry and data construct entry */
 	bool_t (*dbt_create)(rfs4_entry_t, void *data);