Mercurial > illumos > illumos-gate
changeset 11171:5f22130fc712
6821270 NFSv4 open state id table exhausted
6854700 NFSv4 reaper_thread only runs at 5 minute intervals
6886843 spurious NFS4ERR_SERVERFAULT with 1M nodes in rfs4 db
author | Rick Mesta <rick.mesta@sun.com> |
---|---|
date | Mon, 23 Nov 2009 18:12:24 -0600 |
parents | 349270d482cf |
children | a792f425ae2e |
files | usr/src/uts/common/fs/nfs/nfs4_db.c usr/src/uts/common/fs/nfs/nfs4_srv.c usr/src/uts/common/nfs/nfs4_db_impl.h |
diffstat | 3 files changed, 82 insertions(+), 38 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/usr/src/uts/common/fs/nfs/nfs4_db.c	Mon Nov 23 16:04:12 2009 -0800
+++ b/usr/src/uts/common/fs/nfs/nfs4_db.c	Mon Nov 23 18:12:24 2009 -0600
@@ -32,6 +32,7 @@
 #include <rpc/rpc.h>
 #include <nfs/nfs4.h>
 #include <nfs/nfs4_db_impl.h>
+#include <sys/sdt.h>
 
 static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
 
@@ -40,6 +41,17 @@
 static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
 static void rfs4_start_reaper(rfs4_table_t *);
 
+/*
+ * t_lowat - integer percentage of table entries	/etc/system only
+ * t_hiwat - integer percentage of table entries	/etc/system only
+ * t_lreap - integer percentage of table reap time	mdb or /etc/system
+ * t_hreap - integer percentage of table reap time	mdb or /etc/system
+ */
+uint32_t	t_lowat = 50;	/* reap at t_lreap when id's in use hit 50% */
+uint32_t	t_hiwat = 75;	/* reap at t_hreap when id's in use hit 75% */
+time_t		t_lreap = 50;	/* default to 50% of table's reap interval */
+time_t		t_hreap = 10;	/* default to 10% of table's reap interval */
+
 id_t
 rfs4_dbe_getid(rfs4_dbe_t *entry)
 {
@@ -246,10 +258,10 @@
     uint32_t size, uint32_t hashsize,
     uint32_t maxentries, id_t start)
 {
-	rfs4_table_t *table;
-	int len;
-	char *cache_name;
-	char *id_name;
+	rfs4_table_t	*table;
+	int		 len;
+	char		*cache_name;
+	char		*id_name;
 
 	table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP);
 	table->dbt_db = db;
@@ -283,6 +295,11 @@
 		    maxentries + start);
 		kmem_free(id_name, len + 10);
 	}
+	ASSERT(t_lowat != 0);
+	table->dbt_id_lwat = (maxentries * t_lowat) / 100;
+	ASSERT(t_hiwat != 0);
+	table->dbt_id_hwat = (maxentries * t_hiwat) / 100;
+	table->dbt_id_reap = MIN(rfs4_reap_interval, max_cache_time);
 	table->dbt_maxentries = maxentries;
 	table->dbt_create = create;
 	table->dbt_destroy = destroy;
@@ -500,18 +517,48 @@
 	return (entry);
 }
 
+static void
+rfs4_dbe_tabreap_adjust(rfs4_table_t *table)
+{
+	clock_t		tabreap;
+	clock_t		reap_int;
+	uint32_t	in_use;
+
+	/*
+	 * Adjust the table's reap interval based on the
+	 * number of id's currently in use. Each table's
+	 * default remains the same if id usage subsides.
+	 */
+	ASSERT(MUTEX_HELD(&table->dbt_reaper_cv_lock));
+	tabreap = MIN(rfs4_reap_interval, table->dbt_max_cache_time);
+
+	in_use = table->dbt_count + 1;	/* see rfs4_dbe_create */
+	if (in_use >= table->dbt_id_hwat) {
+		ASSERT(t_hreap != 0);
+		reap_int = (tabreap * t_hreap) / 100;
+	} else if (in_use >= table->dbt_id_lwat) {
+		ASSERT(t_lreap != 0);
+		reap_int = (tabreap * t_lreap) / 100;
+	} else {
+		reap_int = tabreap;
+	}
+	table->dbt_id_reap = reap_int;
+	DTRACE_PROBE2(table__reap__interval, char *,
+	    table->dbt_name, time_t, table->dbt_id_reap);
+}
+
 rfs4_entry_t
 rfs4_dbsearch(rfs4_index_t *idx, void *key, bool_t *create, void *arg,
     rfs4_dbsearch_type_t dbsearch_type)
 {
-	int already_done;
-	uint32_t i;
-	rfs4_table_t *table = idx->dbi_table;
-	rfs4_index_t *ip;
-	rfs4_bucket_t *bp;
-	rfs4_link_t *l;
-	rfs4_dbe_t *entry;
-	id_t id = -1;
+	int		 already_done;
+	uint32_t	 i;
+	rfs4_table_t	*table = idx->dbi_table;
+	rfs4_index_t	*ip;
+	rfs4_bucket_t	*bp;
+	rfs4_link_t	*l;
+	rfs4_dbe_t	*entry;
+	id_t		 id = -1;
 
 	i = HASH(idx, key);
 	bp = &idx->dbi_buckets[i];
@@ -565,17 +612,18 @@
 	}
 
 	if (table->dbt_id_space && id == -1) {
-		/* get an id but don't sleep for it */
-		id = id_alloc_nosleep(table->dbt_id_space);
-		if (id == -1) {
-			rw_exit(bp->dbk_lock);
+		rw_exit(bp->dbk_lock);
+
+		/* get an id, ok to sleep for it here */
+		id = id_alloc(table->dbt_id_space);
+		ASSERT(id != -1);
 
-			/* get an id, ok to sleep for it here */
-			id = id_alloc(table->dbt_id_space);
+		mutex_enter(&table->dbt_reaper_cv_lock);
+		rfs4_dbe_tabreap_adjust(table);
+		mutex_exit(&table->dbt_reaper_cv_lock);
 
-			rw_enter(bp->dbk_lock, RW_WRITER);
-			goto retry;
-		}
+		rw_enter(bp->dbk_lock, RW_WRITER);
+		goto retry;
 	}
 
 	/* get an exclusive lock on the bucket */
@@ -799,12 +847,11 @@
 	    count, cache_time, table->dbt_name));
 }
 
-
 static void
 reaper_thread(caddr_t *arg)
 {
-	rfs4_table_t *table = (rfs4_table_t *)arg;
-	clock_t rc, time, wakeup;
+	rfs4_table_t	*table = (rfs4_table_t *)arg;
+	clock_t		 rc;
 
 	NFS4_DEBUG(table->dbt_debug,
 	    (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
@@ -812,14 +859,12 @@
 	CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
 	    callb_generic_cpr, "nfsv4Reaper");
 
-	time = MIN(rfs4_reap_interval, table->dbt_max_cache_time);
-	wakeup = SEC_TO_TICK(time);
-
 	mutex_enter(&table->dbt_reaper_cv_lock);
 	do {
 		CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info);
 		rc = cv_reltimedwait_sig(&table->dbt_reaper_wait,
-		    &table->dbt_reaper_cv_lock, wakeup, TR_CLOCK_TICK);
+		    &table->dbt_reaper_cv_lock,
+		    SEC_TO_TICK(table->dbt_id_reap), TR_CLOCK_TICK);
 		CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info,
 		    &table->dbt_reaper_cv_lock);
 		rfs4_dbe_reap(table, table->dbt_max_cache_time, 0);
```
```diff
--- a/usr/src/uts/common/fs/nfs/nfs4_srv.c	Mon Nov 23 16:04:12 2009 -0800
+++ b/usr/src/uts/common/fs/nfs/nfs4_srv.c	Mon Nov 23 18:12:24 2009 -0600
@@ -6600,17 +6600,15 @@
 	/* get the file struct and hold a lock on it during initial open */
 	fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
 	if (fp == NULL) {
-		NFS4_DEBUG(rfs4_debug,
-		    (CE_NOTE, "rfs4_do_open: can't find file"));
-		resp->status = NFS4ERR_SERVERFAULT;
+		resp->status = NFS4ERR_RESOURCE;
+		DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
 		return;
 	}
 
 	sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
 	if (sp == NULL) {
-		NFS4_DEBUG(rfs4_debug,
-		    (CE_NOTE, "rfs4_do_open: can't find state"));
 		resp->status = NFS4ERR_RESOURCE;
+		DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
 		/* No need to keep any reference */
 		rw_exit(&fp->rf_file_rwlock);
 		rfs4_file_rele(fp);
@@ -7002,17 +7000,15 @@
 	/* get the file struct and hold a lock on it during initial open */
 	fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
 	if (fp == NULL) {
-		NFS4_DEBUG(rfs4_debug,
-		    (CE_NOTE, "rfs4_do_opendelprev: can't find file"));
-		resp->status = NFS4ERR_SERVERFAULT;
+		resp->status = NFS4ERR_RESOURCE;
+		DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
 		return;
 	}
 
 	sp = rfs4_findstate_by_owner_file(oo, fp, &create);
 	if (sp == NULL) {
-		NFS4_DEBUG(rfs4_debug,
-		    (CE_NOTE, "rfs4_do_opendelprev: can't find state"));
 		resp->status = NFS4ERR_SERVERFAULT;
+		DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
 		rw_exit(&fp->rf_file_rwlock);
 		rfs4_file_rele(fp);
 		return;
```
```diff
--- a/usr/src/uts/common/nfs/nfs4_db_impl.h	Mon Nov 23 16:04:12 2009 -0800
+++ b/usr/src/uts/common/nfs/nfs4_db_impl.h	Mon Nov 23 18:12:24 2009 -0600
@@ -104,6 +104,9 @@
 	uint32_t	dbt_idxcnt;	/* # of indices in table */
 	uint32_t	dbt_maxcnt;	/* max # of indices */
 	uint32_t	dbt_ccnt;	/* # of creatable entries */
+	uint32_t	dbt_id_lwat;	/* lo wtrmrk; 50% ids in use */
+	uint32_t	dbt_id_hwat;	/* hi wtrmrk; 75% ids in use */
+	time_t		dbt_id_reap;	/* table's reap interval */
 	rfs4_index_t	*dbt_indices;	/* list of indices */
 	/* Given entry and data construct entry */
 	bool_t		(*dbt_create)(rfs4_entry_t, void *data);
```