From 8bbfa9f3889b643fc7de82c0c761ef17097f8faf Mon Sep 17 00:00:00 2001 From: Greg Banks Date: Tue, 13 Jan 2009 21:26:34 +1100 Subject: knfsd: remove the nfsd thread busy histogram Stop gathering the data that feeds the 'th' line in /proc/net/rpc/nfsd because the questionable data provided is not worth the scalability impact of calculating it. Instead, always report zeroes. The current approach suffers from three major issues: 1. update_thread_usage() increments buckets by call service time or call arrival time...in jiffies. On lightly loaded machines, call service times are usually < 1 jiffy; on heavily loaded machines call arrival times will be << 1 jiffy. So a large portion of the updates to the buckets are rounded down to zero, and the histogram is undercounting. 2. As seen previously on the nfs mailing list, the format in which the histogram is presented is cryptic, difficult to explain, and difficult to use. 3. Updating the histogram requires taking a global spinlock and dirtying the global variables nfsd_last_call, nfsd_busy, and nfsdstats *twice* on every RPC call, which is a significant scaling limitation. Testing on a 4 CPU 4 NIC Altix using 4 IRIX clients each doing 1K streaming reads at full line rate, shows the stats update code (inlined into nfsd()) takes about 1.7% of each CPU. This patch drops the contribution from nfsd() into the profile noise. This patch is a forward-ported version of knfsd-remove-nfsd-threadstats which has been shipping in the SGI "Enhanced NFS" product since 2006. In that time, exactly one customer has noticed that the threadstats were missing. It has been previously posted: http://article.gmane.org/gmane.linux.nfs/10376 and more recently requested to be posted again. Signed-off-by: Greg Banks Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'fs/nfsd/nfssvc.c') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 07e4f5d7baa..c3eb0759fd5 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -40,9 +40,6 @@ extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); struct timeval nfssvc_boot; -static atomic_t nfsd_busy; -static unsigned long nfsd_last_call; -static DEFINE_SPINLOCK(nfsd_call_lock); /* * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members @@ -227,7 +224,6 @@ int nfsd_create_serv(void) nfsd_max_blksize /= 2; } - atomic_set(&nfsd_busy, 0); nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, AF_INET, nfsd_last_thread, nfsd, THIS_MODULE); @@ -376,26 +372,6 @@ nfsd_svc(unsigned short port, int nrservs) return error; } -static inline void -update_thread_usage(int busy_threads) -{ - unsigned long prev_call; - unsigned long diff; - int decile; - - spin_lock(&nfsd_call_lock); - prev_call = nfsd_last_call; - nfsd_last_call = jiffies; - decile = busy_threads*10/nfsdstats.th_cnt; - if (decile>0 && decile <= 10) { - diff = nfsd_last_call - prev_call; - if ( (nfsdstats.th_usage[decile-1] += diff) >= NFSD_USAGE_WRAP) - nfsdstats.th_usage[decile-1] -= NFSD_USAGE_WRAP; - if (decile == 10) - nfsdstats.th_fullcnt++; - } - spin_unlock(&nfsd_call_lock); -} /* * This is the NFS server kernel thread @@ -464,8 +440,6 @@ nfsd(void *vrqstp) continue; } - update_thread_usage(atomic_read(&nfsd_busy)); - atomic_inc(&nfsd_busy); /* Lock the export hash tables for reading. */ exp_readlock(); @@ -474,8 +448,6 @@ nfsd(void *vrqstp) /* Unlock export hash tables */ exp_readunlock(); - update_thread_usage(atomic_read(&nfsd_busy)); - atomic_dec(&nfsd_busy); } /* Clear signals before calling svc_exit_thread() */ -- cgit v1.2.3 From 03cf6c9f49a8fea953d38648d016e3f46e814991 Mon Sep 17 00:00:00 2001 From: Greg Banks Date: Tue, 13 Jan 2009 21:26:36 +1100 Subject: knfsd: add file to export stats about nfsd pools Add /proc/fs/nfsd/pool_stats to export to userspace various statistics about the operation of rpc server thread pools. This patch is based on a forward-ported version of knfsd-add-pool-thread-stats which has been shipping in the SGI "Enhanced NFS" product since 2006 and which was previously posted: http://article.gmane.org/gmane.linux.nfs/10375 It has also been updated thus: * moved EXPORT_SYMBOL() to near the function it exports * made the new struct struct seq_operations const * used SEQ_START_TOKEN instead of ((void *)1) * merged fix from SGI PV 990526 "sunrpc: use dprintk instead of printk in svc_pool_stats_*()" by Harshula Jayasuriya. * merged fix from SGI PV 964001 "Crash reading pool_stats before nfsds are started". Signed-off-by: Greg Banks Signed-off-by: Harshula Jayasuriya Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/nfsd/nfssvc.c') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index c3eb0759fd5..ef0a3686639 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -546,3 +546,10 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); return 1; } + +int nfsd_pool_stats_open(struct inode *inode, struct file *file) +{ + if (nfsd_serv == NULL) + return -ENODEV; + return svc_pool_stats_open(nfsd_serv, file); +} -- cgit v1.2.3 From 074fe897536f095309c5aaffcf912952882ab2cb Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:15 +0300 Subject: nfsd41: DRC save, restore, and clear functions Cache all the result pages, including the rpc header in rq_respages[0], for a request in the slot table cache entry. Cache the statp pointer from nfsd_dispatch which points into rq_respages[0] just past the rpc header. When setting a cache entry, calculate and save the length of the nfs data minus the rpc header for rq_respages[0]. When replaying a cache entry, replace the cached rpc header with the replayed request rpc result header, unless there is not enough room in the cached results first page. In that case, use the cached rpc header. The sessions fore channel maxresponse size cached is set to NFSD_PAGES_PER_SLOT * PAGE_SIZE. For compounds we are cacheing with operations such as READDIR that use the xdr_buf->pages to hold data, we choose to cache the extra page of data rather than copying data from xdr_buf->pages into the xdr_buf->head page. [nfsd41: limit cache to maxresponsesize_cached] [nfsd41: mv nfsd4_set_statp under CONFIG_NFSD_V4_1] [nfsd41: rename nfsd4_move_pages] [nfsd41: rename page_no variable] [nfsd41: rename nfsd4_set_cache_entry] [nfsd41: fix nfsd41_copy_replay_data comment] [nfsd41: add to nfsd4_set_cache_entry] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/nfsd/nfssvc.c') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ef0a3686639..b5168d1898e 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -515,6 +515,10 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) + rqstp->rq_res.head[0].iov_len; rqstp->rq_res.head[0].iov_len += sizeof(__be32); + /* NFSv4.1 DRC requires statp */ + if (rqstp->rq_vers == 4) + nfsd4_set_statp(rqstp, statp); + /* Now call the procedure handler, and encode NFS status. */ nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); nfserr = map_new_errors(rqstp->rq_vers, nfserr); -- cgit v1.2.3 From c3d06f9ce8544fecfe13e377d1e2c2e47fe18dbc Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:18 +0300 Subject: nfsd41: hard page limit for DRC Use no more than 1/128th of the number of free pages at nfsd startup for the v4.1 DRC. This is an arbitrary default which should probably end up under the control of an administrator. Signed-off-by: Andy Adamson [moved added fields in struct svc_serv under CONFIG_NFSD_V4_1] Signed-off-by: Benny Halevy [fix set_max_drc calculation of sv_drc_max_pages] [moved NFSD_DRC_SIZE_SHIFT's declaration up in header file] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'fs/nfsd/nfssvc.c') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index b5168d1898e..b53a098e97a 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -197,6 +198,26 @@ void nfsd_reset_versions(void) } } +/* + * Each session guarantees a negotiated per slot memory cache for replies + * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated + * NFSv4.1 server might want to use more memory for a DRC than a machine + * with mutiple services. + * + * Impose a hard limit on the number of pages for the DRC which varies + * according to the machines free pages. This is of course only a default. + * + * For now this is a #defined shift which could be under admin control + * in the future. + */ +static void set_max_drc(void) +{ + nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() + >> NFSD_DRC_SIZE_SHIFT; + nfsd_serv->sv_drc_pages_used = 0; + dprintk("%s svc_drc_max_pages %u\n", __func__, + nfsd_serv->sv_drc_max_pages); +} int nfsd_create_serv(void) { @@ -229,6 +250,8 @@ int nfsd_create_serv(void) nfsd_last_thread, nfsd, THIS_MODULE); if (nfsd_serv == NULL) err = -ENOMEM; + else + set_max_drc(); do_gettimeofday(&nfssvc_boot); /* record boot time */ return err; -- cgit v1.2.3 From 8daf220a6a83c47b9648c28bb819c14c60bad7f9 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 3 Apr 2009 08:28:59 +0300 Subject: nfsd41: control nfsv4.1 svc via /proc/fs/nfsd/versions Support enabling and disabling nfsv4.1 via /proc/fs/nfsd/versions by writing the strings "+4.1" or "-4.1" correspondingly. Use user mode nfs-utils (rpc.nfsd option) to enable. This will allow us to get rid of CONFIG_NFSD_V4_1 [nfsd41: disable support for minorversion by default] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'fs/nfsd/nfssvc.c') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index b53a098e97a..e9d57734a34 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -121,6 +121,8 @@ struct svc_program nfsd_program = { }; +u32 nfsd_supported_minorversion; + int nfsd_vers(int vers, enum vers_op change) { if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) @@ -147,6 +149,28 @@ int nfsd_vers(int vers, enum vers_op change) } return 0; } + +int nfsd_minorversion(u32 minorversion, enum vers_op change) +{ + if (minorversion > NFSD_SUPPORTED_MINOR_VERSION) + return -1; + switch(change) { + case NFSD_SET: + nfsd_supported_minorversion = minorversion; + break; + case NFSD_CLEAR: + if (minorversion == 0) + return -1; + nfsd_supported_minorversion = minorversion - 1; + break; + case NFSD_TEST: + return minorversion <= nfsd_supported_minorversion; + case NFSD_AVAIL: + return minorversion <= NFSD_SUPPORTED_MINOR_VERSION; + } + return 0; +} + /* * Maximum number of nfsd processes */ -- cgit v1.2.3 From f0ad670d7061efad138df19aefe569263c4ea37b Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 6 Apr 2009 12:00:36 +0300 Subject: nfsd41: define NFSD_DRC_SIZE_SHIFT in set_max_drc Fixes the following compiler error: fs/nfsd/nfssvc.c: In function 'set_max_drc': fs/nfsd/nfssvc.c:240: error: 'NFSD_DRC_SIZE_SHIFT' undeclared CONFIG_NFSD_V4 is not set Reported-by: Alexander Beregalov Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfsd/nfssvc.c') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index e9d57734a34..469c931cca9 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -236,6 +236,8 @@ void nfsd_reset_versions(void) */ static void set_max_drc(void) { + /* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ + #define NFSD_DRC_SIZE_SHIFT 7 nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT; nfsd_serv->sv_drc_pages_used = 0; -- cgit v1.2.3