diff options
43 files changed, 879 insertions, 517 deletions
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs-rdma.txt new file mode 100644 index 00000000000..1ae34879574 --- /dev/null +++ b/Documentation/filesystems/nfs-rdma.txt @@ -0,0 +1,252 @@ +################################################################################ +# # +# NFS/RDMA README # +# # +################################################################################ + + Author: NetApp and Open Grid Computing + Date: February 25, 2008 + +Table of Contents +~~~~~~~~~~~~~~~~~ + - Overview + - Getting Help + - Installation + - Check RDMA and NFS Setup + - NFS/RDMA Setup + +Overview +~~~~~~~~ + + This document describes how to install and setup the Linux NFS/RDMA client + and server software. + + The NFS/RDMA client was first included in Linux 2.6.24. The NFS/RDMA server + was first included in the following release, Linux 2.6.25. + + In our testing, we have obtained excellent performance results (full 10Gbit + wire bandwidth at minimal client CPU) under many workloads. The code passes + the full Connectathon test suite and operates over both Infiniband and iWARP + RDMA adapters. + +Getting Help +~~~~~~~~~~~~ + + If you get stuck, you can ask questions on the + + nfs-rdma-devel@lists.sourceforge.net + + mailing list. + +Installation +~~~~~~~~~~~~ + + These instructions are a step by step guide to building a machine for + use with NFS/RDMA. + + - Install an RDMA device + + Any device supported by the drivers in drivers/infiniband/hw is acceptable. + + Testing has been performed using several Mellanox-based IB cards, the + Ammasso AMS1100 iWARP adapter, and the Chelsio cxgb3 iWARP adapter. + + - Install a Linux distribution and tools + + The first kernel release to contain both the NFS/RDMA client and server was + Linux 2.6.25 Therefore, a distribution compatible with this and subsequent + Linux kernel release should be installed. + + The procedures described in this document have been tested with + distributions from Red Hat's Fedora Project (http://fedora.redhat.com/). + + - Install nfs-utils-1.1.1 or greater on the client + + An NFS/RDMA mount point can only be obtained by using the mount.nfs + command in nfs-utils-1.1.1 or greater. To see which version of mount.nfs + you are using, type: + + > /sbin/mount.nfs -V + + If the version is less than 1.1.1 or the command does not exist, + then you will need to install the latest version of nfs-utils. + + Download the latest package from: + + http://www.kernel.org/pub/linux/utils/nfs + + Uncompress the package and follow the installation instructions. + + If you will not be using GSS and NFSv4, the installation process + can be simplified by disabling these features when running configure: + + > ./configure --disable-gss --disable-nfsv4 + + For more information on this see the package's README and INSTALL files. + + After building the nfs-utils package, there will be a mount.nfs binary in + the utils/mount directory. This binary can be used to initiate NFS v2, v3, + or v4 mounts. To initiate a v4 mount, the binary must be called mount.nfs4. + The standard technique is to create a symlink called mount.nfs4 to mount.nfs. + + NOTE: mount.nfs and therefore nfs-utils-1.1.1 or greater is only needed + on the NFS client machine. You do not need this specific version of + nfs-utils on the server. Furthermore, only the mount.nfs command from + nfs-utils-1.1.1 is needed on the client. + + - Install a Linux kernel with NFS/RDMA + + The NFS/RDMA client and server are both included in the mainline Linux + kernel version 2.6.25 and later. This and other versions of the 2.6 Linux + kernel can be found at: + + ftp://ftp.kernel.org/pub/linux/kernel/v2.6/ + + Download the sources and place them in an appropriate location. + + - Configure the RDMA stack + + Make sure your kernel configuration has RDMA support enabled. Under + Device Drivers -> InfiniBand support, update the kernel configuration + to enable InfiniBand support [NOTE: the option name is misleading. Enabling + InfiniBand support is required for all RDMA devices (IB, iWARP, etc.)]. + + Enable the appropriate IB HCA support (mlx4, mthca, ehca, ipath, etc.) or + iWARP adapter support (amso, cxgb3, etc.). + + If you are using InfiniBand, be sure to enable IP-over-InfiniBand support. + + - Configure the NFS client and server + + Your kernel configuration must also have NFS file system support and/or + NFS server support enabled. These and other NFS related configuration + options can be found under File Systems -> Network File Systems. + + - Build, install, reboot + + The NFS/RDMA code will be enabled automatically if NFS and RDMA + are turned on. The NFS/RDMA client and server are configured via the hidden + SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The + value of SUNRPC_XPRT_RDMA will be: + + - N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client + and server will not be built + - M if both SUNRPC and INFINIBAND are on (M or Y) and at least one is M, + in this case the NFS/RDMA client and server will be built as modules + - Y if both SUNRPC and INFINIBAND are Y, in this case the NFS/RDMA client + and server will be built into the kernel + + Therefore, if you have followed the steps above and turned no NFS and RDMA, + the NFS/RDMA client and server will be built. + + Build a new kernel, install it, boot it. + +Check RDMA and NFS Setup +~~~~~~~~~~~~~~~~~~~~~~~~ + + Before configuring the NFS/RDMA software, it is a good idea to test + your new kernel to ensure that the kernel is working correctly. + In particular, it is a good idea to verify that the RDMA stack + is functioning as expected and standard NFS over TCP/IP and/or UDP/IP + is working properly. + + - Check RDMA Setup + + If you built the RDMA components as modules, load them at + this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel + card: + + > modprobe ib_mthca + > modprobe ib_ipoib + + If you are using InfiniBand, make sure there is a Subnet Manager (SM) + running on the network. If your IB switch has an embedded SM, you can + use it. Otherwise, you will need to run an SM, such as OpenSM, on one + of your end nodes. + + If an SM is running on your network, you should see the following: + + > cat /sys/class/infiniband/driverX/ports/1/state + 4: ACTIVE + + where driverX is mthca0, ipath5, ehca3, etc. + + To further test the InfiniBand software stack, use IPoIB (this + assumes you have two IB hosts named host1 and host2): + + host1> ifconfig ib0 a.b.c.x + host2> ifconfig ib0 a.b.c.y + host1> ping a.b.c.y + host2> ping a.b.c.x + + For other device types, follow the appropriate procedures. + + - Check NFS Setup + + For the NFS components enabled above (client and/or server), + test their functionality over standard Ethernet using TCP/IP or UDP/IP. + +NFS/RDMA Setup +~~~~~~~~~~~~~~ + + We recommend that you use two machines, one to act as the client and + one to act as the server. + + One time configuration: + + - On the server system, configure the /etc/exports file and + start the NFS/RDMA server. + + Exports entries with the following format have been tested: + + /vol0 10.97.103.47(rw,async) 192.168.0.47(rw,async,insecure,no_root_squash) + + Here the first IP address is the client's Ethernet address and the second + IP address is the clients IPoIB address. + + Each time a machine boots: + + - Load and configure the RDMA drivers + + For InfiniBand using a Mellanox adapter: + + > modprobe ib_mthca + > modprobe ib_ipoib + > ifconfig ib0 a.b.c.d + + NOTE: use unique addresses for the client and server + + - Start the NFS server + + If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config), + load the RDMA transport module: + + > modprobe svcrdma + + Regardless of how the server was built (module or built-in), start the server: + + > /etc/init.d/nfs start + + or + + > service nfs start + + Instruct the server to listen on the RDMA transport: + + > echo rdma 2050 > /proc/fs/nfsd/portlist + + - On the client system + + If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config), + load the RDMA client module: + + > modprobe xprtrdma.ko + + Regardless of how the client was built (module or built-in), issue the mount.nfs command: + + > /path/to/your/mount.nfs <IPoIB-server-name-or-address>:/<export> /mnt -i -o rdma,port=2050 + + To verify that the mount is using RDMA, run "cat /proc/mounts" and check the + "proto" field for the given mount. + + Congratulations! You're using NFS/RDMA! diff --git a/fs/Kconfig b/fs/Kconfig index 8b18a875867..56c83f40cdb 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -411,7 +411,7 @@ config JFS_STATISTICS to be made available to the user in the /proc/fs/jfs/ directory. config FS_POSIX_ACL -# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs) +# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/nfs4) # # NOTE: you can implement Posix ACLs without these helpers (XFS does). # Never use this symbol for ifdefs. @@ -1694,75 +1694,80 @@ config NFSD select LOCKD select SUNRPC select EXPORTFS - select NFSD_V2_ACL if NFSD_V3_ACL select NFS_ACL_SUPPORT if NFSD_V2_ACL - select NFSD_TCP if NFSD_V4 - select CRYPTO_MD5 if NFSD_V4 - select CRYPTO if NFSD_V4 - select FS_POSIX_ACL if NFSD_V4 - select PROC_FS if NFSD_V4 - select PROC_FS if SUNRPC_GSS - help - If you want your Linux box to act as an NFS *server*, so that other - computers on your local network which support NFS can access certain - directories on your box transparently, you have two options: you can - use the self-contained user space program nfsd, in which case you - should say N here, or you can say Y and use the kernel based NFS - server. The advantage of the kernel based solution is that it is - faster. - - In either case, you will need support software; the respective - locations are given in the file <file:Documentation/Changes> in the - NFS section. - - If you say Y here, you will get support for version 2 of the NFS - protocol (NFSv2). If you also want NFSv3, say Y to the next question - as well. - - Please read the NFS-HOWTO, available from - <http://www.tldp.org/docs.html#howto>. - - To compile the NFS server support as a module, choose M here: the - module will be called nfsd. If unsure, say N. + help + Choose Y here if you want to allow other computers to access + files residing on this system using Sun's Network File System + protocol. To compile the NFS server support as a module, + choose M here: the module will be called nfsd. + + You may choose to use a user-space NFS server instead, in which + case you can choose N here. + + To export local file systems using NFS, you also need to install + user space programs which can be found in the Linux nfs-utils + package, available from http://linux-nfs.org/. More detail about + the Linux NFS server implementation is available via the + exports(5) man page. + + Below you can choose which versions of the NFS protocol are + available to clients mounting the NFS server on this system. + Support for NFS version 2 (RFC 1094) is always available when + CONFIG_NFSD is selected. + + If unsure, say N. config NFSD_V2_ACL bool depends on NFSD config NFSD_V3 - bool "Provide NFSv3 server support" + bool "NFS server support for NFS version 3" depends on NFSD help - If you would like to include the NFSv3 server as well as the NFSv2 - server, say Y here. If unsure, say Y. + This option enables support in your system's NFS server for + version 3 of the NFS protocol (RFC 1813). + + If unsure, say Y. config NFSD_V3_ACL - bool "Provide server support for the NFSv3 ACL protocol extension" + bool "NFS server support for the NFSv3 ACL protocol extension" depends on NFSD_V3 + select NFSD_V2_ACL help - Implement the NFSv3 ACL protocol extension for manipulating POSIX - Access Control Lists on exported file systems. NFS clients should - be compiled with the NFSv3 ACL protocol extension; see the - CONFIG_NFS_V3_ACL option. If unsure, say N. + Solaris NFS servers support an auxiliary NFSv3 ACL protocol that + never became an official part of the NFS version 3 protocol. + This protocol extension allows applications on NFS clients to + manipulate POSIX Access Control Lists on files residing on NFS + servers. NFS servers enforce POSIX ACLs on local files whether + this protocol is available or not. + + This option enables support in your system's NFS server for the + NFSv3 ACL protocol extension allowing NFS clients to manipulate + POSIX ACLs on files exported by your system's NFS server. NFS + clients which support the Solaris NFSv3 ACL protocol can then + access and modify ACLs on your NFS server. + + To store ACLs on your NFS server, you also need to enable ACL- + related CONFIG options for your local file systems of choice. + + If unsure, say N. config NFSD_V4 - bool "Provide NFSv4 server support (EXPERIMENTAL)" - depends on NFSD && NFSD_V3 && EXPERIMENTAL + bool "NFS server support for NFS version 4 (EXPERIMENTAL)" + depends on NFSD && PROC_FS && EXPERIMENTAL + select NFSD_V3 + select FS_POSIX_ACL select RPCSEC_GSS_KRB5 help - If you would like to include the NFSv4 server as well as the NFSv2 - and NFSv3 servers, say Y here. This feature is experimental, and - should only be used if you are interested in helping to test NFSv4. - If unsure, say N. + This option enables support in your system's NFS server for + version 4 of the NFS protocol (RFC 3530). -config NFSD_TCP - bool "Provide NFS server over TCP support" - depends on NFSD - default y - help - If you want your NFS server to support TCP connections, say Y here. - TCP connections usually perform better than the default UDP when - the network is lossy or congested. If unsure, say Y. + To export files using NFSv4, you need to install additional user + space programs which can be found in the Linux nfs-utils package, + available from http://linux-nfs.org/. + + If unsure, say N. config ROOT_NFS bool "Root file system on NFS" diff --git a/fs/lockd/host.c b/fs/lockd/host.c index f1ef49fff11..c7854791898 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -19,12 +19,11 @@ #define NLMDBG_FACILITY NLMDBG_HOSTCACHE -#define NLM_HOST_MAX 64 #define NLM_HOST_NRHASH 32 #define NLM_ADDRHASH(addr) (ntohl(addr) & (NLM_HOST_NRHASH-1)) #define NLM_HOST_REBIND (60 * HZ) -#define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ) -#define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ) +#define NLM_HOST_EXPIRE (300 * HZ) +#define NLM_HOST_COLLECT (120 * HZ) static struct hlist_head nlm_hosts[NLM_HOST_NRHASH]; static unsigned long next_gc; @@ -142,9 +141,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin, INIT_LIST_HEAD(&host->h_granted); INIT_LIST_HEAD(&host->h_reclaim); - if (++nrhosts > NLM_HOST_MAX) - next_gc = 0; - + nrhosts++; out: mutex_unlock(&nlm_host_mutex); return host; @@ -460,7 +457,7 @@ nlm_gc_hosts(void) * Manage NSM handles */ static LIST_HEAD(nsm_handles); -static DEFINE_MUTEX(nsm_mutex); +static DEFINE_SPINLOCK(nsm_lock); static struct nsm_handle * __nsm_find(const struct sockaddr_in *sin, @@ -468,7 +465,7 @@ __nsm_find(const struct sockaddr_in *sin, int create) { struct nsm_handle *nsm = NULL; - struct list_head *pos; + struct nsm_handle *pos; if (!sin) return NULL; @@ -482,38 +479,43 @@ __nsm_find(const struct sockaddr_in *sin, return NULL; } - mutex_lock(&nsm_mutex); - list_for_each(pos, &nsm_handles) { - nsm = list_entry(pos, struct nsm_handle, sm_link); +retry: + spin_lock(&nsm_lock); + list_for_each_entry(pos, &nsm_handles, sm_link) { if (hostname && nsm_use_hostnames) { - if (strlen(nsm->sm_name) != hostname_len - || memcmp(nsm->sm_name, hostname, hostname_len)) + if (strlen(pos->sm_name) != hostname_len + || memcmp(pos->sm_name, hostname, hostname_len)) continue; - } else if (!nlm_cmp_addr(&nsm->sm_addr, sin)) + } else if (!nlm_cmp_addr(&pos->sm_addr, sin)) continue; - atomic_inc(&nsm->sm_count); - goto out; + atomic_inc(&pos->sm_count); + kfree(nsm); + nsm = pos; + goto found; } - - if (!create) { - nsm = NULL; - goto out; + if (nsm) { + list_add(&nsm->sm_link, &nsm_handles); + goto found; } + spin_unlock(&nsm_lock); + + if (!create) + return NULL; nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL); - if (nsm != NULL) { - nsm->sm_addr = *sin; - nsm->sm_name = (char *) (nsm + 1); - memcpy(nsm->sm_name, hostname, hostname_len); - nsm->sm_name[hostname_len] = '\0'; - atomic_set(&nsm->sm_count, 1); + if (nsm == NULL) + return NULL; - list_add(&nsm->sm_link, &nsm_handles); - } + nsm->sm_addr = *sin; + nsm->sm_name = (char *) (nsm + 1); + memcpy(nsm->sm_name, hostname, hostname_len); + nsm->sm_name[hostname_len] = '\0'; + atomic_set(&nsm->sm_count, 1); + goto retry; -out: - mutex_unlock(&nsm_mutex); +found: + spin_unlock(&nsm_lock); return nsm; } @@ -532,12 +534,9 @@ nsm_release(struct nsm_handle *nsm) { if (!nsm) return; - if (atomic_dec_and_test(&nsm->sm_count)) { - mutex_lock(&nsm_mutex); - if (atomic_read(&nsm->sm_count) == 0) { - list_del(&nsm->sm_link); - kfree(nsm); - } - mutex_unlock(&nsm_mutex); + if (atomic_dec_and_lock(&nsm->sm_count, &nsm_lock)) { + list_del(&nsm->sm_link); + spin_unlock(&nsm_lock); + kfree(nsm); } } diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 1ed8bd4de94..cf977bbcf30 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -25,6 +25,7 @@ #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/mutex.h> +#include <linux/kthread.h> #include <linux/freezer.h> #include <linux/sunrpc/types.h> @@ -48,14 +49,11 @@ EXPORT_SYMBOL(nlmsvc_ops); static DEFINE_MUTEX(nlmsvc_mutex); static unsigned int nlmsvc_users; -static pid_t nlmsvc_pid; +static struct task_struct *nlmsvc_task; static struct svc_serv *nlmsvc_serv; int nlmsvc_grace_period; unsigned long nlmsvc_timeout; -static DECLARE_COMPLETION(lockd_start_done); -static DECLARE_WAIT_QUEUE_HEAD(lockd_exit); - /* * These can be set at insmod time (useful for NFS as root filesystem), * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003 @@ -111,35 +109,30 @@ static inline void clear_grace_period(void) /* * This is the lockd kernel thread */ -static void -lockd(struct svc_rqst *rqstp) +static int +lockd(void *vrqstp) { - int err = 0; + int err = 0, preverr = 0; + struct svc_rqst *rqstp = vrqstp; unsigned long grace_period_expire; - /* Lock module and set up kernel thread */ - /* lockd_up is waiting for us to startup, so will - * be holding a reference to this module, so it - * is safe to just claim another reference - */ - __module_get(THIS_MODULE); - lock_kernel(); - - /* - * Let our maker know we're running. - */ - nlmsvc_pid = current->pid; - nlmsvc_serv = rqstp->rq_server; - complete(&lockd_start_done); - - daemonize("lockd"); + /* try_to_freeze() is called from svc_recv() */ set_freezable(); - /* Process request with signals blocked, but allow SIGKILL. */ + /* Allow SIGKILL to tell lockd to drop all of its locks */ allow_signal(SIGKILL); dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); + /* + * FIXME: it would be nice if lockd didn't spend its entire life + * running under the BKL. At the very least, it would be good to + * have someone clarify what it's intended to protect here. I've + * seen some handwavy posts about posix locking needing to be + * done under the BKL, but it's far from clear. + */ + lock_kernel(); + if (!nlm_timeout) nlm_timeout = LOCKD_DFLT_TIMEO; nlmsvc_timeout = nlm_timeout * HZ; @@ -148,10 +141,9 @@ lockd(struct svc_rqst *rqstp) /* * The main request loop. We don't terminate until the last - * NFS mount or NFS daemon has gone away, and we've been sent a - * signal, or else another process has taken over our job. + * NFS mount or NFS daemon has gone away. */ - while ((nlmsvc_users || !signalled()) && nlmsvc_pid == current->pid) { + while (!kthread_should_stop()) { long timeout = MAX_SCHEDULE_TIMEOUT; RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); @@ -161,6 +153,7 @@ lockd(struct svc_rqst *rqstp) nlmsvc_invalidate_all(); grace_period_expire = set_grace_period(); } + continue; } /* @@ -179,14 +172,20 @@ lockd(struct svc_rqst *rqstp) * recvfrom routine. */ err = svc_recv(rqstp, timeout); - if (err == -EAGAIN || err == -EINTR) + if (err == -EAGAIN || err == -EINTR) { + preverr = err; continue; + } if (err < 0) { - printk(KERN_WARNING - "lockd: terminating on error %d\n", - -err); - break; + if (err != preverr) { + printk(KERN_WARNING "%s: unexpected error " + "from svc_recv (%d)\n", __func__, err); + preverr = err; + } + schedule_timeout_interruptible(HZ); + continue; } + preverr = err; dprintk("lockd: request from %s\n", svc_print_addr(rqstp, buf, sizeof(buf))); @@ -195,28 +194,19 @@ lockd(struct svc_rqst *rqstp) } flush_signals(current); + if (nlmsvc_ops) + nlmsvc_invalidate_all(); + nlm_shutdown_hosts(); - /* - * Check whether there's a new lockd process before - * shutting down the hosts and clearing the slot. - */ - if (!nlmsvc_pid || current->pid == nlmsvc_pid) { - if (nlmsvc_ops) - nlmsvc_invalidate_all(); - nlm_shutdown_hosts(); - nlmsvc_pid = 0; - nlmsvc_serv = NULL; - } else - printk(KERN_DEBUG - "lockd: new process, skipping host shutdown\n"); - wake_up(&lockd_exit); + unlock_kernel(); + + nlmsvc_task = NULL; + nlmsvc_serv = NULL; /* Exit the RPC thread */ svc_exit_thread(rqstp); - /* Release module */ - unlock_kernel(); - module_put_and_exit(0); + return 0; } /* @@ -261,14 +251,15 @@ static int make_socks(struct svc_serv *serv, int proto) int lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ { - struct svc_serv * serv; - int error = 0; + struct svc_serv *serv; + struct svc_rqst *rqstp; + int error = 0; mutex_lock(&nlmsvc_mutex); /* * Check whether we're already up and running. */ - if (nlmsvc_pid) { + if (nlmsvc_serv) { if (proto) error = make_socks(nlmsvc_serv, proto); goto out; @@ -295,13 +286,28 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ /* * Create the kernel thread and wait for it to start. */ - error = svc_create_thread(lockd, serv); - if (error) { + rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); + if (IS_ERR(rqstp)) { + error = PTR_ERR(rqstp); + printk(KERN_WARNING + "lockd_up: svc_rqst allocation failed, error=%d\n", + error); + goto destroy_and_out; + } + + svc_sock_update_bufs(serv); + nlmsvc_serv = rqstp->rq_server; + + nlmsvc_task = kthread_run(lockd, rqstp, serv->sv_name); + if (IS_ERR(nlmsvc_task)) { + error = PTR_ERR(nlmsvc_task); + nlmsvc_task = NULL; + nlmsvc_serv = NULL; printk(KERN_WARNING - "lockd_up: create thread failed, error=%d\n", error); + "lockd_up: kthread_run failed, error=%d\n", error); + svc_exit_thread(rqstp); goto destroy_and_out; } - wait_for_completion(&lockd_start_done); /* * Note: svc_serv structures have an initial use count of 1, @@ -323,37 +329,21 @@ EXPORT_SYMBOL(lockd_up); void lockd_down(void) { - static int warned; - mutex_lock(&nlmsvc_mutex); if (nlmsvc_users) { if (--nlmsvc_users) goto out; - } else - printk(KERN_WARNING "lockd_down: no users! pid=%d\n", nlmsvc_pid); - - if (!nlmsvc_pid) { - if (warned++ == 0) - printk(KERN_WARNING "lockd_down: no lockd running.\n"); - goto out; + } else { + printk(KERN_ERR "lockd_down: no users! task=%p\n", + nlmsvc_task); + BUG(); } - warned = 0; - kill_proc(nlmsvc_pid, SIGKILL, 1); - /* - * Wait for the lockd process to exit, but since we're holding - * the lockd semaphore, we can't wait around forever ... - */ - clear_thread_flag(TIF_SIGPENDING); - interruptible_sleep_on_timeout(&lockd_exit, HZ); - if (nlmsvc_pid) { - printk(KERN_WARNING - "lockd_down: lockd failed to exit, clearing pid\n"); - nlmsvc_pid = 0; + if (!nlmsvc_task) { + printk(KERN_ERR "lockd_down: no lockd running.\n"); + BUG(); } - spin_lock_irq(¤t->sighand->siglock); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + kthread_stop(nlmsvc_task); out: mutex_unlock(&nlmsvc_mutex); } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index fe9bdb4a220..1f122c1940a 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -29,6 +29,7 @@ #include <linux/sunrpc/svc.h> #include <linux/lockd/nlm.h> #include <linux/lockd/lockd.h> +#include <linux/kthread.h> #define NLMDBG_FACILITY NLMDBG_SVCLOCK @@ -226,8 +227,7 @@ failed: } /* - * Delete a block. If the lock was cancelled or the grant callback - * failed, unlock is set to 1. + * Delete a block. * It is the caller's responsibility to check whether the file * can be closed hereafter. */ @@ -887,7 +887,7 @@ nlmsvc_retry_blocked(void) unsigned long timeout = MAX_SCHEDULE_TIMEOUT; struct nlm_block *block; - while (!list_empty(&nlm_blocked)) { + while (!list_empty(&nlm_blocked) && !kthread_should_stop()) { block = list_entry(nlm_blocked.next, struct nlm_block, b_list); if (block->b_when == NLM_NEVER) diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c index 068886de4dd..b0ae0700870 100644 --- a/fs/lockd/svcshare.c +++ b/fs/lockd/svcshare.c @@ -71,7 +71,8 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file, struct nlm_share *share, **shpp; struct xdr_netobj *oh = &argp->lock.oh; - for (shpp = &file->f_shares; (share = *shpp) != 0; shpp = &share->s_next) { + for (shpp = &file->f_shares; (share = *shpp) != NULL; + shpp = &share->s_next) { if (share->s_host == host && nlm_cmp_owner(share, oh)) { *shpp = share->s_next; kfree(share); diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 66648dd92d9..5606ae3d72d 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -15,6 +15,7 @@ #include <linux/nfs_fs.h> #include <linux/mutex.h> #include <linux/freezer.h> +#include <linux/kthread.h> #include <net/inet_sock.h> @@ -27,9 +28,7 @@ struct nfs_callback_data { unsigned int users; struct svc_serv *serv; - pid_t pid; - struct completion started; - struct completion stopped; + struct task_struct *task; }; static struct nfs_callback_data nfs_callback_info; @@ -57,48 +56,44 @@ module_param_call(callback_tcpport, param_set_port, param_get_int, /* * This is the callback kernel thread. */ -static void nfs_callback_svc(struct svc_rqst *rqstp) +static int +nfs_callback_svc(void *vrqstp) { - int err; + int err, preverr = 0; + struct svc_rqst *rqstp = vrqstp; - __module_get(THIS_MODULE); - lock_kernel(); - - nfs_callback_info.pid = current->pid; - daemonize("nfsv4-svc"); - /* Process request with signals blocked, but allow SIGKILL. */ - allow_signal(SIGKILL); set_freezable(); - complete(&nfs_callback_info.started); - - for(;;) { - if (signalled()) { - if (nfs_callback_info.users == 0) - break; - flush_signals(current); - } + /* + * FIXME: do we really need to run this under the BKL? If so, please + * add a comment about what it's intended to protect. + */ + lock_kernel(); + while (!kthread_should_stop()) { /* * Listen for a request on the socket */ err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT); - if (err == -EAGAIN || err == -EINTR) + if (err == -EAGAIN || err == -EINTR) { + preverr = err; continue; + } if (err < 0) { - printk(KERN_WARNING - "%s: terminating on error %d\n", - __FUNCTION__, -err); - break; + if (err != preverr) { + printk(KERN_WARNING "%s: unexpected error " + "from svc_recv (%d)\n", __func__, err); + preverr = err; + } + schedule_timeout_uninterruptible(HZ); + continue; } + preverr = err; svc_process(rqstp); } - - flush_signals(current); - svc_exit_thread(rqstp); - nfs_callback_info.pid = 0; - complete(&nfs_callback_info.stopped); unlock_kernel(); - module_put_and_exit(0); + nfs_callback_info.task = NULL; + svc_exit_thread(rqstp); + return 0; } /* @@ -107,14 +102,13 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) int nfs_callback_up(void) { struct svc_serv *serv = NULL; + struct svc_rqst *rqstp; int ret = 0; lock_kernel(); mutex_lock(&nfs_callback_mutex); - if (nfs_callback_info.users++ || nfs_callback_info.pid != 0) + if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) goto out; - init_completion(&nfs_callback_info.started); - init_completion(&nfs_callback_info.stopped); serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); ret = -ENOMEM; if (!serv) @@ -127,15 +121,28 @@ int nfs_callback_up(void) nfs_callback_tcpport = ret; dprintk("Callback port = 0x%x\n", nfs_callback_tcpport); - ret = svc_create_thread(nfs_callback_svc, serv); - if (ret < 0) + rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); + if (IS_ERR(rqstp)) { + ret = PTR_ERR(rqstp); goto out_err; + } + + svc_sock_update_bufs(serv); nfs_callback_info.serv = serv; - wait_for_completion(&nfs_callback_info.started); + + nfs_callback_info.task = kthread_run(nfs_callback_svc, rqstp, + "nfsv4-svc"); + if (IS_ERR(nfs_callback_info.task)) { + ret = PTR_ERR(nfs_callback_info.task); + nfs_callback_info.serv = NULL; + nfs_callback_info.task = NULL; + svc_exit_thread(rqstp); + goto out_err; + } out: /* * svc_create creates the svc_serv with sv_nrthreads == 1, and then - * svc_create_thread increments that. So we need to call svc_destroy + * svc_prepare_thread increments that. So we need to call svc_destroy * on both success and failure so that the refcount is 1 when the * thread exits. */ @@ -152,19 +159,15 @@ out_err: } /* - * Kill the server process if it is not already up. + * Kill the server process if it is not already down. */ void nfs_callback_down(void) { lock_kernel(); mutex_lock(&nfs_callback_mutex); nfs_callback_info.users--; - do { - if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0) - break; - if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0) - break; - } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0); + if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) + kthread_stop(nfs_callback_info.task); mutex_unlock(&nfs_callback_mutex); unlock_kernel(); } diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 83e865a16ad..412738dbfbc 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -10,7 +10,6 @@ * nfs symlink handling code */ -#define NFS_NEED_XDR_TYPES #include <linux/time.h> #include <linux/errno.h> #include <linux/sunrpc/clnt.h> diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index d13403e3362..294992e9bf6 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -10,6 +10,7 @@ #include <linux/sunrpc/svcauth.h> #include <linux/nfsd/nfsd.h> #include <linux/nfsd/export.h> +#include "auth.h" int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) { diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8a6f7c924c7..33bfcf09db4 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -35,6 +35,7 @@ #include <linux/lockd/bind.h> #include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/gss_api.h> +#include <net/ipv6.h> #define NFSDDBG_FACILITY NFSDDBG_EXPORT @@ -1548,6 +1549,7 @@ exp_addclient(struct nfsctl_client *ncp) { struct auth_domain *dom; int i, err; + struct in6_addr addr6; /* First, consistency check. */ err = -EINVAL; @@ -1566,9 +1568,10 @@ exp_addclient(struct nfsctl_client *ncp) goto out_unlock; /* Insert client into hashtable. */ - for (i = 0; i < ncp->cl_naddr; i++) - auth_unix_add_addr(ncp->cl_addrlist[i], dom); - + for (i = 0; i < ncp->cl_naddr; i++) { + ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6); + auth_unix_add_addr(&addr6, dom); + } auth_unix_forget_old(dom); auth_domain_put(dom); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index aae2b29ae2c..562abf3380d 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -344,6 +344,21 @@ static struct rpc_version * nfs_cb_version[] = { &nfs_cb_version4, }; +static struct rpc_program cb_program; + +static struct rpc_stat cb_stats = { + .program = &cb_program +}; + +#define NFS4_CALLBACK 0x40000000 +static struct rpc_program cb_program = { + .name = "nfs4_cb", + .number = NFS4_CALLBACK, + .nrvers = ARRAY_SIZE(nfs_cb_version), + .version = nfs_cb_version, + .stats = &cb_stats, +}; + /* Reference counting, callback cleanup, etc., all look racy as heck. * And why is cb_set an atomic? */ @@ -358,13 +373,12 @@ static int do_probe_callback(void *data) .to_maxval = (NFSD_LEASE_TIME/2) * HZ, .to_exponential = 1, }; - struct rpc_program * program = &cb->cb_program; struct rpc_create_args args = { .protocol = IPPROTO_TCP, .address = (struct sockaddr *)&addr, .addrsize = sizeof(addr), .timeout = &timeparms, - .program = program, + .program = &cb_program, .version = nfs_cb_version[1]->number, .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ .flags = (RPC_CLNT_CREATE_NOPING), @@ -382,16 +396,8 @@ static int do_probe_callback(void *data) addr.sin_port = htons(cb->cb_port); addr.sin_addr.s_addr = htonl(cb->cb_addr); - /* Initialize rpc_program */ - program->name = "nfs4_cb"; - program->number = cb->cb_prog; - program->nrvers = ARRAY_SIZE(nfs_cb_version); - program->version = nfs_cb_version; - program->stats = &cb->cb_stat; - /* Initialize rpc_stat */ - memset(program->stats, 0, sizeof(cb->cb_stat)); - program->stats->program = program; + memset(args.program->stats, 0, sizeof(struct rpc_stat)); /* Create RPC client */ client = rpc_create(&args); diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 996bd88b75b..5b398421b05 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -202,7 +202,7 @@ static struct cache_detail idtoname_cache = { .alloc = ent_alloc, }; -int +static int idtoname_parse(struct cache_detail *cd, char *buf, int buflen) { struct ent ent, *res; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 81a75f3081f..55dfdd71f1b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1639,6 +1639,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta locks_init_lock(&fl); fl.fl_lmops = &nfsd_lease_mng_ops; fl.fl_flags = FL_LEASE; + fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; fl.fl_end = OFFSET_MAX; fl.fl_owner = (fl_owner_t)dp; fl.fl_file = stp->st_vfs_file; @@ -1647,8 +1648,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta /* vfs_setlease checks to see if delegation should be handed out. * the lock_manager callbacks fl_mylease and fl_change are used */ - if ((status = vfs_setlease(stp->st_vfs_file, - flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) { + if ((status = vfs_setlease(stp->st_vfs_file, fl.fl_type, &flp))) { dprintk("NFSD: setlease failed [%d], no delegation\n", status); unhash_delegation(dp); flag = NFS4_OPEN_DELEGATE_NONE; @@ -1763,10 +1763,6 @@ out: return status; } -static struct workqueue_struct *laundry_wq; -static void laundromat_main(struct work_struct *); -static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main); - __be32 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, clientid_t *clid) @@ -1874,7 +1870,11 @@ nfs4_laundromat(void) return clientid_val; } -void +static struct workqueue_struct *laundry_wq; +static void laundromat_main(struct work_struct *); +static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main); + +static void laundromat_main(struct work_struct *not_used) { time_t t; @@ -1975,6 +1975,26 @@ io_during_grace_disallowed(struct inode *inode, int flags) && mandatory_lock(inode); } +static int check_stateid_generation(stateid_t *in, stateid_t *ref) +{ + /* If the client sends us a stateid from the future, it's buggy: */ + if (in->si_generation > ref->si_generation) + return nfserr_bad_stateid; + /* + * The following, however, can happen. For example, if the + * client sends an open and some IO at the same time, the open + * may bump si_generation while the IO is still in flight. + * Thanks to hard links and renames, the client never knows what + * file an open will affect. So it could avoid that situation + * only by serializing all opens and IO from the same open + * owner. To recover from the old_stateid error, the client + * will just have to retry the IO: + */ + if (in->si_generation < ref->si_generation) + return nfserr_old_stateid; + return nfs_ok; +} + /* * Checks for stateid operations */ @@ -2023,12 +2043,8 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl goto out; stidp = &stp->st_stateid; } - if (stateid->si_generation > stidp->si_generation) - goto out; - - /* OLD STATEID */ - status = nfserr_old_stateid; - if (stateid->si_generation < stidp->si_generation) + status = check_stateid_generation(stateid, stidp); + if (status) goto out; if (stp) { if ((status = nfs4_check_openmode(stp,flags))) @@ -2036,7 +2052,7 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl renew_client(stp->st_stateowner->so_client); if (filpp) *filpp = stp->st_vfs_file; - } else if (dp) { + } else { if ((status = nfs4_check_delegmode(dp, flags))) goto out; renew_client(dp->dl_client); @@ -2065,6 +2081,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei { struct nfs4_stateid *stp; struct nfs4_stateowner *sop; + __be32 status; dprintk("NFSD: preprocess_seqid_op: seqid=%d " "stateid = (%08x/%08x/%08x/%08x)\n", seqid, @@ -2127,7 +2144,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei } } - if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) { + if (nfs4_check_fh(current_fh, stp)) { dprintk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n"); return nfserr_bad_stateid; } @@ -2150,15 +2167,9 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei " confirmed yet!\n"); return nfserr_bad_stateid; } - if (stateid->si_generation > stp->st_stateid.si_generation) { - dprintk("NFSD: preprocess_seqid_op: future stateid?!\n"); - return nfserr_bad_stateid; - } - - if (stateid->si_generation < stp->st_stateid.si_generation) { - dprintk("NFSD: preprocess_seqid_op: old stateid!\n"); - return nfserr_old_stateid; - } + status = check_stateid_generation(stateid, &stp->st_stateid); + if (status) + return status; renew_client(sop->so_client); return nfs_ok; @@ -2194,7 +2205,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, oc->oc_seqid, &oc->oc_req_stateid, - CHECK_FH | CONFIRM | OPEN_STATE, + CONFIRM | OPEN_STATE, &oc->oc_stateowner, &stp, NULL))) goto out; @@ -2265,7 +2276,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, od->od_seqid, &od->od_stateid, - CHECK_FH | OPEN_STATE, + OPEN_STATE, &od->od_stateowner, &stp, NULL))) goto out; @@ -2318,7 +2329,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, close->cl_seqid, &close->cl_stateid, - CHECK_FH | OPEN_STATE | CLOSE_STATE, + OPEN_STATE | CLOSE_STATE, &close->cl_stateowner, &stp, NULL))) goto out; status = nfs_ok; @@ -2623,7 +2634,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_seqid_op(&cstate->current_fh, lock->lk_new_open_seqid, &lock->lk_new_open_stateid, - CHECK_FH | OPEN_STATE, + OPEN_STATE, &lock->lk_replay_owner, &open_stp, lock); if (status) @@ -2650,7 +2661,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_seqid_op(&cstate->current_fh, lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, - CHECK_FH | LOCK_STATE, + LOCK_STATE, &lock->lk_replay_owner, &lock_stp, lock); if (status) goto out; @@ -2847,7 +2858,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, locku->lu_seqid, &locku->lu_stateid, - CHECK_FH | LOCK_STATE, + LOCK_STATE, &locku->lu_stateowner, &stp, NULL))) goto out; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 0e6a179ecca..1ba7ad98193 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1867,6 +1867,15 @@ out_serverfault: goto out; } +static inline int attributes_need_mount(u32 *bmval) +{ + if (bmval[0] & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_LEASE_TIME)) + return 1; + if (bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID) + return 1; + return 0; +} + static __be32 nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, const char *name, int namlen, __be32 *p, int *buflen) @@ -1888,9 +1897,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, * we will not follow the cross mount and will fill the attribtutes * directly from the mountpoint dentry. */ - if (d_mountpoint(dentry) && - (cd->rd_bmval[0] & ~FATTR4_WORD0_RDATTR_ERROR) == 0 && - (cd->rd_bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID) == 0) + if (d_mountpoint(dentry) && !attributes_need_mount(cd->rd_bmval)) ignore_crossmnt = 1; else if (d_mountpoint(dentry)) { int err; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 8516137cdbb..613bcb8171a 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -37,6 +37,7 @@ #include <linux/nfsd/syscall.h> #include <asm/uaccess.h> +#include <net/ipv6.h> /* * We have a single directory with 9 nodes in it. @@ -149,7 +150,6 @@ static const struct file_operations transaction_ops = { .release = simple_transaction_release, }; -extern struct seq_operations nfs_exports_op; static int exports_open(struct inode *inode, struct file *file) { return seq_open(file, &nfs_exports_op); @@ -222,6 +222,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size) struct auth_domain *clp; int err = 0; struct knfsd_fh *res; + struct in6_addr in6; if (size < sizeof(*data)) return -EINVAL; @@ -236,7 +237,11 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size) res = (struct knfsd_fh*)buf; exp_readlock(); - if (!(clp = auth_unix_lookup(sin->sin_addr))) + + ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); + + clp = auth_unix_lookup(&in6); + if (!clp) err = -EPERM; else { err = exp_rootfh(clp, data->gd_path, res, data->gd_maxlen); @@ -257,6 +262,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size) int err = 0; struct knfsd_fh fh; char *res; + struct in6_addr in6; if (size < sizeof(*data)) return -EINVAL; @@ -271,7 +277,11 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size) res = buf; sin = (struct sockaddr_in *)&data->gd_addr; exp_readlock(); - if (!(clp = auth_unix_lookup(sin->sin_addr))) + + ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); + + clp = auth_unix_lookup(&in6); + if (!clp) err = -EPERM; else { err = exp_rootfh(clp, data->gd_path, &fh, NFS_FHSIZE); @@ -347,8 +357,6 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) return mesg - buf; } -extern int nfsd_nrthreads(void); - static ssize_t write_threads(struct file *file, char *buf, size_t size) { /* if size > 0, look for a number of threads and call nfsd_svc @@ -371,10 +379,6 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) return strlen(buf); } -extern int nfsd_nrpools(void); -extern int nfsd_get_nrthreads(int n, int *); -extern int nfsd_set_nrthreads(int n, int *); - static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) { /* if size > 0, look for an array of number of threads per node diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 3e6b3f41ee1..100ae564116 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -113,6 +113,124 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, } /* + * Use the given filehandle to look up the corresponding export and + * dentry. On success, the results are used to set fh_export and + * fh_dentry. + */ +static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) +{ + struct knfsd_fh *fh = &fhp->fh_handle; + struct fid *fid = NULL, sfid; + struct svc_export *exp; + struct dentry *dentry; + int fileid_type; + int data_left = fh->fh_size/4; + __be32 error; + + error = nfserr_stale; + if (rqstp->rq_vers > 2) + error = nfserr_badhandle; + if (rqstp->rq_vers == 4 && fh->fh_size == 0) + return nfserr_nofilehandle; + + if (fh->fh_version == 1) { + int len; + + if (--data_left < 0) + return error; + if (fh->fh_auth_type != 0) + return error; + len = key_len(fh->fh_fsid_type) / 4; + if (len == 0) + return error; + if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { + /* deprecated, convert to type 3 */ + len = key_len(FSID_ENCODE_DEV)/4; + fh->fh_fsid_type = FSID_ENCODE_DEV; + fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1]))); + fh->fh_fsid[1] = fh->fh_fsid[2]; + } + data_left -= len; + if (data_left < 0) + return error; + exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth); + fid = (struct fid *)(fh->fh_auth + len); + } else { + __u32 tfh[2]; + dev_t xdev; + ino_t xino; + + if (fh->fh_size != NFS_FHSIZE) + return error; + /* assume old filehandle format */ + xdev = old_decode_dev(fh->ofh_xdev); + xino = u32_to_ino_t(fh->ofh_xino); + mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); + exp = rqst_exp_find(rqstp, FSID_DEV, tfh); + } + + error = nfserr_stale; + if (PTR_ERR(exp) == -ENOENT) + return error; + + if (IS_ERR(exp)) + return nfserrno(PTR_ERR(exp)); + + error = nfsd_setuser_and_check_port(rqstp, exp); + if (error) + goto out; + + /* + * Look up the dentry using the NFS file handle. + */ + error = nfserr_stale; + if (rqstp->rq_vers > 2) + error = nfserr_badhandle; + + if (fh->fh_version != 1) { + sfid.i32.ino = fh->ofh_ino; + sfid.i32.gen = fh->ofh_generation; + sfid.i32.parent_ino = fh->ofh_dirino; + fid = &sfid; + data_left = 3; + if (fh->ofh_dirino == 0) + fileid_type = FILEID_INO32_GEN; + else + fileid_type = FILEID_INO32_GEN_PARENT; + } else + fileid_type = fh->fh_fileid_type; + + if (fileid_type == FILEID_ROOT) + dentry = dget(exp->ex_path.dentry); + else { + dentry = exportfs_decode_fh(exp->ex_path.mnt, fid, + data_left, fileid_type, + nfsd_acceptable, exp); + } + if (dentry == NULL) + goto out; + if (IS_ERR(dentry)) { + if (PTR_ERR(dentry) != -EINVAL) + error = nfserrno(PTR_ERR(dentry)); + goto out; + } + + if (S_ISDIR(dentry->d_inode->i_mode) && + (dentry->d_flags & DCACHE_DISCONNECTED)) { + printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + } + + fhp->fh_dentry = dentry; + fhp->fh_export = exp; + nfsd_nr_verified++; + return 0; +out: + exp_put(exp); + return error; +} + +/* * Perform sanity checks on the dentry in a client's file handle. * * Note that the file handle dentry may need to be freed even after @@ -124,115 +242,18 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, __be32 fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) { - struct knfsd_fh *fh = &fhp->fh_handle; - struct svc_export *exp = NULL; + struct svc_export *exp; struct dentry *dentry; - __be32 error = 0; + __be32 error; dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); if (!fhp->fh_dentry) { - struct fid *fid = NULL, sfid; - int fileid_type; - int data_left = fh->fh_size/4; - - error = nfserr_stale; - if (rqstp->rq_vers > 2) - error = nfserr_badhandle; - if (rqstp->rq_vers == 4 && fh->fh_size == 0) - return nfserr_nofilehandle; - - if (fh->fh_version == 1) { - int len; - if (--data_left<0) goto out; - switch (fh->fh_auth_type) { - case 0: break; - default: goto out; - } - len = key_len(fh->fh_fsid_type) / 4; - if (len == 0) goto out; - if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { - /* deprecated, convert to type 3 */ - len = key_len(FSID_ENCODE_DEV)/4; - fh->fh_fsid_type = FSID_ENCODE_DEV; - fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1]))); - fh->fh_fsid[1] = fh->fh_fsid[2]; - } - if ((data_left -= len)<0) goto out; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, - fh->fh_auth); - fid = (struct fid *)(fh->fh_auth + len); - } else { - __u32 tfh[2]; - dev_t xdev; - ino_t xino; - if (fh->fh_size != NFS_FHSIZE) - goto out; - /* assume old filehandle format */ - xdev = old_decode_dev(fh->ofh_xdev); - xino = u32_to_ino_t(fh->ofh_xino); - mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); - exp = rqst_exp_find(rqstp, FSID_DEV, tfh); - } - - error = nfserr_stale; - if (PTR_ERR(exp) == -ENOENT) - goto out; - - if (IS_ERR(exp)) { - error = nfserrno(PTR_ERR(exp)); - goto out; - } - - error = nfsd_setuser_and_check_port(rqstp, exp); + error = nfsd_set_fh_dentry(rqstp, fhp); if (error) goto out; - - /* - * Look up the dentry using the NFS file handle. - */ - error = nfserr_stale; - if (rqstp->rq_vers > 2) - error = nfserr_badhandle; - - if (fh->fh_version != 1) { - sfid.i32.ino = fh->ofh_ino; - sfid.i32.gen = fh->ofh_generation; - sfid.i32.parent_ino = fh->ofh_dirino; - fid = &sfid; - data_left = 3; - if (fh->ofh_dirino == 0) - fileid_type = FILEID_INO32_GEN; - else - fileid_type = FILEID_INO32_GEN_PARENT; - } else - fileid_type = fh->fh_fileid_type; - - if (fileid_type == FILEID_ROOT) - dentry = dget(exp->ex_path.dentry); - else { - dentry = exportfs_decode_fh(exp->ex_path.mnt, fid, - data_left, fileid_type, - nfsd_acceptable, exp); - } - if (dentry == NULL) - goto out; - if (IS_ERR(dentry)) { - if (PTR_ERR(dentry) != -EINVAL) - error = nfserrno(PTR_ERR(dentry)); - goto out; - } - - if (S_ISDIR(dentry->d_inode->i_mode) && - (dentry->d_flags & DCACHE_DISCONNECTED)) { - printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); - } - - fhp->fh_dentry = dentry; - fhp->fh_export = exp; - nfsd_nr_verified++; - cache_get(&exp->h); + dentry = fhp->fh_dentry; + exp = fhp->fh_export; } else { /* * just rechecking permissions @@ -242,7 +263,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) dprintk("nfsd: fh_verify - just checking\n"); dentry = fhp->fh_dentry; exp = fhp->fh_export; - cache_get(&exp->h); /* * Set user creds for this exportpoint; necessary even * in the "just checking" case because this may be a @@ -281,8 +301,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) access, ntohl(error)); } out: - if (exp && !IS_ERR(exp)) - exp_put(exp); if (error == nfserr_stale) nfsdstats.fh_stale++; return error; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9647b0f7bc0..941041f4b13 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -244,7 +244,6 @@ static int nfsd_init_socks(int port) if (error < 0) return error; -#ifdef CONFIG_NFSD_TCP error = lockd_up(IPPROTO_TCP); if (error >= 0) { error = svc_create_xprt(nfsd_serv, "tcp", port, @@ -254,7 +253,6 @@ static int nfsd_init_socks(int port) } if (error < 0) return error; -#endif return 0; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 304bf5f643c..a3a291f771f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -264,7 +264,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, struct inode *inode; int accmode = MAY_SATTR; int ftype = 0; - int imode; __be32 err; int host_err; int size_change = 0; @@ -360,25 +359,25 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, DQUOT_INIT(inode); } - imode = inode->i_mode; + /* sanitize the mode change */ if (iap->ia_valid & ATTR_MODE) { iap->ia_mode &= S_IALLUGO; - imode = iap->ia_mode |= (imode & ~S_IALLUGO); - /* if changing uid/gid revoke setuid/setgid in mode */ - if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) { - iap->ia_valid |= ATTR_KILL_PRIV; + iap->ia_mode |= (inode->i_mode & ~S_IALLUGO); + } + + /* Revoke setuid/setgid on chown */ + if (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || + ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)) { + iap->ia_valid |= ATTR_KILL_PRIV; + if (iap->ia_valid & ATTR_MODE) { + /* we're setting mode too, just clear the s*id bits */ iap->ia_mode &= ~S_ISUID; + if (iap->ia_mode & S_IXGRP) + iap->ia_mode &= ~S_ISGID; + } else { + /* set ATTR_KILL_* bits and let VFS handle it */ + iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); } - if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid) - iap->ia_mode &= ~S_ISGID; - } else { - /* - * Revoke setuid/setgid bit on chown/chgrp - */ - if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) - iap->ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV; - if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid) - iap->ia_valid |= ATTR_KILL_SGID; } /* Change the attributes. */ @@ -988,7 +987,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, * flushing the data to disk is handled separately below. */ - if (file->f_op->fsync == 0) {/* COMMIT3 cannot work */ + if (!file->f_op->fsync) {/* COMMIT3 cannot work */ stable = 2; *stablep = 2; /* FILE_SYNC */ } @@ -1152,7 +1151,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, } #endif /* CONFIG_NFSD_V3 */ -__be32 +static __be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, struct iattr *iap) { diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index adcbb05b120..de8387b7ceb 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -43,7 +43,7 @@ struct fid { u32 parent_ino; u32 parent_gen; } i32; - __u32 raw[6]; + __u32 raw[0]; }; }; diff --git a/include/linux/nfs3.h b/include/linux/nfs3.h index 7f11fa58920..539f3b550ea 100644 --- a/include/linux/nfs3.h +++ b/include/linux/nfs3.h @@ -96,7 +96,7 @@ struct nfs3_fh { #define MOUNTPROC3_UMNTALL 4 -#if defined(__KERNEL__) || defined(NFS_NEED_KERNEL_TYPES) +#if defined(__KERNEL__) /* Number of 32bit words in post_op_attr */ #define NFS3_POST_OP_ATTR_WORDS 22 diff --git a/include/linux/nfsd/Kbuild b/include/linux/nfsd/Kbuild index e726fc3a437..fc972048e57 100644 --- a/include/linux/nfsd/Kbuild +++ b/include/linux/nfsd/Kbuild @@ -1,6 +1,6 @@ unifdef-y += const.h +unifdef-y += debug.h unifdef-y += export.h +unifdef-y += nfsfh.h unifdef-y += stats.h unifdef-y += syscall.h -unifdef-y += nfsfh.h -unifdef-y += debug.h diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h index 7b5d784cc85..04b355c801d 100644 --- a/include/linux/nfsd/cache.h +++ b/include/linux/nfsd/cache.h @@ -10,7 +10,6 @@ #ifndef NFSCACHE_H #define NFSCACHE_H -#ifdef __KERNEL__ #include <linux/in.h> #include <linux/uio.h> @@ -77,5 +76,4 @@ void nfsd_reply_cache_shutdown(void); int nfsd_cache_lookup(struct svc_rqst *, int); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); -#endif /* __KERNEL__ */ #endif /* NFSCACHE_H */ diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 8caf4c4f64e..21ee440dd3e 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -27,7 +27,6 @@ #define NFSD_VERSION "0.5" #define NFSD_SUPPORTED_MINOR_VERSION 0 -#ifdef __KERNEL__ /* * Special flags for nfsd_permission. These must be different from MAY_READ, * MAY_WRITE, and MAY_EXEC. @@ -56,12 +55,20 @@ extern struct svc_program nfsd_program; extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; extern struct svc_serv *nfsd_serv; + +extern struct seq_operations nfs_exports_op; + /* * Function prototypes. */ int nfsd_svc(unsigned short port, int nrservs); int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp); +int nfsd_nrthreads(void); +int nfsd_nrpools(void); +int nfsd_get_nrthreads(int n, int *); +int nfsd_set_nrthreads(int n, int *); + /* nfsd/vfs.c */ int fh_lock_parent(struct svc_fh *, struct dentry *); int nfsd_racache_init(int); @@ -326,6 +333,4 @@ extern struct timeval nfssvc_boot; #endif /* CONFIG_NFSD_V4 */ -#endif /* __KERNEL__ */ - #endif /* LINUX_NFSD_NFSD_H */ diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 03547d6abee..2d8b211b932 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -120,7 +120,6 @@ struct cache_deferred_req { struct list_head hash; /* on hash chain */ struct list_head recent; /* on fifo */ struct cache_head *item; /* cache item we wait on */ - time_t recv_time; void *owner; /* we might need to discard all defered requests * owned by someone */ void (*revisit)(struct cache_deferred_req *req, diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 5a4b1e0206e..a10f1fb0bf7 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -70,8 +70,6 @@ enum seal_alg { SEAL_ALG_DES3KD = 0x0002 }; -#define KRB5_CKSUM_LENGTH 8 - #define CKSUMTYPE_CRC32 0x0001 #define CKSUMTYPE_RSA_MD4 0x0002 #define CKSUMTYPE_RSA_MD4_DES 0x0003 @@ -150,9 +148,9 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *inbuf, s32 krb5_make_seq_num(struct crypto_blkcipher *key, int direction, - s32 seqnum, unsigned char *cksum, unsigned char *buf); + u32 seqnum, unsigned char *cksum, unsigned char *buf); s32 krb5_get_seq_num(struct crypto_blkcipher *key, unsigned char *cksum, - unsigned char *buf, int *direction, s32 * seqnum); + unsigned char *buf, int *direction, u32 *seqnum); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 64c97552964..4b54c5fdcfd 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -386,7 +386,6 @@ struct svc_serv * svc_create(struct svc_program *, unsigned int, void (*shutdown)(struct svc_serv*)); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool); -int svc_create_thread(svc_thread_fn, struct svc_serv *); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, void (*shutdown)(struct svc_serv*), diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 22e1ef8e200..d39dbdc7b10 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -24,6 +24,7 @@ struct svc_cred { }; struct svc_rqst; /* forward decl */ +struct in6_addr; /* Authentication is done in the context of a domain. * @@ -120,10 +121,10 @@ extern void svc_auth_unregister(rpc_authflavor_t flavor); extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); -extern int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom); +extern int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom); extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); extern struct auth_domain *auth_domain_find(char *name); -extern struct auth_domain *auth_unix_lookup(struct in_addr addr); +extern struct auth_domain *auth_unix_lookup(struct in6_addr *addr); extern int auth_unix_forget_old(struct auth_domain *dom); extern void svcauth_unix_purge(void); extern void svcauth_unix_info_release(void *); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 206f092ad4c..8cff696dedf 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -26,8 +26,8 @@ struct svc_sock { void (*sk_owspace)(struct sock *); /* private TCP part */ - int sk_reclen; /* length of record */ - int sk_tcplen; /* current read length */ + u32 sk_reclen; /* length of record */ + u32 sk_tcplen; /* current read length */ }; /* diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 49c48983019..e0a612bc9c4 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -383,6 +383,15 @@ static inline int ipv6_addr_orchid(const struct in6_addr *a) == htonl(0x20010010)); } +static inline void ipv6_addr_set_v4mapped(const __be32 addr, + struct in6_addr *v4mapped) +{ + ipv6_addr_set(v4mapped, + 0, 0, + htonl(0x0000FFFF), + addr); +} + /* * find the first different bit between two addresses * length of address must be a multiple of 32bits diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c index ea8c92ecdae..d83b881685f 100644 --- a/net/sunrpc/auth_gss/gss_generic_token.c +++ b/net/sunrpc/auth_gss/gss_generic_token.c @@ -148,7 +148,7 @@ int g_token_size(struct xdr_netobj *mech, unsigned int body_size) { /* set body_size to sequence contents size */ - body_size += 4 + (int) mech->len; /* NEED overflow check */ + body_size += 2 + (int) mech->len; /* NEED overflow check */ return(1 + der_length_size(body_size) + body_size); } @@ -161,7 +161,7 @@ void g_make_token_header(struct xdr_netobj *mech, int body_size, unsigned char **buf) { *(*buf)++ = 0x60; - der_write_length(buf, 4 + mech->len + body_size); + der_write_length(buf, 2 + mech->len + body_size); *(*buf)++ = 0x06; *(*buf)++ = (unsigned char) mech->len; TWRITE_STR(*buf, mech->data, ((int) mech->len)); diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 0dd792338fa..1d52308ca32 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -66,8 +66,8 @@ krb5_encrypt( goto out; if (crypto_blkcipher_ivsize(tfm) > 16) { - dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n", - crypto_blkcipher_ivsize(tfm)); + dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n", + crypto_blkcipher_ivsize(tfm)); goto out; } @@ -102,7 +102,7 @@ krb5_decrypt( goto out; if (crypto_blkcipher_ivsize(tfm) > 16) { - dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n", + dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n", crypto_blkcipher_ivsize(tfm)); goto out; } diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index dedcbd6108f..5f1d36dfbcf 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -87,10 +87,10 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, now = get_seconds(); - token->len = g_token_size(&ctx->mech_used, 22); + token->len = g_token_size(&ctx->mech_used, 24); ptr = token->data; - g_make_token_header(&ctx->mech_used, 22, &ptr); + g_make_token_header(&ctx->mech_used, 24, &ptr); *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff); *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff); @@ -109,15 +109,14 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, md5cksum.data, md5cksum.len)) return GSS_S_FAILURE; - memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, - KRB5_CKSUM_LENGTH); + memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8); spin_lock(&krb5_seq_lock); seq_send = ctx->seq_send++; spin_unlock(&krb5_seq_lock); if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, - ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)) + seq_send, krb5_hdr + 16, krb5_hdr + 8)) return GSS_S_FAILURE; return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index 43f3421f1e6..f160be6c1a4 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -43,7 +43,7 @@ s32 krb5_make_seq_num(struct crypto_blkcipher *key, int direction, - s32 seqnum, + u32 seqnum, unsigned char *cksum, unsigned char *buf) { unsigned char plain[8]; @@ -65,7 +65,7 @@ s32 krb5_get_seq_num(struct crypto_blkcipher *key, unsigned char *cksum, unsigned char *buf, - int *direction, s32 * seqnum) + int *direction, u32 *seqnum) { s32 code; unsigned char plain[8]; diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index e30a993466b..d91a5d00480 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -82,7 +82,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; s32 now; int direction; - s32 seqnum; + u32 seqnum; unsigned char *ptr = (unsigned char *)read_token->data; int bodysize; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 3bdc527ee64..b00b1b42630 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -137,7 +137,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, BUG_ON((buf->len - offset) % blocksize); plainlen = blocksize + buf->len - offset; - headlen = g_token_size(&kctx->mech_used, 22 + plainlen) - + headlen = g_token_size(&kctx->mech_used, 24 + plainlen) - (buf->len - offset); ptr = buf->head[0].iov_base + offset; @@ -149,7 +149,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, buf->len += headlen; BUG_ON((buf->len - offset - headlen) % blocksize); - g_make_token_header(&kctx->mech_used, 22 + plainlen, &ptr); + g_make_token_header(&kctx->mech_used, 24 + plainlen, &ptr); *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff); @@ -176,9 +176,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, md5cksum.data, md5cksum.len)) return GSS_S_FAILURE; - memcpy(krb5_hdr + 16, - md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, - KRB5_CKSUM_LENGTH); + memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8); spin_lock(&krb5_seq_lock); seq_send = kctx->seq_send++; diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index abf17ce2e3b..c832712f8d5 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c @@ -107,10 +107,10 @@ spkm3_make_token(struct spkm3_ctx *ctx, tokenlen = 10 + ctxelen + 1 + md5elen + 1; /* Create token header using generic routines */ - token->len = g_token_size(&ctx->mech_used, tokenlen); + token->len = g_token_size(&ctx->mech_used, tokenlen + 2); ptr = token->data; - g_make_token_header(&ctx->mech_used, tokenlen, &ptr); + g_make_token_header(&ctx->mech_used, tokenlen + 2, &ptr); spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit); } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 481f984e9a2..5905d56737d 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1146,7 +1146,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_SVC_INTEGRITY: if (unwrap_integ_data(&rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) - goto auth_err; + goto garbage_args; /* placeholders for length and seq. number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); @@ -1154,7 +1154,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_SVC_PRIVACY: if (unwrap_priv_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) - goto auth_err; + goto garbage_args; /* placeholders for length and seq. number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); @@ -1169,6 +1169,11 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) ret = SVC_OK; goto out; } +garbage_args: + /* Restore write pointer to its original value: */ + xdr_ressize_check(rqstp, reject_stat); + ret = SVC_GARBAGE; + goto out; auth_err: /* Restore write pointer to its original value: */ xdr_ressize_check(rqstp, reject_stat); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index b5f2786251b..d75530ff2a6 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -571,7 +571,6 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) return -ETIMEDOUT; dreq->item = item; - dreq->recv_time = get_seconds(); spin_lock(&cache_defer_lock); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 090af78d68b..d74c2d26953 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -510,8 +510,7 @@ EXPORT_SYMBOL(svc_destroy); static int svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) { - int pages; - int arghi; + unsigned int pages, arghi; pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. * We assume one is at most one page @@ -525,7 +524,7 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) rqstp->rq_pages[arghi++] = p; pages--; } - return ! pages; + return pages == 0; } /* @@ -534,8 +533,9 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) static void svc_release_buffer(struct svc_rqst *rqstp) { - int i; - for (i=0; i<ARRAY_SIZE(rqstp->rq_pages); i++) + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(rqstp->rq_pages); i++) if (rqstp->rq_pages[i]) put_page(rqstp->rq_pages[i]); } @@ -590,7 +590,7 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, struct svc_rqst *rqstp; int error = -ENOMEM; int have_oldmask = 0; - cpumask_t oldmask; + cpumask_t uninitialized_var(oldmask); rqstp = svc_prepare_thread(serv, pool); if (IS_ERR(rqstp)) { @@ -619,16 +619,6 @@ out_thread: } /* - * Create a thread in the default pool. Caller must hold BKL. - */ -int -svc_create_thread(svc_thread_fn func, struct svc_serv *serv) -{ - return __svc_create_thread(func, serv, &serv->sv_pools[0]); -} -EXPORT_SYMBOL(svc_create_thread); - -/* * Choose a pool in which to create a new thread, for svc_set_num_threads */ static inline struct svc_pool * @@ -921,8 +911,7 @@ svc_process(struct svc_rqst *rqstp) case SVC_OK: break; case SVC_GARBAGE: - rpc_stat = rpc_garbage_args; - goto err_bad; + goto err_garbage; case SVC_SYSERR: rpc_stat = rpc_system_err; goto err_bad; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 332eb47539e..d8e8d79a845 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -18,6 +18,7 @@ #include <linux/skbuff.h> #include <linux/file.h> #include <linux/freezer.h> +#include <linux/kthread.h> #include <net/sock.h> #include <net/checksum.h> #include <net/ip.h> @@ -586,8 +587,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) while (rqstp->rq_pages[i] == NULL) { struct page *p = alloc_page(GFP_KERNEL); if (!p) { - int j = msecs_to_jiffies(500); - schedule_timeout_uninterruptible(j); + set_current_state(TASK_INTERRUPTIBLE); + if (signalled() || kthread_should_stop()) { + set_current_state(TASK_RUNNING); + return -EINTR; + } + schedule_timeout(msecs_to_jiffies(500)); } rqstp->rq_pages[i] = p; } @@ -607,7 +612,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) try_to_freeze(); cond_resched(); - if (signalled()) + if (signalled() || kthread_should_stop()) return -EINTR; spin_lock_bh(&pool->sp_lock); @@ -626,6 +631,20 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) * to bring down the daemons ... */ set_current_state(TASK_INTERRUPTIBLE); + + /* + * checking kthread_should_stop() here allows us to avoid + * locking and signalling when stopping kthreads that call + * svc_recv. If the thread has already been woken up, then + * we can exit here without sleeping. If not, then it + * it'll be woken up quickly during the schedule_timeout + */ + if (kthread_should_stop()) { + set_current_state(TASK_RUNNING); + spin_unlock_bh(&pool->sp_lock); + return -EINTR; + } + add_wait_queue(&rqstp->rq_wait, &wait); spin_unlock_bh(&pool->sp_lock); @@ -641,7 +660,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) svc_thread_dequeue(pool, rqstp); spin_unlock_bh(&pool->sp_lock); dprintk("svc: server %p, no data yet\n", rqstp); - return signalled()? -EINTR : -EAGAIN; + if (signalled() || kthread_should_stop()) + return -EINTR; + else + return -EAGAIN; } } spin_unlock_bh(&pool->sp_lock); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 3c64051e455..3f30ee6006a 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -11,7 +11,8 @@ #include <linux/hash.h> #include <linux/string.h> #include <net/sock.h> - +#include <net/ipv6.h> +#include <linux/kernel.h> #define RPCDBG_FACILITY RPCDBG_AUTH @@ -85,7 +86,7 @@ static void svcauth_unix_domain_release(struct auth_domain *dom) struct ip_map { struct cache_head h; char m_class[8]; /* e.g. "nfsd" */ - struct in_addr m_addr; + struct in6_addr m_addr; struct unix_domain *m_client; int m_add_change; }; @@ -113,12 +114,19 @@ static inline int hash_ip(__be32 ip) return (hash ^ (hash>>8)) & 0xff; } #endif +static inline int hash_ip6(struct in6_addr ip) +{ + return (hash_ip(ip.s6_addr32[0]) ^ + hash_ip(ip.s6_addr32[1]) ^ + hash_ip(ip.s6_addr32[2]) ^ + hash_ip(ip.s6_addr32[3])); +} static int ip_map_match(struct cache_head *corig, struct cache_head *cnew) { struct ip_map *orig = container_of(corig, struct ip_map, h); struct ip_map *new = container_of(cnew, struct ip_map, h); return strcmp(orig->m_class, new->m_class) == 0 - && orig->m_addr.s_addr == new->m_addr.s_addr; + && ipv6_addr_equal(&orig->m_addr, &new->m_addr); } static void ip_map_init(struct cache_head *cnew, struct cache_head *citem) { @@ -126,7 +134,7 @@ static void ip_map_init(struct cache_head *cnew, struct cache_head *citem) struct ip_map *item = container_of(citem, struct ip_map, h); strcpy(new->m_class, item->m_class); - new->m_addr.s_addr = item->m_addr.s_addr; + ipv6_addr_copy(&new->m_addr, &item->m_addr); } static void update(struct cache_head *cnew, struct cache_head *citem) { @@ -150,22 +158,24 @@ static void ip_map_request(struct cache_detail *cd, struct cache_head *h, char **bpp, int *blen) { - char text_addr[20]; + char text_addr[40]; struct ip_map *im = container_of(h, struct ip_map, h); - __be32 addr = im->m_addr.s_addr; - - snprintf(text_addr, 20, "%u.%u.%u.%u", - ntohl(addr) >> 24 & 0xff, - ntohl(addr) >> 16 & 0xff, - ntohl(addr) >> 8 & 0xff, - ntohl(addr) >> 0 & 0xff); + if (ipv6_addr_v4mapped(&(im->m_addr))) { + snprintf(text_addr, 20, NIPQUAD_FMT, + ntohl(im->m_addr.s6_addr32[3]) >> 24 & 0xff, + ntohl(im->m_addr.s6_addr32[3]) >> 16 & 0xff, + ntohl(im->m_addr.s6_addr32[3]) >> 8 & 0xff, + ntohl(im->m_addr.s6_addr32[3]) >> 0 & 0xff); + } else { + snprintf(text_addr, 40, NIP6_FMT, NIP6(im->m_addr)); + } qword_add(bpp, blen, im->m_class); qword_add(bpp, blen, text_addr); (*bpp)[-1] = '\n'; } -static struct ip_map *ip_map_lookup(char *class, struct in_addr addr); +static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); static int ip_map_parse(struct cache_detail *cd, @@ -176,10 +186,10 @@ static int ip_map_parse(struct cache_detail *cd, * for scratch: */ char *buf = mesg; int len; - int b1,b2,b3,b4; + int b1, b2, b3, b4, b5, b6, b7, b8; char c; char class[8]; - struct in_addr addr; + struct in6_addr addr; int err; struct ip_map *ipmp; @@ -198,7 +208,23 @@ static int ip_map_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len <= 0) return -EINVAL; - if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4) + if (sscanf(buf, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) == 4) { + addr.s6_addr32[0] = 0; + addr.s6_addr32[1] = 0; + addr.s6_addr32[2] = htonl(0xffff); + addr.s6_addr32[3] = + htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); + } else if (sscanf(buf, NIP6_FMT "%c", + &b1, &b2, &b3, &b4, &b5, &b6, &b7, &b8, &c) == 8) { + addr.s6_addr16[0] = htons(b1); + addr.s6_addr16[1] = htons(b2); + addr.s6_addr16[2] = htons(b3); + addr.s6_addr16[3] = htons(b4); + addr.s6_addr16[4] = htons(b5); + addr.s6_addr16[5] = htons(b6); + addr.s6_addr16[6] = htons(b7); + addr.s6_addr16[7] = htons(b8); + } else return -EINVAL; expiry = get_expiry(&mesg); @@ -216,10 +242,7 @@ static int ip_map_parse(struct cache_detail *cd, } else dom = NULL; - addr.s_addr = - htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); - - ipmp = ip_map_lookup(class,addr); + ipmp = ip_map_lookup(class, &addr); if (ipmp) { err = ip_map_update(ipmp, container_of(dom, struct unix_domain, h), @@ -239,7 +262,7 @@ static int ip_map_show(struct seq_file *m, struct cache_head *h) { struct ip_map *im; - struct in_addr addr; + struct in6_addr addr; char *dom = "-no-domain-"; if (h == NULL) { @@ -248,20 +271,24 @@ static int ip_map_show(struct seq_file *m, } im = container_of(h, struct ip_map, h); /* class addr domain */ - addr = im->m_addr; + ipv6_addr_copy(&addr, &im->m_addr); if (test_bit(CACHE_VALID, &h->flags) && !test_bit(CACHE_NEGATIVE, &h->flags)) dom = im->m_client->h.name; - seq_printf(m, "%s %d.%d.%d.%d %s\n", - im->m_class, - ntohl(addr.s_addr) >> 24 & 0xff, - ntohl(addr.s_addr) >> 16 & 0xff, - ntohl(addr.s_addr) >> 8 & 0xff, - ntohl(addr.s_addr) >> 0 & 0xff, - dom - ); + if (ipv6_addr_v4mapped(&addr)) { + seq_printf(m, "%s" NIPQUAD_FMT "%s\n", + im->m_class, + ntohl(addr.s6_addr32[3]) >> 24 & 0xff, + ntohl(addr.s6_addr32[3]) >> 16 & 0xff, + ntohl(addr.s6_addr32[3]) >> 8 & 0xff, + ntohl(addr.s6_addr32[3]) >> 0 & 0xff, + dom); + } else { + seq_printf(m, "%s" NIP6_FMT "%s\n", + im->m_class, NIP6(addr), dom); + } return 0; } @@ -281,16 +308,16 @@ struct cache_detail ip_map_cache = { .alloc = ip_map_alloc, }; -static struct ip_map *ip_map_lookup(char *class, struct in_addr addr) +static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr) { struct ip_map ip; struct cache_head *ch; strcpy(ip.m_class, class); - ip.m_addr = addr; + ipv6_addr_copy(&ip.m_addr, addr); ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h, hash_str(class, IP_HASHBITS) ^ - hash_ip(addr.s_addr)); + hash_ip6(*addr)); if (ch) return container_of(ch, struct ip_map, h); @@ -319,14 +346,14 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex ch = sunrpc_cache_update(&ip_map_cache, &ip.h, &ipm->h, hash_str(ipm->m_class, IP_HASHBITS) ^ - hash_ip(ipm->m_addr.s_addr)); + hash_ip6(ipm->m_addr)); if (!ch) return -ENOMEM; cache_put(ch, &ip_map_cache); return 0; } -int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) +int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom) { struct unix_domain *udom; struct ip_map *ipmp; @@ -355,7 +382,7 @@ int auth_unix_forget_old(struct auth_domain *dom) } EXPORT_SYMBOL(auth_unix_forget_old); -struct auth_domain *auth_unix_lookup(struct in_addr addr) +struct auth_domain *auth_unix_lookup(struct in6_addr *addr) { struct ip_map *ipm; struct auth_domain *rv; @@ -650,9 +677,24 @@ static int unix_gid_find(uid_t uid, struct group_info **gip, int svcauth_unix_set_client(struct svc_rqst *rqstp) { - struct sockaddr_in *sin = svc_addr_in(rqstp); + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6, sin6_storage; struct ip_map *ipm; + switch (rqstp->rq_addr.ss_family) { + case AF_INET: + sin = svc_addr_in(rqstp); + sin6 = &sin6_storage; + ipv6_addr_set(&sin6->sin6_addr, 0, 0, + htonl(0x0000FFFF), sin->sin_addr.s_addr); + break; + case AF_INET6: + sin6 = svc_addr_in6(rqstp); + break; + default: + BUG(); + } + rqstp->rq_client = NULL; if (rqstp->rq_proc == 0) return SVC_OK; @@ -660,7 +702,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) ipm = ip_map_cached_get(rqstp); if (ipm == NULL) ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, - sin->sin_addr); + &sin6->sin6_addr); if (ipm == NULL) return SVC_DENIED; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c475977de05..3e65719f1ef 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -38,6 +38,7 @@ #include <net/checksum.h> #include <net/ip.h> #include <net/ipv6.h> +#include <net/tcp.h> #include <net/tcp_states.h> #include <asm/uaccess.h> #include <asm/ioctls.h> @@ -45,6 +46,7 @@ #include <linux/sunrpc/types.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/stats.h> @@ -822,8 +824,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) * the next four bytes. Otherwise try to gobble up as much as * possible up to the complete record length. */ - if (svsk->sk_tcplen < 4) { - unsigned long want = 4 - svsk->sk_tcplen; + if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { + int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; struct kvec iov; iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; @@ -833,32 +835,31 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) svsk->sk_tcplen += len; if (len < want) { - dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", - len, want); + dprintk("svc: short recvfrom while reading record " + "length (%d of %d)\n", len, want); svc_xprt_received(&svsk->sk_xprt); return -EAGAIN; /* record header not complete */ } svsk->sk_reclen = ntohl(svsk->sk_reclen); - if (!(svsk->sk_reclen & 0x80000000)) { + if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) { /* FIXME: technically, a record can be fragmented, * and non-terminal fragments will not have the top * bit set in the fragment length header. * But apparently no known nfs clients send fragmented * records. */ if (net_ratelimit()) - printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx" - " (non-terminal)\n", - (unsigned long) svsk->sk_reclen); + printk(KERN_NOTICE "RPC: multiple fragments " + "per record not supported\n"); goto err_delete; } - svsk->sk_reclen &= 0x7fffffff; + svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK; dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); if (svsk->sk_reclen > serv->sv_max_mesg) { if (net_ratelimit()) - printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx" - " (large)\n", - (unsigned long) svsk->sk_reclen); + printk(KERN_NOTICE "RPC: " + "fragment too large: 0x%08lx\n", + (unsigned long)svsk->sk_reclen); goto err_delete; } } @@ -1045,7 +1046,6 @@ void svc_cleanup_xprt_sock(void) static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) { struct sock *sk = svsk->sk_sk; - struct tcp_sock *tp = tcp_sk(sk); svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv); set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); @@ -1063,7 +1063,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; - tp->nonagle = 1; /* disable Nagle's algorithm */ + tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; /* initialise setting must have enough space to * receive and respond to one request. @@ -1101,6 +1101,7 @@ void svc_sock_update_bufs(struct svc_serv *serv) } spin_unlock_bh(&serv->sv_lock); } +EXPORT_SYMBOL(svc_sock_update_bufs); /* * Initialize socket for RPC use and create svc_sock struct diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 16fd3f6718f..af408fc1263 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1036,6 +1036,8 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) wait_event(xprt->sc_send_wait, atomic_read(&xprt->sc_sq_count) < xprt->sc_sq_depth); + if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) + return 0; continue; } /* Bumped used SQ WR count and post */ |