27 files changed, 21138 insertions, 0 deletions
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
new file mode 100644
index 00000000000..b4baa031edf
--- /dev/null
+++ b/fs/nfs/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile for the Linux nfs filesystem routines.
+#
+
+obj-$(CONFIG_NFS_FS) += nfs.o
+
+nfs-y 			:= dir.o file.o inode.o nfs2xdr.o pagelist.o \
+			   proc.o read.o symlink.o unlink.o write.o
+nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
+nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
+nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
+			   delegation.o idmap.o \
+			   callback.o callback_xdr.o callback_proc.o
+nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
+nfs-objs		:= $(nfs-y)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
new file mode 100644
index 00000000000..560d6175dd5
--- /dev/null
+++ b/fs/nfs/callback.c
@@ -0,0 +1,187 @@
+/*
+ * linux/fs/nfs/callback.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback handling
+ */
+
+#include <linux/config.h>
+#include <linux/completion.h>
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/smp_lock.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/nfs_fs.h>
+#include "callback.h"
+
+#define NFSDBG_FACILITY NFSDBG_CALLBACK
+
+struct nfs_callback_data {
+	unsigned int users;
+	struct svc_serv *serv;
+	pid_t pid;
+	struct completion started;
+	struct completion stopped;
+};
+
+static struct nfs_callback_data nfs_callback_info;
+static DECLARE_MUTEX(nfs_callback_sema);
+static struct svc_program nfs4_callback_program;
+
+unsigned short nfs_callback_tcpport;
+
+/*
+ * This is the callback kernel thread.
+ */
+static void nfs_callback_svc(struct svc_rqst *rqstp)
+{
+	struct svc_serv *serv = rqstp->rq_server;
+	int err;
+
+	__module_get(THIS_MODULE);
+	lock_kernel();
+
+	nfs_callback_info.pid = current->pid;
+	daemonize("nfsv4-svc");
+	/* Process request with signals blocked, but allow SIGKILL.  */
+	allow_signal(SIGKILL);
+
+	complete(&nfs_callback_info.started);
+
+	while (nfs_callback_info.users != 0 || !signalled()) {
+		/*
+		 * Listen for a request on the socket
+		 */
+		err = svc_recv(serv, rqstp, MAX_SCHEDULE_TIMEOUT);
+		if (err == -EAGAIN || err == -EINTR)
+			continue;
+		if (err < 0) {
+			printk(KERN_WARNING
+					"%s: terminating on error %d\n",
+					__FUNCTION__, -err);
+			break;
+		}
+		dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__,
+				NIPQUAD(rqstp->rq_addr.sin_addr.s_addr));
+		svc_process(serv, rqstp);
+	}
+
+	nfs_callback_info.pid = 0;
+	complete(&nfs_callback_info.stopped);
+	unlock_kernel();
+	module_put_and_exit(0);
+}
+
+/*
+ * Bring up the server process if it is not already up.
+ */
+int nfs_callback_up(void)
+{
+	struct svc_serv *serv;
+	struct svc_sock *svsk;
+	int ret = 0;
+
+	lock_kernel();
+	down(&nfs_callback_sema);
+	if (nfs_callback_info.users++ || nfs_callback_info.pid != 0)
+		goto out;
+	init_completion(&nfs_callback_info.started);
+	init_completion(&nfs_callback_info.stopped);
+	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE);
+	ret = -ENOMEM;
+	if (!serv)
+		goto out_err;
+	/* FIXME: We don't want to register this socket with the portmapper */
+	ret = svc_makesock(serv, IPPROTO_TCP, 0);
+	if (ret < 0)
+		goto out_destroy;
+	if (!list_empty(&serv->sv_permsocks)) {
+		svsk = list_entry(serv->sv_permsocks.next,
+				struct svc_sock, sk_list);
+		nfs_callback_tcpport = ntohs(inet_sk(svsk->sk_sk)->sport);
+		dprintk ("Callback port = 0x%x\n", nfs_callback_tcpport);
+	} else
+		BUG();
+	ret = svc_create_thread(nfs_callback_svc, serv);
+	if (ret < 0)
+		goto out_destroy;
+	nfs_callback_info.serv = serv;
+	wait_for_completion(&nfs_callback_info.started);
+out:
+	up(&nfs_callback_sema);
+	unlock_kernel();
+	return ret;
+out_destroy:
+	svc_destroy(serv);
+out_err:
+	nfs_callback_info.users--;
+	goto out;
+}
+
+/*
+ * Kill the server process if it is not already up.
+ */
+int nfs_callback_down(void)
+{
+	int ret = 0;
+
+	lock_kernel();
+	down(&nfs_callback_sema);
+	if (--nfs_callback_info.users || nfs_callback_info.pid == 0)
+		goto out;
+	kill_proc(nfs_callback_info.pid, SIGKILL, 1);
+	wait_for_completion(&nfs_callback_info.stopped);
+out:
+	up(&nfs_callback_sema);
+	unlock_kernel();
+	return ret;
+}
+
+static int nfs_callback_authenticate(struct svc_rqst *rqstp)
+{
+	struct in_addr *addr = &rqstp->rq_addr.sin_addr;
+	struct nfs4_client *clp;
+
+	/* Don't talk to strangers */
+	clp = nfs4_find_client(addr);
+	if (clp == NULL)
+		return SVC_DROP;
+	dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr));
+	nfs4_put_client(clp);
+	switch (rqstp->rq_authop->flavour) {
+		case RPC_AUTH_NULL:
+			if (rqstp->rq_proc != CB_NULL)
+				return SVC_DENIED;
+			break;
+		case RPC_AUTH_UNIX:
+			break;
+		case RPC_AUTH_GSS:
+			/* FIXME: RPCSEC_GSS handling? */
+		default:
+			return SVC_DENIED;
+	}
+	return SVC_OK;
+}
+
+/*
+ * Define NFS4 callback program
+ */
+extern struct svc_version nfs4_callback_version1;
+
+static struct svc_version *nfs4_callback_version[] = {
+	[1] = &nfs4_callback_version1,
+};
+
+static struct svc_stat nfs4_callback_stats;
+
+static struct svc_program nfs4_callback_program = {
+	.pg_prog = NFS4_CALLBACK,			/* RPC service number */
+	.pg_nvers = ARRAY_SIZE(nfs4_callback_version),	/* Number of entries */
+	.pg_vers = nfs4_callback_version,		/* version table */
+	.pg_name = "NFSv4 callback",			/* service name */
+	.pg_class = "nfs",				/* authentication class */
+	.pg_stats = &nfs4_callback_stats,
+	.pg_authenticate = nfs_callback_authenticate,
+};
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
new file mode 100644
index 00000000000..a0db2d4f941
--- /dev/null
+++ b/fs/nfs/callback.h
@@ -0,0 +1,70 @@
+/*
+ * linux/fs/nfs/callback.h
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback definitions
+ */
+#ifndef __LINUX_FS_NFS_CALLBACK_H
+#define __LINUX_FS_NFS_CALLBACK_H
+
+#define NFS4_CALLBACK 0x40000000
+#define NFS4_CALLBACK_XDRSIZE 2048
+#define NFS4_CALLBACK_BUFSIZE (1024 + NFS4_CALLBACK_XDRSIZE)
+
+enum nfs4_callback_procnum {
+	CB_NULL = 0,
+	CB_COMPOUND = 1,
+};
+
+enum nfs4_callback_opnum {
+	OP_CB_GETATTR = 3,
+	OP_CB_RECALL  = 4,
+	OP_CB_ILLEGAL = 10044,
+};
+
+struct cb_compound_hdr_arg {
+	int taglen;
+	const char *tag;
+	unsigned int callback_ident;
+	unsigned nops;
+};
+
+struct cb_compound_hdr_res {
+	uint32_t *status;
+	int taglen;
+	const char *tag;
+	uint32_t *nops;
+};
+
+struct cb_getattrargs {
+	struct sockaddr_in *addr;
+	struct nfs_fh fh;
+	uint32_t bitmap[2];
+};
+
+struct cb_getattrres {
+	uint32_t status;
+	uint32_t bitmap[2];
+	uint64_t size;
+	uint64_t change_attr;
+	struct timespec ctime;
+	struct timespec mtime;
+};
+
+struct cb_recallargs {
+	struct sockaddr_in *addr;
+	struct nfs_fh fh;
+	nfs4_stateid stateid;
+	uint32_t truncate;
+};
+
+extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
+extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
+
+extern int nfs_callback_up(void);
+extern int nfs_callback_down(void);
+
+extern unsigned short nfs_callback_tcpport;
+
+#endif /* __LINUX_FS_NFS_CALLBACK_H */
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
new file mode 100644
index 00000000000..ece27e42b93
--- /dev/null
+++ b/fs/nfs/callback_proc.c
@@ -0,0 +1,85 @@
+/*
+ * linux/fs/nfs/callback_proc.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback procedures
+ */
+#include <linux/config.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include "callback.h"
+#include "delegation.h"
+
+#define NFSDBG_FACILITY NFSDBG_CALLBACK
+ 
+unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
+{
+	struct nfs4_client *clp;
+	struct nfs_delegation *delegation;
+	struct nfs_inode *nfsi;
+	struct inode *inode;
+	
+	res->bitmap[0] = res->bitmap[1] = 0;
+	res->status = htonl(NFS4ERR_BADHANDLE);
+	clp = nfs4_find_client(&args->addr->sin_addr);
+	if (clp == NULL)
+		goto out;
+	inode = nfs_delegation_find_inode(clp, &args->fh);
+	if (inode == NULL)
+		goto out_putclient;
+	nfsi = NFS_I(inode);
+	down_read(&nfsi->rwsem);
+	delegation = nfsi->delegation;
+	if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0)
+		goto out_iput;
+	res->size = i_size_read(inode);
+	res->change_attr = NFS_CHANGE_ATTR(inode);
+	res->ctime = inode->i_ctime;
+	res->mtime = inode->i_mtime;
+	res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
+		args->bitmap[0];
+	res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) &
+		args->bitmap[1];
+	res->status = 0;
+out_iput:
+	up_read(&nfsi->rwsem);
+	iput(inode);
+out_putclient:
+	nfs4_put_client(clp);
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status));
+	return res->status;
+}
+
+unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+{
+	struct nfs4_client *clp;
+	struct inode *inode;
+	unsigned res;
+	
+	res = htonl(NFS4ERR_BADHANDLE);
+	clp = nfs4_find_client(&args->addr->sin_addr);
+	if (clp == NULL)
+		goto out;
+	inode = nfs_delegation_find_inode(clp, &args->fh);
+	if (inode == NULL)
+		goto out_putclient;
+	/* Set up a helper thread to actually return the delegation */
+	switch(nfs_async_inode_return_delegation(inode, &args->stateid)) {
+		case 0:
+			res = 0;
+			break;
+		case -ENOENT:
+			res = htonl(NFS4ERR_BAD_STATEID);
+			break;
+		default:
+			res = htonl(NFS4ERR_RESOURCE);
+	}
+	iput(inode);
+out_putclient:
+	nfs4_put_client(clp);
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res));
+	return res;
+}
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
new file mode 100644
index 00000000000..d271df9df2b
--- /dev/null
+++ b/fs/nfs/callback_xdr.c
@@ -0,0 +1,481 @@
+/*
+ * linux/fs/nfs/callback_xdr.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback encode/decode procedures
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include "callback.h"
+
+#define CB_OP_TAGLEN_MAXSZ	(512)
+#define CB_OP_HDR_RES_MAXSZ	(2 + CB_OP_TAGLEN_MAXSZ)
+#define CB_OP_GETATTR_BITMAP_MAXSZ	(4)
+#define CB_OP_GETATTR_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
+				CB_OP_GETATTR_BITMAP_MAXSZ + \
+				2 + 2 + 3 + 3)
+#define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+
+#define NFSDBG_FACILITY NFSDBG_CALLBACK
+
+typedef unsigned (*callback_process_op_t)(void *, void *);
+typedef unsigned (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
+typedef unsigned (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
+
+
+struct callback_op {
+	callback_process_op_t process_op;
+	callback_decode_arg_t decode_args;
+	callback_encode_res_t encode_res;
+	long res_maxsize;
+};
+
+static struct callback_op callback_ops[];
+
+static int nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	return htonl(NFS4_OK);
+}
+
+static int nfs4_decode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy)
+{
+	return xdr_argsize_check(rqstp, p);
+}
+
+static int nfs4_encode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy)
+{
+	return xdr_ressize_check(rqstp, p);
+}
+
+static uint32_t *read_buf(struct xdr_stream *xdr, int nbytes)
+{
+	uint32_t *p;
+
+	p = xdr_inline_decode(xdr, nbytes);
+	if (unlikely(p == NULL))
+		printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n");
+	return p;
+}
+
+static unsigned decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str)
+{
+	uint32_t *p;
+
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	*len = ntohl(*p);
+
+	if (*len != 0) {
+		p = read_buf(xdr, *len);
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_RESOURCE);
+		*str = (const char *)p;
+	} else
+		*str = NULL;
+
+	return 0;
+}
+
+static unsigned decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+	uint32_t *p;
+
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	fh->size = ntohl(*p);
+	if (fh->size > NFS4_FHSIZE)
+		return htonl(NFS4ERR_BADHANDLE);
+	p = read_buf(xdr, fh->size);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	memcpy(&fh->data[0], p, fh->size);
+	memset(&fh->data[fh->size], 0, sizeof(fh->data) - fh->size);
+	return 0;
+}
+
+static unsigned decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
+{
+	uint32_t *p;
+	unsigned int attrlen;
+
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	attrlen = ntohl(*p);
+	p = read_buf(xdr, attrlen << 2);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	if (likely(attrlen > 0))
+		bitmap[0] = ntohl(*p++);
+	if (attrlen > 1)
+		bitmap[1] = ntohl(*p);
+	return 0;
+}
+
+static unsigned decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+	uint32_t *p;
+
+	p = read_buf(xdr, 16);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	memcpy(stateid->data, p, 16);
+	return 0;
+}
+
+static unsigned decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr)
+{
+	uint32_t *p;
+	unsigned int minor_version;
+	unsigned status;
+
+	status = decode_string(xdr, &hdr->taglen, &hdr->tag);
+	if (unlikely(status != 0))
+		return status;
+	/* We do not like overly long tags! */
+	if (hdr->taglen > CB_OP_TAGLEN_MAXSZ-12 || hdr->taglen < 0) {
+		printk("NFSv4 CALLBACK %s: client sent tag of length %u\n",
+				__FUNCTION__, hdr->taglen);
+		return htonl(NFS4ERR_RESOURCE);
+	}
+	p = read_buf(xdr, 12);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	minor_version = ntohl(*p++);
+	/* Check minor version is zero. */
+	if (minor_version != 0) {
+		printk(KERN_WARNING "%s: NFSv4 server callback with illegal minor version %u!\n",
+				__FUNCTION__, minor_version);
+		return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
+	}
+	hdr->callback_ident = ntohl(*p++);
+	hdr->nops = ntohl(*p);
+	return 0;
+}
+
+static unsigned decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
+{
+	uint32_t *p;
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	*op = ntohl(*p);
+	return 0;
+}
+
+static unsigned decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_getattrargs *args)
+{
+	unsigned status;
+
+	status = decode_fh(xdr, &args->fh);
+	if (unlikely(status != 0))
+		goto out;
+	args->addr = &rqstp->rq_addr;
+	status = decode_bitmap(xdr, args->bitmap);
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
+	return status;
+}
+
+static unsigned decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
+{
+	uint32_t *p;
+	unsigned status;
+
+	args->addr = &rqstp->rq_addr;
+	status = decode_stateid(xdr, &args->stateid);
+	if (unlikely(status != 0))
+		goto out;
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL)) {
+		status = htonl(NFS4ERR_RESOURCE);
+		goto out;
+	}
+	args->truncate = ntohl(*p);
+	status = decode_fh(xdr, &args->fh);
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
+	return 0;
+}
+
+static unsigned encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
+{
+	uint32_t *p;
+
+	p = xdr_reserve_space(xdr, 4 + len);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	xdr_encode_opaque(p, str, len);
+	return 0;
+}
+
+#define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE)
+#define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY)
+static unsigned encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, uint32_t **savep)
+{
+	uint32_t bm[2];
+	uint32_t *p;
+
+	bm[0] = htonl(bitmap[0] & CB_SUPPORTED_ATTR0);
+	bm[1] = htonl(bitmap[1] & CB_SUPPORTED_ATTR1);
+	if (bm[1] != 0) {
+		p = xdr_reserve_space(xdr, 16);
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_RESOURCE);
+		*p++ = htonl(2);
+		*p++ = bm[0];
+		*p++ = bm[1];
+	} else if (bm[0] != 0) {
+		p = xdr_reserve_space(xdr, 12);
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_RESOURCE);
+		*p++ = htonl(1);
+		*p++ = bm[0];
+	} else {
+		p = xdr_reserve_space(xdr, 8);
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_RESOURCE);
+		*p++ = htonl(0);
+	}
+	*savep = p;
+	return 0;
+}
+
+static unsigned encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t change)
+{
+	uint32_t *p;
+
+	if (!(bitmap[0] & FATTR4_WORD0_CHANGE))
+		return 0;
+	p = xdr_reserve_space(xdr, 8);
+	if (unlikely(p == 0))
+		return htonl(NFS4ERR_RESOURCE);
+	p = xdr_encode_hyper(p, change);
+	return 0;
+}
+
+static unsigned encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t size)
+{
+	uint32_t *p;
+
+	if (!(bitmap[0] & FATTR4_WORD0_SIZE))
+		return 0;
+	p = xdr_reserve_space(xdr, 8);
+	if (unlikely(p == 0))
+		return htonl(NFS4ERR_RESOURCE);
+	p = xdr_encode_hyper(p, size);
+	return 0;
+}
+
+static unsigned encode_attr_time(struct xdr_stream *xdr, const struct timespec *time)
+{
+	uint32_t *p;
+
+	p = xdr_reserve_space(xdr, 12);
+	if (unlikely(p == 0))
+		return htonl(NFS4ERR_RESOURCE);
+	p = xdr_encode_hyper(p, time->tv_sec);
+	*p = htonl(time->tv_nsec);
+	return 0;
+}
+
+static unsigned encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
+{
+	if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA))
+		return 0;
+	return encode_attr_time(xdr,time);
+}
+
+static unsigned encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
+{
+	if (!(bitmap[1] & FATTR4_WORD1_TIME_MODIFY))
+		return 0;
+	return encode_attr_time(xdr,time);
+}
+
+static unsigned encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr)
+{
+	unsigned status;
+
+	hdr->status = xdr_reserve_space(xdr, 4);
+	if (unlikely(hdr->status == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	status = encode_string(xdr, hdr->taglen, hdr->tag);
+	if (unlikely(status != 0))
+		return status;
+	hdr->nops = xdr_reserve_space(xdr, 4);
+	if (unlikely(hdr->nops == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	return 0;
+}
+
+static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
+{
+	uint32_t *p;
+	
+	p = xdr_reserve_space(xdr, 8);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	*p++ = htonl(op);
+	*p = res;
+	return 0;
+}
+
+static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
+{
+	uint32_t *savep;
+	unsigned status = res->status;
+	
+	if (unlikely(status != 0))
+		goto out;
+	status = encode_attr_bitmap(xdr, res->bitmap, &savep);
+	if (unlikely(status != 0))
+		goto out;
+	status = encode_attr_change(xdr, res->bitmap, res->change_attr);
+	if (unlikely(status != 0))
+		goto out;
+	status = encode_attr_size(xdr, res->bitmap, res->size);
+	if (unlikely(status != 0))
+		goto out;
+	status = encode_attr_ctime(xdr, res->bitmap, &res->ctime);
+	if (unlikely(status != 0))
+		goto out;
+	status = encode_attr_mtime(xdr, res->bitmap, &res->mtime);
+	*savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1)));
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
+	return status;
+}
+
+static unsigned process_op(struct svc_rqst *rqstp,
+		struct xdr_stream *xdr_in, void *argp,
+		struct xdr_stream *xdr_out, void *resp)
+{
+	struct callback_op *op;
+	unsigned int op_nr;
+	unsigned int status = 0;
+	long maxlen;
+	unsigned res;
+
+	dprintk("%s: start\n", __FUNCTION__);
+	status = decode_op_hdr(xdr_in, &op_nr);
+	if (unlikely(status != 0)) {
+		op_nr = OP_CB_ILLEGAL;
+		op = &callback_ops[0];
+	} else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) {
+		op_nr = OP_CB_ILLEGAL;
+		op = &callback_ops[0];
+		status = htonl(NFS4ERR_OP_ILLEGAL);
+	} else
+		op = &callback_ops[op_nr];
+
+	maxlen = xdr_out->end - xdr_out->p;
+	if (maxlen > 0 && maxlen < PAGE_SIZE) {
+		if (likely(status == 0 && op->decode_args != NULL))
+			status = op->decode_args(rqstp, xdr_in, argp);
+		if (likely(status == 0 && op->process_op != NULL))
+			status = op->process_op(argp, resp);
+	} else
+		status = htonl(NFS4ERR_RESOURCE);
+
+	res = encode_op_hdr(xdr_out, op_nr, status);
+	if (status == 0)
+		status = res;
+	if (op->encode_res != NULL && status == 0)
+		status = op->encode_res(rqstp, xdr_out, resp);
+	dprintk("%s: done, status = %d\n", __FUNCTION__, status);
+	return status;
+}
+
+/*
+ * Decode, process and encode a COMPOUND
+ */
+static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	struct cb_compound_hdr_arg hdr_arg;
+	struct cb_compound_hdr_res hdr_res;
+	struct xdr_stream xdr_in, xdr_out;
+	uint32_t *p;
+	unsigned int status;
+	unsigned int nops = 1;
+
+	dprintk("%s: start\n", __FUNCTION__);
+
+	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
+
+	p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
+	rqstp->rq_res.head[0].iov_len = PAGE_SIZE;
+	xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
+
+	decode_compound_hdr_arg(&xdr_in, &hdr_arg);
+	hdr_res.taglen = hdr_arg.taglen;
+	hdr_res.tag = hdr_arg.tag;
+	encode_compound_hdr_res(&xdr_out, &hdr_res);
+
+	for (;;) {
+		status = process_op(rqstp, &xdr_in, argp, &xdr_out, resp);
+		if (status != 0)
+			break;
+		if (nops == hdr_arg.nops)
+			break;
+		nops++;
+	}
+	*hdr_res.status = status;
+	*hdr_res.nops = htonl(nops);
+	dprintk("%s: done, status = %u\n", __FUNCTION__, status);
+	return rpc_success;
+}
+
+/*
+ * Define NFS4 callback COMPOUND ops.
+ */
+static struct callback_op callback_ops[] = {
+	[0] = {
+		.res_maxsize = CB_OP_HDR_RES_MAXSZ,
+	},
+	[OP_CB_GETATTR] = {
+		.process_op = (callback_process_op_t)nfs4_callback_getattr,
+		.decode_args = (callback_decode_arg_t)decode_getattr_args,
+		.encode_res = (callback_encode_res_t)encode_getattr_res,
+		.res_maxsize = CB_OP_GETATTR_RES_MAXSZ,
+	},
+	[OP_CB_RECALL] = {
+		.process_op = (callback_process_op_t)nfs4_callback_recall,
+		.decode_args = (callback_decode_arg_t)decode_recall_args,
+		.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
+	}
+};
+
+/*
+ * Define NFS4 callback procedures
+ */
+static struct svc_procedure nfs4_callback_procedures1[] = {
+	[CB_NULL] = {
+		.pc_func = nfs4_callback_null,
+		.pc_decode = (kxdrproc_t)nfs4_decode_void,
+		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_xdrressize = 1,
+	},
+	[CB_COMPOUND] = {
+		.pc_func = nfs4_callback_compound,
+		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_argsize = 256,
+		.pc_ressize = 256,
+		.pc_xdrressize = NFS4_CALLBACK_BUFSIZE,
+	}
+};
+
+struct svc_version nfs4_callback_version1 = {
+	.vs_vers = 1,
+	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
+	.vs_proc = nfs4_callback_procedures1,
+	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
+	.vs_dispatch = NULL,
+};
+
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
new file mode 100644
index 00000000000..5b9c60f9779
--- /dev/null
+++ b/fs/nfs/delegation.c
@@ -0,0 +1,342 @@
+/*
+ * linux/fs/nfs/delegation.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFS file delegation management
+ *
+ */
+#include <linux/config.h>
+#include <linux/completion.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_xdr.h>
+
+#include "delegation.h"
+
+static struct nfs_delegation *nfs_alloc_delegation(void)
+{
+	return (struct nfs_delegation *)kmalloc(sizeof(struct nfs_delegation), GFP_KERNEL);
+}
+
+static void nfs_free_delegation(struct nfs_delegation *delegation)
+{
+	if (delegation->cred)
+		put_rpccred(delegation->cred);
+	kfree(delegation);
+}
+
+static void nfs_delegation_claim_opens(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_open_context *ctx;
+	struct nfs4_state *state;
+
+again:
+	spin_lock(&inode->i_lock);
+	list_for_each_entry(ctx, &nfsi->open_files, list) {
+		state = ctx->state;
+		if (state == NULL)
+			continue;
+		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+			continue;
+		get_nfs_open_context(ctx);
+		spin_unlock(&inode->i_lock);
+		if (nfs4_open_delegation_recall(ctx->dentry, state) < 0)
+			return;
+		put_nfs_open_context(ctx);
+		goto again;
+	}
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Set up a delegation on an inode
+ */
+void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
+{
+	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
+
+	if (delegation == NULL)
+		return;
+	memcpy(delegation->stateid.data, res->delegation.data,
+			sizeof(delegation->stateid.data));
+	delegation->type = res->delegation_type;
+	delegation->maxsize = res->maxsize;
+	put_rpccred(cred);
+	delegation->cred = get_rpccred(cred);
+	delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM;
+	NFS_I(inode)->delegation_state = delegation->type;
+	smp_wmb();
+}
+
+/*
+ * Set up a delegation on an inode
+ */
+int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
+{
+	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+	int status = 0;
+
+	delegation = nfs_alloc_delegation();
+	if (delegation == NULL)
+		return -ENOMEM;
+	memcpy(delegation->stateid.data, res->delegation.data,
+			sizeof(delegation->stateid.data));
+	delegation->type = res->delegation_type;
+	delegation->maxsize = res->maxsize;
+	delegation->cred = get_rpccred(cred);
+	delegation->inode = inode;
+
+	spin_lock(&clp->cl_lock);
+	if (nfsi->delegation == NULL) {
+		list_add(&delegation->super_list, &clp->cl_delegations);
+		nfsi->delegation = delegation;
+		nfsi->delegation_state = delegation->type;
+		delegation = NULL;
+	} else {
+		if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
+					sizeof(delegation->stateid)) != 0 ||
+				delegation->type != nfsi->delegation->type) {
+			printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
+					__FUNCTION__, NIPQUAD(clp->cl_addr));
+			status = -EIO;
+		}
+	}
+	spin_unlock(&clp->cl_lock);
+	if (delegation != NULL)
+		kfree(delegation);
+	return status;
+}
+
+static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
+{
+	int res = 0;
+
+	__nfs_revalidate_inode(NFS_SERVER(inode), inode);
+
+	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
+	nfs_free_delegation(delegation);
+	return res;
+}
+
+/* Sync all data to disk upon delegation return */
+static void nfs_msync_inode(struct inode *inode)
+{
+	filemap_fdatawrite(inode->i_mapping);
+	nfs_wb_all(inode);
+	filemap_fdatawait(inode->i_mapping);
+}
+
+/*
+ * Basic procedure for returning a delegation to the server
+ */
+int nfs_inode_return_delegation(struct inode *inode)
+{
+	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+	int res = 0;
+
+	nfs_msync_inode(inode);
+	down_read(&clp->cl_sem);
+	/* Guard against new delegated open calls */
+	down_write(&nfsi->rwsem);
+	spin_lock(&clp->cl_lock);
+	delegation = nfsi->delegation;
+	if (delegation != NULL) {
+		list_del_init(&delegation->super_list);
+		nfsi->delegation = NULL;
+		nfsi->delegation_state = 0;
+	}
+	spin_unlock(&clp->cl_lock);
+	nfs_delegation_claim_opens(inode);
+	up_write(&nfsi->rwsem);
+	up_read(&clp->cl_sem);
+	nfs_msync_inode(inode);
+
+	if (delegation != NULL)
+		res = nfs_do_return_delegation(inode, delegation);
+	return res;
+}
+
+/*
+ * Return all delegations associated to a super block
+ */
+void nfs_return_all_delegations(struct super_block *sb)
+{
+	struct nfs4_client *clp = NFS_SB(sb)->nfs4_state;
+	struct nfs_delegation *delegation;
+	struct inode *inode;
+
+	if (clp == NULL)
+		return;
+restart:
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+		if (delegation->inode->i_sb != sb)
+			continue;
+		inode = igrab(delegation->inode);
+		if (inode == NULL)
+			continue;
+		spin_unlock(&clp->cl_lock);
+		nfs_inode_return_delegation(inode);
+		iput(inode);
+		goto restart;
+	}
+	spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
+ */
+void nfs_handle_cb_pathdown(struct nfs4_client *clp)
+{
+	struct nfs_delegation *delegation;
+	struct inode *inode;
+
+	if (clp == NULL)
+		return;
+restart:
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+		inode = igrab(delegation->inode);
+		if (inode == NULL)
+			continue;
+		spin_unlock(&clp->cl_lock);
+		nfs_inode_return_delegation(inode);
+		iput(inode);
+		goto restart;
+	}
+	spin_unlock(&clp->cl_lock);
+}
+
+struct recall_threadargs {
+	struct inode *inode;
+	struct nfs4_client *clp;
+	const nfs4_stateid *stateid;
+
+	struct completion started;
+	int result;
+};
+
+static int recall_thread(void *data)
+{
+	struct recall_threadargs *args = (struct recall_threadargs *)data;
+	struct inode *inode = igrab(args->inode);
+	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+
+	daemonize("nfsv4-delegreturn");
+
+	nfs_msync_inode(inode);
+	down_read(&clp->cl_sem);
+	down_write(&nfsi->rwsem);
+	spin_lock(&clp->cl_lock);
+	delegation = nfsi->delegation;
+	if (delegation != NULL && memcmp(delegation->stateid.data,
+				args->stateid->data,
+				sizeof(delegation->stateid.data)) == 0) {
+		list_del_init(&delegation->super_list);
+		nfsi->delegation = NULL;
+		nfsi->delegation_state = 0;
+		args->result = 0;
+	} else {
+		delegation = NULL;
+		args->result = -ENOENT;
+	}
+	spin_unlock(&clp->cl_lock);
+	complete(&args->started);
+	nfs_delegation_claim_opens(inode);
+	up_write(&nfsi->rwsem);
+	up_read(&clp->cl_sem);
+	nfs_msync_inode(inode);
+
+	if (delegation != NULL)
+		nfs_do_return_delegation(inode, delegation);
+	iput(inode);
+	module_put_and_exit(0);
+}
+
+/*
+ * Asynchronous delegation recall!
+ */
+int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
+{
+	struct recall_threadargs data = {
+		.inode = inode,
+		.stateid = stateid,
+	};
+	int status;
+
+	init_completion(&data.started);
+	__module_get(THIS_MODULE);
+	status = kernel_thread(recall_thread, &data, CLONE_KERNEL);
+	if (status < 0)
+		goto out_module_put;
+	wait_for_completion(&data.started);
+	return data.result;
+out_module_put:
+	module_put(THIS_MODULE);
+	return status;
+}
+
+/*
+ * Retrieve the inode associated with a delegation
+ */
+struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle)
+{
+	struct nfs_delegation *delegation;
+	struct inode *res = NULL;
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+		if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
+			res = igrab(delegation->inode);
+			break;
+		}
+	}
+	spin_unlock(&clp->cl_lock);
+	return res;
+}
+
+/*
+ * Mark all delegations as needing to be reclaimed
+ */
+void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
+{
+	struct nfs_delegation *delegation;
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(delegation, &clp->cl_delegations, super_list)
+		delegation->flags |= NFS_DELEGATION_NEED_RECLAIM;
+	spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * Reap all unclaimed delegations after reboot recovery is done
+ */
+void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
+{
+	struct nfs_delegation *delegation, *n;
+	LIST_HEAD(head);
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) {
+		if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
+			continue;
+		list_move(&delegation->super_list, &head);
+		NFS_I(delegation->inode)->delegation = NULL;
+		NFS_I(delegation->inode)->delegation_state = 0;
+	}
+	spin_unlock(&clp->cl_lock);
+	while(!list_empty(&head)) {
+		delegation = list_entry(head.next, struct nfs_delegation, super_list);
+		list_del(&delegation->super_list);
+		nfs_free_delegation(delegation);
+	}
+}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
new file mode 100644
index 00000000000..3f6c45a29d6
--- /dev/null
+++ b/fs/nfs/delegation.h
@@ -0,0 +1,57 @@
+/*
+ * linux/fs/nfs/delegation.h
+ *
+ * Copyright (c) Trond Myklebust
+ *
+ * Definitions pertaining to NFS delegated files
+ */
+#ifndef FS_NFS_DELEGATION_H
+#define FS_NFS_DELEGATION_H
+
+#if defined(CONFIG_NFS_V4)
+/*
+ * NFSv4 delegation
+ */
+struct nfs_delegation {
+	struct list_head super_list;
+	struct rpc_cred *cred;
+	struct inode *inode;
+	nfs4_stateid stateid;
+	int type;
+#define NFS_DELEGATION_NEED_RECLAIM 1
+	long flags;
+	loff_t maxsize;
+};
+
+int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+int nfs_inode_return_delegation(struct inode *inode);
+int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
+
+struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
+void nfs_return_all_delegations(struct super_block *sb);
+void nfs_handle_cb_pathdown(struct nfs4_client *clp);
+
+void nfs_delegation_mark_reclaim(struct nfs4_client *clp);
+void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
+
+/* NFSv4 delegation-related procedures */
+int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
+int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
+
+static inline int nfs_have_delegation(struct inode *inode, int flags)
+{
+	flags &= FMODE_READ|FMODE_WRITE;
+	smp_rmb();
+	if ((NFS_I(inode)->delegation_state & flags) == flags)
+		return 1;
+	return 0;
+}
+#else
+static inline int nfs_have_delegation(struct inode *inode, int flags)
+{
+	return 0;
+}
+#endif
+
+#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
new file mode 100644
index 00000000000..73f96acd5d3
--- /dev/null
+++ b/fs/nfs/dir.c
@@ -0,0 +1,1562 @@
+/*
+ *  linux/fs/nfs/dir.c
+ *
+ *  Copyright (C) 1992  Rick Sladkey
+ *
+ *  nfs directory handling functions
+ *
+ * 10 Apr 1996	Added silly rename for unlink	--okir
+ * 28 Sep 1996	Improved directory cache --okir
+ * 23 Aug 1997  Claus Heine claus@momo.math.rwth-aachen.de 
+ *              Re-implemented silly rename for unlink, newly implemented
+ *              silly rename for nfs_rename() following the suggestions
+ *              of Olaf Kirch (okir) found in this file.
+ *              Following Linus comments on my original hack, this version
+ *              depends only on the dcache stuff and doesn't touch the inode
+ *              layer (iput() and friends).
+ *  6 Jun 1999	Cache readdir lookups in the page cache. -DaveM
+ */
+
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+#include <linux/namei.h>
+
+#include "delegation.h"
+
+#define NFS_PARANOIA 1
+/* #define NFS_DEBUG_VERBOSE 1 */
+
+static int nfs_opendir(struct inode *, struct file *);
+static int nfs_readdir(struct file *, void *, filldir_t);
+static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
+static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
+static int nfs_mkdir(struct inode *, struct dentry *, int);
+static int nfs_rmdir(struct inode *, struct dentry *);
+static int nfs_unlink(struct inode *, struct dentry *);
+static int nfs_symlink(struct inode *, struct dentry *, const char *);
+static int nfs_link(struct dentry *, struct inode *, struct dentry *);
+static int nfs_mknod(struct inode *, struct dentry *, int, dev_t);
+static int nfs_rename(struct inode *, struct dentry *,
+		      struct inode *, struct dentry *);
+static int nfs_fsync_dir(struct file *, struct dentry *, int);
+
+struct file_operations nfs_dir_operations = {
+	.read		= generic_read_dir,
+	.readdir	= nfs_readdir,
+	.open		= nfs_opendir,
+	.release	= nfs_release,
+	.fsync		= nfs_fsync_dir,
+};
+
+struct inode_operations nfs_dir_inode_operations = {
+	.create		= nfs_create,
+	.lookup		= nfs_lookup,
+	.link		= nfs_link,
+	.unlink		= nfs_unlink,
+	.symlink	= nfs_symlink,
+	.mkdir		= nfs_mkdir,
+	.rmdir		= nfs_rmdir,
+	.mknod		= nfs_mknod,
+	.rename		= nfs_rename,
+	.permission	= nfs_permission,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+};
+
+#ifdef CONFIG_NFS_V4
+
+static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
+struct inode_operations nfs4_dir_inode_operations = {
+	.create		= nfs_create,
+	.lookup		= nfs_atomic_lookup,
+	.link		= nfs_link,
+	.unlink		= nfs_unlink,
+	.symlink	= nfs_symlink,
+	.mkdir		= nfs_mkdir,
+	.rmdir		= nfs_rmdir,
+	.mknod		= nfs_mknod,
+	.rename		= nfs_rename,
+	.permission	= nfs_permission,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+};
+
+#endif /* CONFIG_NFS_V4 */
+
+/*
+ * Open file
+ */
+static int
+nfs_opendir(struct inode *inode, struct file *filp)
+{
+	int res = 0;
+
+	lock_kernel();
+	/* Call generic open code in order to cache credentials */
+	if (!res)
+		res = nfs_open(inode, filp);
+	unlock_kernel();
+	return res;
+}
+
+typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int);
+typedef struct {
+	struct file	*file;
+	struct page	*page;
+	unsigned long	page_index;
+	u32		*ptr;
+	u64		target;
+	struct nfs_entry *entry;
+	decode_dirent_t	decode;
+	int		plus;
+	int		error;
+} nfs_readdir_descriptor_t;
+
+/* Now we cache directories properly, by stuffing the dirent
+ * data directly in the page cache.
+ *
+ * Inode invalidation due to refresh etc. takes care of
+ * _everything_, no sloppy entry flushing logic, no extraneous
+ * copying, network direct to page cache, the way it was meant
+ * to be.
+ *
+ * NOTE: Dirent information verification is done always by the
+ *	 page-in of the RPC reply, nowhere else, this simplies
+ *	 things substantially.
+ */
+static
+int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
+{
+	struct file	*file = desc->file;
+	struct inode	*inode = file->f_dentry->d_inode;
+	struct rpc_cred	*cred = nfs_file_cred(file);
+	unsigned long	timestamp;
+	int		error;
+
+	dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index);
+
+ again:
+	timestamp = jiffies;
+	error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->entry->cookie, page,
+					  NFS_SERVER(inode)->dtsize, desc->plus);
+	if (error < 0) {
+		/* We requested READDIRPLUS, but the server doesn't grok it */
+		if (error == -ENOTSUPP && desc->plus) {
+			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
+			NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS;
+			desc->plus = 0;
+			goto again;
+		}
+		goto error;
+	}
+	SetPageUptodate(page);
+	NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+	/* Ensure consistent page alignment of the data.
+	 * Note: assumes we have exclusive access to this mapping either
+	 *	 throught inode->i_sem or some other mechanism.
+	 */
+	if (page->index == 0) {
+		invalidate_inode_pages(inode->i_mapping);
+		NFS_I(inode)->readdir_timestamp = timestamp;
+	}
+	unlock_page(page);
+	return 0;
+ error:
+	SetPageError(page);
+	unlock_page(page);
+	nfs_zap_caches(inode);
+	desc->error = error;
+	return -EIO;
+}
+
+static inline
+int dir_decode(nfs_readdir_descriptor_t *desc)
+{
+	u32	*p = desc->ptr;
+	p = desc->decode(p, desc->entry, desc->plus);
+	if (IS_ERR(p))
+		return PTR_ERR(p);
+	desc->ptr = p;
+	return 0;
+}
+
+static inline
+void dir_page_release(nfs_readdir_descriptor_t *desc)
+{
+	kunmap(desc->page);
+	page_cache_release(desc->page);
+	desc->page = NULL;
+	desc->ptr = NULL;
+}
+
+/*
+ * Given a pointer to a buffer that has already been filled by a call
+ * to readdir, find the next entry.
+ *
+ * If the end of the buffer has been reached, return -EAGAIN, if not,
+ * return the offset within the buffer of the next entry to be
+ * read.
+ */
+static inline
+int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page)
+{
+	struct nfs_entry *entry = desc->entry;
+	int		loop_count = 0,
+			status;
+
+	while((status = dir_decode(desc)) == 0) {
+		dfprintk(VFS, "NFS: found cookie %Lu\n", (long long)entry->cookie);
+		if (entry->prev_cookie == desc->target)
+			break;
+		if (loop_count++ > 200) {
+			loop_count = 0;
+			schedule();
+		}
+	}
+	dfprintk(VFS, "NFS: find_dirent() returns %d\n", status);
+	return status;
+}
+
+/*
+ * Find the given page, and call find_dirent() in order to try to
+ * return the next entry.
+ */
+static inline
+int find_dirent_page(nfs_readdir_descriptor_t *desc)
+{
+	struct inode	*inode = desc->file->f_dentry->d_inode;
+	struct page	*page;
+	int		status;
+
+	dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index);
+
+	page = read_cache_page(inode->i_mapping, desc->page_index,
+			       (filler_t *)nfs_readdir_filler, desc);
+	if (IS_ERR(page)) {
+		status = PTR_ERR(page);
+		goto out;
+	}
+	if (!PageUptodate(page))
+		goto read_error;
+
+	/* NOTE: Someone else may have changed the READDIRPLUS flag */
+	desc->page = page;
+	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
+	status = find_dirent(desc, page);
+	if (status < 0)
+		dir_page_release(desc);
+ out:
+	dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status);
+	return status;
+ read_error:
+	page_cache_release(page);
+	return -EIO;
+}
+
+/*
+ * Recurse through the page cache pages, and return a
+ * filled nfs_entry structure of the next directory entry if possible.
+ *
+ * The target for the search is 'desc->target'.
+ */
+static inline
+int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
+{
+	int		loop_count = 0;
+	int		res;
+
+	dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target);
+	for (;;) {
+		res = find_dirent_page(desc);
+		if (res != -EAGAIN)
+			break;
+		/* Align to beginning of next page */
+		desc->page_index ++;
+		if (loop_count++ > 200) {
+			loop_count = 0;
+			schedule();
+		}
+	}
+	dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res);
+	return res;
+}
+
+static inline unsigned int dt_type(struct inode *inode)
+{
+	return (inode->i_mode >> 12) & 15;
+}
+
+static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc);
+
+/*
+ * Once we've found the start of the dirent within a page: fill 'er up...
+ */
+static 
+int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
+		   filldir_t filldir)
+{
+	struct file	*file = desc->file;
+	struct nfs_entry *entry = desc->entry;
+	struct dentry	*dentry = NULL;
+	unsigned long	fileid;
+	int		loop_count = 0,
+			res;
+
+	dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target);
+
+	for(;;) {
+		unsigned d_type = DT_UNKNOWN;
+		/* Note: entry->prev_cookie contains the cookie for
+		 *	 retrieving the current dirent on the server */
+		fileid = nfs_fileid_to_ino_t(entry->ino);
+
+		/* Get a dentry if we have one */
+		if (dentry != NULL)
+			dput(dentry);
+		dentry = nfs_readdir_lookup(desc);
+
+		/* Use readdirplus info */
+		if (dentry != NULL && dentry->d_inode != NULL) {
+			d_type = dt_type(dentry->d_inode);
+			fileid = dentry->d_inode->i_ino;
+		}
+
+		res = filldir(dirent, entry->name, entry->len, 
+			      entry->prev_cookie, fileid, d_type);
+		if (res < 0)
+			break;
+		file->f_pos = desc->target = entry->cookie;
+		if (dir_decode(desc) != 0) {
+			desc->page_index ++;
+			break;
+		}
+		if (loop_count++ > 200) {
+			loop_count = 0;
+			schedule();
+		}
+	}
+	dir_page_release(desc);
+	if (dentry != NULL)
+		dput(dentry);
+	dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res);
+	return res;
+}
+
+/*
+ * If we cannot find a cookie in our cache, we suspect that this is
+ * because it points to a deleted file, so we ask the server to return
+ * whatever it thinks is the next entry. We then feed this to filldir.
+ * If all goes well, we should then be able to find our way round the
+ * cache on the next call to readdir_search_pagecache();
+ *
+ * NOTE: we cannot add the anonymous page to the pagecache because
+ *	 the data it contains might not be page aligned. Besides,
+ *	 we should already have a complete representation of the
+ *	 directory in the page cache by the time we get here.
+ */
+static inline
+int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
+		     filldir_t filldir)
+{
+	struct file	*file = desc->file;
+	struct inode	*inode = file->f_dentry->d_inode;
+	struct rpc_cred	*cred = nfs_file_cred(file);
+	struct page	*page = NULL;
+	int		status;
+
+	dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target);
+
+	page = alloc_page(GFP_HIGHUSER);
+	if (!page) {
+		status = -ENOMEM;
+		goto out;
+	}
+	desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target,
+						page,
+						NFS_SERVER(inode)->dtsize,
+						desc->plus);
+	NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+	desc->page = page;
+	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
+	if (desc->error >= 0) {
+		if ((status = dir_decode(desc)) == 0)
+			desc->entry->prev_cookie = desc->target;
+	} else
+		status = -EIO;
+	if (status < 0)
+		goto out_release;
+
+	status = nfs_do_filldir(desc, dirent, filldir);
+
+	/* Reset read descriptor so it searches the page cache from
+	 * the start upon the next call to readdir_search_pagecache() */
+	desc->page_index = 0;
+	desc->entry->cookie = desc->entry->prev_cookie = 0;
+	desc->entry->eof = 0;
+ out:
+	dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status);
+	return status;
+ out_release:
+	dir_page_release(desc);
+	goto out;
+}
+
+/* The file offset position is now represented as a true offset into the
+ * page cache as is the case in most of the other filesystems.
+ */
+static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct dentry	*dentry = filp->f_dentry;
+	struct inode	*inode = dentry->d_inode;
+	nfs_readdir_descriptor_t my_desc,
+			*desc = &my_desc;
+	struct nfs_entry my_entry;
+	struct nfs_fh	 fh;
+	struct nfs_fattr fattr;
+	long		res;
+
+	lock_kernel();
+
+	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (res < 0) {
+		unlock_kernel();
+		return res;
+	}
+
+	/*
+	 * filp->f_pos points to the file offset in the page cache.
+	 * but if the cache has meanwhile been zapped, we need to
+	 * read from the last dirent to revalidate f_pos
+	 * itself.
+	 */
+	memset(desc, 0, sizeof(*desc));
+
+	desc->file = filp;
+	desc->target = filp->f_pos;
+	desc->decode = NFS_PROTO(inode)->decode_dirent;
+	desc->plus = NFS_USE_READDIRPLUS(inode);
+
+	my_entry.cookie = my_entry.prev_cookie = 0;
+	my_entry.eof = 0;
+	my_entry.fh = &fh;
+	my_entry.fattr = &fattr;
+	desc->entry = &my_entry;
+
+	while(!desc->entry->eof) {
+		res = readdir_search_pagecache(desc);
+		if (res == -EBADCOOKIE) {
+			/* This means either end of directory */
+			if (desc->entry->cookie != desc->target) {
+				/* Or that the server has 'lost' a cookie */
+				res = uncached_readdir(desc, dirent, filldir);
+				if (res >= 0)
+					continue;
+			}
+			res = 0;
+			break;
+		}
+		if (res == -ETOOSMALL && desc->plus) {
+			NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS;
+			nfs_zap_caches(inode);
+			desc->plus = 0;
+			desc->entry->eof = 0;
+			continue;
+		}
+		if (res < 0)
+			break;
+
+		res = nfs_do_filldir(desc, dirent, filldir);
+		if (res < 0) {
+			res = 0;
+			break;
+		}
+	}
+	unlock_kernel();
+	if (desc->error < 0)
+		return desc->error;
+	if (res < 0)
+		return res;
+	return 0;
+}
+
+/*
+ * All directory operations under NFS are synchronous, so fsync()
+ * is a dummy operation.
+ */
+int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
+{
+	return 0;
+}
+
+/*
+ * A check for whether or not the parent directory has changed.
+ * In the case it has, we assume that the dentries are untrustworthy
+ * and may need to be looked up again.
+ */
+static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
+{
+	if (IS_ROOT(dentry))
+		return 1;
+	if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0
+			|| nfs_attribute_timeout(dir))
+		return 0;
+	return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata);
+}
+
+static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
+{
+	dentry->d_fsdata = (void *)verf;
+}
+
+/*
+ * Whenever an NFS operation succeeds, we know that the dentry
+ * is valid, so we update the revalidation timestamp.
+ */
+static inline void nfs_renew_times(struct dentry * dentry)
+{
+	dentry->d_time = jiffies;
+}
+
+static inline
+int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+
+	if (nd != NULL) {
+		int ndflags = nd->flags;
+		/* VFS wants an on-the-wire revalidation */
+		if (ndflags & LOOKUP_REVAL)
+			goto out_force;
+		/* This is an open(2) */
+		if ((ndflags & LOOKUP_OPEN) &&
+				!(ndflags & LOOKUP_CONTINUE) &&
+				!(server->flags & NFS_MOUNT_NOCTO))
+			goto out_force;
+	}
+	return nfs_revalidate_inode(server, inode);
+out_force:
+	return __nfs_revalidate_inode(server, inode);
+}
+
+/*
+ * We judge how long we want to trust negative
+ * dentries by looking at the parent inode mtime.
+ *
+ * If parent mtime has changed, we revalidate, else we wait for a
+ * period corresponding to the parent's attribute cache timeout value.
+ */
+static inline
+int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
+		       struct nameidata *nd)
+{
+	int ndflags = 0;
+
+	if (nd)
+		ndflags = nd->flags;
+	/* Don't revalidate a negative dentry if we're creating a new file */
+	if ((ndflags & LOOKUP_CREATE) && !(ndflags & LOOKUP_CONTINUE))
+		return 0;
+	return !nfs_check_verifier(dir, dentry);
+}
+
+/*
+ * This is called every time the dcache has a lookup hit,
+ * and we should check whether we can really trust that
+ * lookup.
+ *
+ * NOTE! The hit can be a negative hit too, don't assume
+ * we have an inode!
+ *
+ * If the parent directory is seen to have changed, we throw out the
+ * cached dentry and do a new lookup.
+ */
+static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
+{
+	struct inode *dir;
+	struct inode *inode;
+	struct dentry *parent;
+	int error;
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr;
+	unsigned long verifier;
+
+	parent = dget_parent(dentry);
+	lock_kernel();
+	dir = parent->d_inode;
+	inode = dentry->d_inode;
+
+	if (!inode) {
+		if (nfs_neg_need_reval(dir, dentry, nd))
+			goto out_bad;
+		goto out_valid;
+	}
+
+	if (is_bad_inode(inode)) {
+		dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n",
+			dentry->d_parent->d_name.name, dentry->d_name.name);
+		goto out_bad;
+	}
+
+	/* Revalidate parent directory attribute cache */
+	if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
+		goto out_zap_parent;
+
+	/* Force a full look up iff the parent directory has changed */
+	if (nfs_check_verifier(dir, dentry)) {
+		if (nfs_lookup_verify_inode(inode, nd))
+			goto out_zap_parent;
+		goto out_valid;
+	}
+
+	if (NFS_STALE(inode))
+		goto out_bad;
+
+	verifier = nfs_save_change_attribute(dir);
+	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
+	if (error)
+		goto out_bad;
+	if (nfs_compare_fh(NFS_FH(inode), &fhandle))
+		goto out_bad;
+	if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
+		goto out_bad;
+
+	nfs_renew_times(dentry);
+	nfs_set_verifier(dentry, verifier);
+ out_valid:
+	unlock_kernel();
+	dput(parent);
+	return 1;
+out_zap_parent:
+	nfs_zap_caches(dir);
+ out_bad:
+	NFS_CACHEINV(dir);
+	if (inode && S_ISDIR(inode->i_mode)) {
+		/* Purge readdir caches. */
+		nfs_zap_caches(inode);
+		/* If we have submounts, don't unhash ! */
+		if (have_submounts(dentry))
+			goto out_valid;
+		shrink_dcache_parent(dentry);
+	}
+	d_drop(dentry);
+	unlock_kernel();
+	dput(parent);
+	return 0;
+}
+
+/*
+ * This is called from dput() when d_count is going to 0.
+ */
+static int nfs_dentry_delete(struct dentry *dentry)
+{
+	dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		dentry->d_flags);
+
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
+		/* Unhash it, so that ->d_iput() would be called */
+		return 1;
+	}
+	if (!(dentry->d_sb->s_flags & MS_ACTIVE)) {
+		/* Unhash it, so that ancestors of killed async unlink
+		 * files will be cleaned up during umount */
+		return 1;
+	}
+	return 0;
+
+}
+
+/*
+ * Called when the dentry loses inode.
+ * We use it to clean up silly-renamed files.
+ */
+static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
+{
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
+		lock_kernel();
+		inode->i_nlink--;
+		nfs_complete_unlink(dentry);
+		unlock_kernel();
+	}
+	/* When creating a negative dentry, we want to renew d_time */
+	nfs_renew_times(dentry);
+	iput(inode);
+}
+
+struct dentry_operations nfs_dentry_operations = {
+	.d_revalidate	= nfs_lookup_revalidate,
+	.d_delete	= nfs_dentry_delete,
+	.d_iput		= nfs_dentry_iput,
+};
+
+static inline
+int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
+{
+	if (NFS_PROTO(dir)->version == 2)
+		return 0;
+	if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE))
+		return 0;
+	return (nd->intent.open.flags & O_EXCL) != 0;
+}
+
+static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+{
+	struct dentry *res;
+	struct inode *inode = NULL;
+	int error;
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr;
+
+	dfprintk(VFS, "NFS: lookup(%s/%s)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name);
+
+	res = ERR_PTR(-ENAMETOOLONG);
+	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
+		goto out;
+
+	res = ERR_PTR(-ENOMEM);
+	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+
+	lock_kernel();
+	/* Revalidate parent directory attribute cache */
+	error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
+	if (error < 0) {
+		res = ERR_PTR(error);
+		goto out_unlock;
+	}
+
+	/* If we're doing an exclusive create, optimize away the lookup */
+	if (nfs_is_exclusive_create(dir, nd))
+		goto no_entry;
+
+	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
+	if (error == -ENOENT)
+		goto no_entry;
+	if (error < 0) {
+		res = ERR_PTR(error);
+		goto out_unlock;
+	}
+	res = ERR_PTR(-EACCES);
+	inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
+	if (!inode)
+		goto out_unlock;
+no_entry:
+	res = d_add_unique(dentry, inode);
+	if (res != NULL)
+		dentry = res;
+	nfs_renew_times(dentry);
+	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+out_unlock:
+	unlock_kernel();
+out:
+	return res;
+}
+
+#ifdef CONFIG_NFS_V4
+static int nfs_open_revalidate(struct dentry *, struct nameidata *);
+
+struct dentry_operations nfs4_dentry_operations = {
+	.d_revalidate	= nfs_open_revalidate,
+	.d_delete	= nfs_dentry_delete,
+	.d_iput		= nfs_dentry_iput,
+};
+
+static int is_atomic_open(struct inode *dir, struct nameidata *nd)
+{
+	if (!nd)
+		return 0;
+	/* Check that we are indeed trying to open this file */
+	if ((nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_OPEN))
+		return 0;
+	/* NFS does not (yet) have a stateful open for directories */
+	if (nd->flags & LOOKUP_DIRECTORY)
+		return 0;
+	/* Are we trying to write to a read only partition? */
+	if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
+		return 0;
+	return 1;
+}
+
+static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+{
+	struct dentry *res = NULL;
+	struct inode *inode = NULL;
+	int error;
+
+	/* Check that we are indeed trying to open this file */
+	if (!is_atomic_open(dir, nd))
+		goto no_open;
+
+	if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
+		res = ERR_PTR(-ENAMETOOLONG);
+		goto out;
+	}
+	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+
+	/* Let vfs_create() deal with O_EXCL */
+	if (nd->intent.open.flags & O_EXCL)
+		goto no_entry;
+
+	/* Open the file on the server */
+	lock_kernel();
+	/* Revalidate parent directory attribute cache */
+	error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
+	if (error < 0) {
+		res = ERR_PTR(error);
+		goto out;
+	}
+
+	if (nd->intent.open.flags & O_CREAT) {
+		nfs_begin_data_update(dir);
+		inode = nfs4_atomic_open(dir, dentry, nd);
+		nfs_end_data_update(dir);
+	} else
+		inode = nfs4_atomic_open(dir, dentry, nd);
+	unlock_kernel();
+	if (IS_ERR(inode)) {
+		error = PTR_ERR(inode);
+		switch (error) {
+			/* Make a negative dentry */
+			case -ENOENT:
+				inode = NULL;
+				break;
+			/* This turned out not to be a regular file */
+			case -ELOOP:
+				if (!(nd->intent.open.flags & O_NOFOLLOW))
+					goto no_open;
+			/* case -EISDIR: */
+			/* case -EINVAL: */
+			default:
+				res = ERR_PTR(error);
+				goto out;
+		}
+	}
+no_entry:
+	res = d_add_unique(dentry, inode);
+	if (res != NULL)
+		dentry = res;
+	nfs_renew_times(dentry);
+	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+out:
+	return res;
+no_open:
+	return nfs_lookup(dir, dentry, nd);
+}
+
+static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+	struct dentry *parent = NULL;
+	struct inode *inode = dentry->d_inode;
+	struct inode *dir;
+	unsigned long verifier;
+	int openflags, ret = 0;
+
+	parent = dget_parent(dentry);
+	dir = parent->d_inode;
+	if (!is_atomic_open(dir, nd))
+		goto no_open;
+	/* We can't create new files in nfs_open_revalidate(), so we
+	 * optimize away revalidation of negative dentries.
+	 */
+	if (inode == NULL)
+		goto out;
+	/* NFS only supports OPEN on regular files */
+	if (!S_ISREG(inode->i_mode))
+		goto no_open;
+	openflags = nd->intent.open.flags;
+	/* We cannot do exclusive creation on a positive dentry */
+	if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
+		goto no_open;
+	/* We can't create new files, or truncate existing ones here */
+	openflags &= ~(O_CREAT|O_TRUNC);
+
+	/*
+	 * Note: we're not holding inode->i_sem and so may be racing with
+	 * operations that change the directory. We therefore save the
+	 * change attribute *before* we do the RPC call.
+	 */
+	lock_kernel();
+	verifier = nfs_save_change_attribute(dir);
+	ret = nfs4_open_revalidate(dir, dentry, openflags);
+	if (!ret)
+		nfs_set_verifier(dentry, verifier);
+	unlock_kernel();
+out:
+	dput(parent);
+	if (!ret)
+		d_drop(dentry);
+	return ret;
+no_open:
+	dput(parent);
+	if (inode != NULL && nfs_have_delegation(inode, FMODE_READ))
+		return 1;
+	return nfs_lookup_revalidate(dentry, nd);
+}
+#endif /* CONFIG_NFSV4 */
+
+static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
+{
+	struct dentry *parent = desc->file->f_dentry;
+	struct inode *dir = parent->d_inode;
+	struct nfs_entry *entry = desc->entry;
+	struct dentry *dentry, *alias;
+	struct qstr name = {
+		.name = entry->name,
+		.len = entry->len,
+	};
+	struct inode *inode;
+
+	switch (name.len) {
+		case 2:
+			if (name.name[0] == '.' && name.name[1] == '.')
+				return dget_parent(parent);
+			break;
+		case 1:
+			if (name.name[0] == '.')
+				return dget(parent);
+	}
+	name.hash = full_name_hash(name.name, name.len);
+	dentry = d_lookup(parent, &name);
+	if (dentry != NULL)
+		return dentry;
+	if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
+		return NULL;
+	/* Note: caller is already holding the dir->i_sem! */
+	dentry = d_alloc(parent, &name);
+	if (dentry == NULL)
+		return NULL;
+	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
+	if (!inode) {
+		dput(dentry);
+		return NULL;
+	}
+	alias = d_add_unique(dentry, inode);
+	if (alias != NULL) {
+		dput(dentry);
+		dentry = alias;
+	}
+	nfs_renew_times(dentry);
+	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+	return dentry;
+}
+
+/*
+ * Code common to create, mkdir, and mknod.
+ */
+int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
+				struct nfs_fattr *fattr)
+{
+	struct inode *inode;
+	int error = -EACCES;
+
+	/* We may have been initialized further down */
+	if (dentry->d_inode)
+		return 0;
+	if (fhandle->size == 0) {
+		struct inode *dir = dentry->d_parent->d_inode;
+		error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+		if (error)
+			goto out_err;
+	}
+	if (!(fattr->valid & NFS_ATTR_FATTR)) {
+		struct nfs_server *server = NFS_SB(dentry->d_sb);
+		error = server->rpc_ops->getattr(server, fhandle, fattr);
+		if (error < 0)
+			goto out_err;
+	}
+	error = -ENOMEM;
+	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
+	if (inode == NULL)
+		goto out_err;
+	d_instantiate(dentry, inode);
+	return 0;
+out_err:
+	d_drop(dentry);
+	return error;
+}
+
+/*
+ * Following a failed create operation, we drop the dentry rather
+ * than retain a negative dentry. This avoids a problem in the event
+ * that the operation succeeded on the server, but an error in the
+ * reply path made it appear to have failed.
+ */
+static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
+		struct nameidata *nd)
+{
+	struct iattr attr;
+	int error;
+	int open_flags = 0;
+
+	dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id, 
+		dir->i_ino, dentry->d_name.name);
+
+	attr.ia_mode = mode;
+	attr.ia_valid = ATTR_MODE;
+
+	if (nd && (nd->flags & LOOKUP_CREATE))
+		open_flags = nd->intent.open.flags;
+
+	lock_kernel();
+	nfs_begin_data_update(dir);
+	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
+	nfs_end_data_update(dir);
+	if (error != 0)
+		goto out_err;
+	nfs_renew_times(dentry);
+	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+	unlock_kernel();
+	return 0;
+out_err:
+	unlock_kernel();
+	d_drop(dentry);
+	return error;
+}
+
+/*
+ * See comments for nfs_proc_create regarding failed operations.
+ */
+static int
+nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+{
+	struct iattr attr;
+	int status;
+
+	dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id,
+		dir->i_ino, dentry->d_name.name);
+
+	if (!new_valid_dev(rdev))
+		return -EINVAL;
+
+	attr.ia_mode = mode;
+	attr.ia_valid = ATTR_MODE;
+
+	lock_kernel();
+	nfs_begin_data_update(dir);
+	status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
+	nfs_end_data_update(dir);
+	if (status != 0)
+		goto out_err;
+	nfs_renew_times(dentry);
+	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+	unlock_kernel();
+	return 0;
+out_err:
+	unlock_kernel();
+	d_drop(dentry);
+	return status;
+}
+
+/*
+ * See comments for nfs_proc_create regarding failed operations.
+ */
+static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	struct iattr attr;
+	int error;
+
+	dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id,
+		dir->i_ino, dentry->d_name.name);
+
+	attr.ia_valid = ATTR_MODE;
+	attr.ia_mode = mode | S_IFDIR;
+
+	lock_kernel();
+	nfs_begin_data_update(dir);
+	error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
+	nfs_end_data_update(dir);
+	if (error != 0)
+		goto out_err;
+	nfs_renew_times(dentry);
+	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+	unlock_kernel();
+	return 0;
+out_err:
+	d_drop(dentry);
+	unlock_kernel();
+	return error;
+}
+
+static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	int error;
+
+	dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id,
+		dir->i_ino, dentry->d_name.name);
+
+	lock_kernel();
+	nfs_begin_data_update(dir);
+	error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
+	/* Ensure the VFS deletes this inode */
+	if (error == 0 && dentry->d_inode != NULL)
+		dentry->d_inode->i_nlink = 0;
+	nfs_end_data_update(dir);
+	unlock_kernel();
+
+	return error;
+}
+
+static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
+{
+	static unsigned int sillycounter;
+	const int      i_inosize  = sizeof(dir->i_ino)*2;
+	const int      countersize = sizeof(sillycounter)*2;
+	const int      slen       = sizeof(".nfs") + i_inosize + countersize - 1;
+	char           silly[slen+1];
+	struct qstr    qsilly;
+	struct dentry *sdentry;
+	int            error = -EIO;
+
+	dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name, 
+		atomic_read(&dentry->d_count));
+
+#ifdef NFS_PARANOIA
+if (!dentry->d_inode)
+printk("NFS: silly-renaming %s/%s, negative dentry??\n",
+dentry->d_parent->d_name.name, dentry->d_name.name);
+#endif
+	/*
+	 * We don't allow a dentry to be silly-renamed twice.
+	 */
+	error = -EBUSY;
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		goto out;
+
+	sprintf(silly, ".nfs%*.*lx",
+		i_inosize, i_inosize, dentry->d_inode->i_ino);
+
+	sdentry = NULL;
+	do {
+		char *suffix = silly + slen - countersize;
+
+		dput(sdentry);
+		sillycounter++;
+		sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
+
+		dfprintk(VFS, "trying to rename %s to %s\n",
+			 dentry->d_name.name, silly);
+		
+		sdentry = lookup_one_len(silly, dentry->d_parent, slen);
+		/*
+		 * N.B. Better to return EBUSY here ... it could be
+		 * dangerous to delete the file while it's in use.
+		 */
+		if (IS_ERR(sdentry))
+			goto out;
+	} while(sdentry->d_inode != NULL); /* need negative lookup */
+
+	qsilly.name = silly;
+	qsilly.len  = strlen(silly);
+	nfs_begin_data_update(dir);
+	if (dentry->d_inode) {
+		nfs_begin_data_update(dentry->d_inode);
+		error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
+				dir, &qsilly);
+		nfs_end_data_update(dentry->d_inode);
+	} else
+		error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
+				dir, &qsilly);
+	nfs_end_data_update(dir);
+	if (!error) {
+		nfs_renew_times(dentry);
+		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+		d_move(dentry, sdentry);
+		error = nfs_async_unlink(dentry);
+ 		/* If we return 0 we don't unlink */
+	}
+	dput(sdentry);
+out:
+	return error;
+}
+
+/*
+ * Remove a file after making sure there are no pending writes,
+ * and after checking that the file has only one user. 
+ *
+ * We invalidate the attribute cache and free the inode prior to the operation
+ * to avoid possible races if the server reuses the inode.
+ */
+static int nfs_safe_remove(struct dentry *dentry)
+{
+	struct inode *dir = dentry->d_parent->d_inode;
+	struct inode *inode = dentry->d_inode;
+	int error = -EBUSY;
+		
+	dfprintk(VFS, "NFS: safe_remove(%s/%s)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name);
+
+	/* If the dentry was sillyrenamed, we simply call d_delete() */
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
+		error = 0;
+		goto out;
+	}
+
+	nfs_begin_data_update(dir);
+	if (inode != NULL) {
+		nfs_begin_data_update(inode);
+		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
+		/* The VFS may want to delete this inode */
+		if (error == 0)
+			inode->i_nlink--;
+		nfs_end_data_update(inode);
+	} else
+		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
+	nfs_end_data_update(dir);
+out:
+	return error;
+}
+
+/*  We do silly rename. In case sillyrename() returns -EBUSY, the inode
+ *  belongs to an active ".nfs..." file and we return -EBUSY.
+ *
+ *  If sillyrename() returns 0, we do nothing, otherwise we unlink.
+ */
+static int nfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	int error;
+	int need_rehash = 0;
+
+	dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
+		dir->i_ino, dentry->d_name.name);
+
+	lock_kernel();
+	spin_lock(&dcache_lock);
+	spin_lock(&dentry->d_lock);
+	if (atomic_read(&dentry->d_count) > 1) {
+		spin_unlock(&dentry->d_lock);
+		spin_unlock(&dcache_lock);
+		error = nfs_sillyrename(dir, dentry);
+		unlock_kernel();
+		return error;
+	}
+	if (!d_unhashed(dentry)) {
+		__d_drop(dentry);
+		need_rehash = 1;
+	}
+	spin_unlock(&dentry->d_lock);
+	spin_unlock(&dcache_lock);
+	error = nfs_safe_remove(dentry);
+	if (!error) {
+		nfs_renew_times(dentry);
+		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+	} else if (need_rehash)
+		d_rehash(dentry);
+	unlock_kernel();
+	return error;
+}
+
+static int
+nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+	struct iattr attr;
+	struct nfs_fattr sym_attr;
+	struct nfs_fh sym_fh;
+	struct qstr qsymname;
+	int error;
+
+	dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
+		dir->i_ino, dentry->d_name.name, symname);
+
+#ifdef NFS_PARANOIA
+if (dentry->d_inode)
+printk("nfs_proc_symlink: %s/%s not negative!\n",
+dentry->d_parent->d_name.name, dentry->d_name.name);
+#endif
+	/*
+	 * Fill in the sattr for the call.
+ 	 * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
+	 */
+	attr.ia_valid = ATTR_MODE;
+	attr.ia_mode = S_IFLNK | S_IRWXUGO;
+
+	qsymname.name = symname;
+	qsymname.len  = strlen(symname);
+
+	lock_kernel();
+	nfs_begin_data_update(dir);
+	error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname,
+					  &attr, &sym_fh, &sym_attr);
+	nfs_end_data_update(dir);
+	if (!error) {
+		error = nfs_instantiate(dentry, &sym_fh, &sym_attr);
+	} else {
+		if (error == -EEXIST)
+			printk("nfs_proc_symlink: %s/%s already exists??\n",
+			       dentry->d_parent->d_name.name, dentry->d_name.name);
+		d_drop(dentry);
+	}
+	unlock_kernel();
+	return error;
+}
+
+static int 
+nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = old_dentry->d_inode;
+	int error;
+
+	dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n",
+		old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
+		dentry->d_parent->d_name.name, dentry->d_name.name);
+
+	/*
+	 * Drop the dentry in advance to force a new lookup.
+	 * Since nfs_proc_link doesn't return a file handle,
+	 * we can't use the existing dentry.
+	 */
+	lock_kernel();
+	d_drop(dentry);
+
+	nfs_begin_data_update(dir);
+	nfs_begin_data_update(inode);
+	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
+	nfs_end_data_update(inode);
+	nfs_end_data_update(dir);
+	unlock_kernel();
+	return error;
+}
+
+/*
+ * RENAME
+ * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
+ * different file handle for the same inode after a rename (e.g. when
+ * moving to a different directory). A fail-safe method to do so would
+ * be to look up old_dir/old_name, create a link to new_dir/new_name and
+ * rename the old file using the sillyrename stuff. This way, the original
+ * file in old_dir will go away when the last process iput()s the inode.
+ *
+ * FIXED.
+ * 
+ * It actually works quite well. One needs to have the possibility for
+ * at least one ".nfs..." file in each directory the file ever gets
+ * moved or linked to which happens automagically with the new
+ * implementation that only depends on the dcache stuff instead of
+ * using the inode layer
+ *
+ * Unfortunately, things are a little more complicated than indicated
+ * above. For a cross-directory move, we want to make sure we can get
+ * rid of the old inode after the operation.  This means there must be
+ * no pending writes (if it's a file), and the use count must be 1.
+ * If these conditions are met, we can drop the dentries before doing
+ * the rename.
+ */
+static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+		      struct inode *new_dir, struct dentry *new_dentry)
+{
+	struct inode *old_inode = old_dentry->d_inode;
+	struct inode *new_inode = new_dentry->d_inode;
+	struct dentry *dentry = NULL, *rehash = NULL;
+	int error = -EBUSY;
+
+	/*
+	 * To prevent any new references to the target during the rename,
+	 * we unhash the dentry and free the inode in advance.
+	 */
+	lock_kernel();
+	if (!d_unhashed(new_dentry)) {
+		d_drop(new_dentry);
+		rehash = new_dentry;
+	}
+
+	dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
+		 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
+		 new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
+		 atomic_read(&new_dentry->d_count));
+
+	/*
+	 * First check whether the target is busy ... we can't
+	 * safely do _any_ rename if the target is in use.
+	 *
+	 * For files, make a copy of the dentry and then do a 
+	 * silly-rename. If the silly-rename succeeds, the
+	 * copied dentry is hashed and becomes the new target.
+	 */
+	if (!new_inode)
+		goto go_ahead;
+	if (S_ISDIR(new_inode->i_mode))
+		goto out;
+	else if (atomic_read(&new_dentry->d_count) > 2) {
+		int err;
+		/* copy the target dentry's name */
+		dentry = d_alloc(new_dentry->d_parent,
+				 &new_dentry->d_name);
+		if (!dentry)
+			goto out;
+
+		/* silly-rename the existing target ... */
+		err = nfs_sillyrename(new_dir, new_dentry);
+		if (!err) {
+			new_dentry = rehash = dentry;
+			new_inode = NULL;
+			/* instantiate the replacement target */
+			d_instantiate(new_dentry, NULL);
+		} else if (atomic_read(&new_dentry->d_count) > 1) {
+		/* dentry still busy? */
+#ifdef NFS_PARANOIA
+			printk("nfs_rename: target %s/%s busy, d_count=%d\n",
+			       new_dentry->d_parent->d_name.name,
+			       new_dentry->d_name.name,
+			       atomic_read(&new_dentry->d_count));
+#endif
+			goto out;
+		}
+	}
+
+go_ahead:
+	/*
+	 * ... prune child dentries and writebacks if needed.
+	 */
+	if (atomic_read(&old_dentry->d_count) > 1) {
+		nfs_wb_all(old_inode);
+		shrink_dcache_parent(old_dentry);
+	}
+
+	if (new_inode)
+		d_delete(new_dentry);
+
+	nfs_begin_data_update(old_dir);
+	nfs_begin_data_update(new_dir);
+	nfs_begin_data_update(old_inode);
+	error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
+					   new_dir, &new_dentry->d_name);
+	nfs_end_data_update(old_inode);
+	nfs_end_data_update(new_dir);
+	nfs_end_data_update(old_dir);
+out:
+	if (rehash)
+		d_rehash(rehash);
+	if (!error) {
+		if (!S_ISDIR(old_inode->i_mode))
+			d_move(old_dentry, new_dentry);
+		nfs_renew_times(new_dentry);
+		nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir));
+	}
+
+	/* new dentry created? */
+	if (dentry)
+		dput(dentry);
+	unlock_kernel();
+	return error;
+}
+
+int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+{
+	struct nfs_access_entry *cache = &NFS_I(inode)->cache_access;
+
+	if (cache->cred != cred
+			|| time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
+			|| (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS))
+		return -ENOENT;
+	memcpy(res, cache, sizeof(*res));
+	return 0;
+}
+
+void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+{
+	struct nfs_access_entry *cache = &NFS_I(inode)->cache_access;
+
+	if (cache->cred != set->cred) {
+		if (cache->cred)
+			put_rpccred(cache->cred);
+		cache->cred = get_rpccred(set->cred);
+	}
+	NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS;
+	cache->jiffies = set->jiffies;
+	cache->mask = set->mask;
+}
+
+static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
+{
+	struct nfs_access_entry cache;
+	int status;
+
+	status = nfs_access_get_cached(inode, cred, &cache);
+	if (status == 0)
+		goto out;
+
+	/* Be clever: ask server to check for all possible rights */
+	cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
+	cache.cred = cred;
+	cache.jiffies = jiffies;
+	status = NFS_PROTO(inode)->access(inode, &cache);
+	if (status != 0)
+		return status;
+	nfs_access_add_cache(inode, &cache);
+out:
+	if ((cache.mask & mask) == mask)
+		return 0;
+	return -EACCES;
+}
+
+int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	struct rpc_cred *cred;
+	int res = 0;
+
+	if (mask == 0)
+		goto out;
+	/* Is this sys_access() ? */
+	if (nd != NULL && (nd->flags & LOOKUP_ACCESS))
+		goto force_lookup;
+
+	switch (inode->i_mode & S_IFMT) {
+		case S_IFLNK:
+			goto out;
+		case S_IFREG:
+			/* NFSv4 has atomic_open... */
+			if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)
+					&& nd != NULL
+					&& (nd->flags & LOOKUP_OPEN))
+				goto out;
+			break;
+		case S_IFDIR:
+			/*
+			 * Optimize away all write operations, since the server
+			 * will check permissions when we perform the op.
+			 */
+			if ((mask & MAY_WRITE) && !(mask & MAY_READ))
+				goto out;
+	}
+
+force_lookup:
+	lock_kernel();
+
+	if (!NFS_PROTO(inode)->access)
+		goto out_notsup;
+
+	cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
+	if (!IS_ERR(cred)) {
+		res = nfs_do_access(inode, cred, mask);
+		put_rpccred(cred);
+	} else
+		res = PTR_ERR(cred);
+	unlock_kernel();
+out:
+	return res;
+out_notsup:
+	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (res == 0)
+		res = generic_permission(inode, mask, NULL);
+	unlock_kernel();
+	return res;
+}
+
+/*
+ * Local variables:
+ *  version-control: t
+ *  kept-new-versions: 5
+ * End:
+ */
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
new file mode 100644
index 00000000000..68df803f27c
--- /dev/null
+++ b/fs/nfs/direct.c
@@ -0,0 +1,808 @@
+/*
+ * linux/fs/nfs/direct.c
+ *
+ * Copyright (C) 2003 by Chuck Lever <cel@netapp.com>
+ *
+ * High-performance uncached I/O for the Linux NFS client
+ *
+ * There are important applications whose performance or correctness
+ * depends on uncached access to file data.  Database clusters
+ * (multiple copies of the same instance running on separate hosts) 
+ * implement their own cache coherency protocol that subsumes file
+ * system cache protocols.  Applications that process datasets 
+ * considerably larger than the client's memory do not always benefit 
+ * from a local cache.  A streaming video server, for instance, has no 
+ * need to cache the contents of a file.
+ *
+ * When an application requests uncached I/O, all read and write requests
+ * are made directly to the server; data stored or fetched via these
+ * requests is not cached in the Linux page cache.  The client does not
+ * correct unaligned requests from applications.  All requested bytes are
+ * held on permanent storage before a direct write system call returns to
+ * an application.
+ *
+ * Solaris implements an uncached I/O facility called directio() that
+ * is used for backups and sequential I/O to very large files.  Solaris
+ * also supports uncaching whole NFS partitions with "-o forcedirectio,"
+ * an undocumented mount option.
+ *
+ * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with
+ * help from Andrew Morton.
+ *
+ * 18 Dec 2001	Initial implementation for 2.4  --cel
+ * 08 Jul 2002	Version for 2.4.19, with bug fixes --trondmy
+ * 08 Jun 2003	Port to 2.5 APIs  --cel
+ * 31 Mar 2004	Handle direct I/O without VFS support  --cel
+ * 15 Sep 2004	Parallel async reads  --cel
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/smp_lock.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/kref.h>
+
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/sunrpc/clnt.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+
+#define NFSDBG_FACILITY		NFSDBG_VFS
+#define MAX_DIRECTIO_SIZE	(4096UL << PAGE_SHIFT)
+
+static kmem_cache_t *nfs_direct_cachep;
+
+/*
+ * This represents a set of asynchronous requests that we're waiting on
+ */
+struct nfs_direct_req {
+	struct kref		kref;		/* release manager */
+	struct list_head	list;		/* nfs_read_data structs */
+	wait_queue_head_t	wait;		/* wait for i/o completion */
+	struct page **		pages;		/* pages in our buffer */
+	unsigned int		npages;		/* count of pages */
+	atomic_t		complete,	/* i/os we're waiting for */
+				count,		/* bytes actually processed */
+				error;		/* any reported error */
+};
+
+
+/**
+ * nfs_get_user_pages - find and set up pages underlying user's buffer
+ * rw: direction (read or write)
+ * user_addr: starting address of this segment of user's buffer
+ * count: size of this segment
+ * @pages: returned array of page struct pointers underlying user's buffer
+ */
+static inline int
+nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
+		struct page ***pages)
+{
+	int result = -ENOMEM;
+	unsigned long page_count;
+	size_t array_size;
+
+	/* set an arbitrary limit to prevent type overflow */
+	/* XXX: this can probably be as large as INT_MAX */
+	if (size > MAX_DIRECTIO_SIZE) {
+		*pages = NULL;
+		return -EFBIG;
+	}
+
+	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	page_count -= user_addr >> PAGE_SHIFT;
+
+	array_size = (page_count * sizeof(struct page *));
+	*pages = kmalloc(array_size, GFP_KERNEL);
+	if (*pages) {
+		down_read(&current->mm->mmap_sem);
+		result = get_user_pages(current, current->mm, user_addr,
+					page_count, (rw == READ), 0,
+					*pages, NULL);
+		up_read(&current->mm->mmap_sem);
+	}
+	return result;
+}
+
+/**
+ * nfs_free_user_pages - tear down page struct array
+ * @pages: array of page struct pointers underlying target buffer
+ * @npages: number of pages in the array
+ * @do_dirty: dirty the pages as we release them
+ */
+static void
+nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+{
+	int i;
+	for (i = 0; i < npages; i++) {
+		if (do_dirty)
+			set_page_dirty_lock(pages[i]);
+		page_cache_release(pages[i]);
+	}
+	kfree(pages);
+}
+
+/**
+ * nfs_direct_req_release - release  nfs_direct_req structure for direct read
+ * @kref: kref object embedded in an nfs_direct_req structure
+ *
+ */
+static void nfs_direct_req_release(struct kref *kref)
+{
+	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
+	kmem_cache_free(nfs_direct_cachep, dreq);
+}
+
+/**
+ * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
+ * @count: count of bytes for the read request
+ * @rsize: local rsize setting
+ *
+ * Note we also set the number of requests we have in the dreq when we are
+ * done.  This prevents races with I/O completion so we will always wait
+ * until all requests have been dispatched and completed.
+ */
+static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
+{
+	struct list_head *list;
+	struct nfs_direct_req *dreq;
+	unsigned int reads = 0;
+
+	dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+	if (!dreq)
+		return NULL;
+
+	kref_init(&dreq->kref);
+	init_waitqueue_head(&dreq->wait);
+	INIT_LIST_HEAD(&dreq->list);
+	atomic_set(&dreq->count, 0);
+	atomic_set(&dreq->error, 0);
+
+	list = &dreq->list;
+	for(;;) {
+		struct nfs_read_data *data = nfs_readdata_alloc();
+
+		if (unlikely(!data)) {
+			while (!list_empty(list)) {
+				data = list_entry(list->next,
+						  struct nfs_read_data, pages);
+				list_del(&data->pages);
+				nfs_readdata_free(data);
+			}
+			kref_put(&dreq->kref, nfs_direct_req_release);
+			return NULL;
+		}
+
+		INIT_LIST_HEAD(&data->pages);
+		list_add(&data->pages, list);
+
+		data->req = (struct nfs_page *) dreq;
+		reads++;
+		if (nbytes <= rsize)
+			break;
+		nbytes -= rsize;
+	}
+	kref_get(&dreq->kref);
+	atomic_set(&dreq->complete, reads);
+	return dreq;
+}
+
+/**
+ * nfs_direct_read_result - handle a read reply for a direct read request
+ * @data: address of NFS READ operation control block
+ * @status: status of this NFS READ operation
+ *
+ * We must hold a reference to all the pages in this direct read request
+ * until the RPCs complete.  This could be long *after* we are woken up in
+ * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
+ */
+static void nfs_direct_read_result(struct nfs_read_data *data, int status)
+{
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+
+	if (likely(status >= 0))
+		atomic_add(data->res.count, &dreq->count);
+	else
+		atomic_set(&dreq->error, status);
+
+	if (unlikely(atomic_dec_and_test(&dreq->complete))) {
+		nfs_free_user_pages(dreq->pages, dreq->npages, 1);
+		wake_up(&dreq->wait);
+		kref_put(&dreq->kref, nfs_direct_req_release);
+	}
+}
+
+/**
+ * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
+ * @dreq: address of nfs_direct_req struct for this request
+ * @inode: target inode
+ * @ctx: target file open context
+ * @user_addr: starting address of this segment of user's buffer
+ * @count: size of this segment
+ * @file_offset: offset in file to begin the operation
+ *
+ * For each nfs_read_data struct that was allocated on the list, dispatch
+ * an NFS READ operation
+ */
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
+		struct inode *inode, struct nfs_open_context *ctx,
+		unsigned long user_addr, size_t count, loff_t file_offset)
+{
+	struct list_head *list = &dreq->list;
+	struct page **pages = dreq->pages;
+	unsigned int curpage, pgbase;
+	unsigned int rsize = NFS_SERVER(inode)->rsize;
+
+	curpage = 0;
+	pgbase = user_addr & ~PAGE_MASK;
+	do {
+		struct nfs_read_data *data;
+		unsigned int bytes;
+
+		bytes = rsize;
+		if (count < rsize)
+			bytes = count;
+
+		data = list_entry(list->next, struct nfs_read_data, pages);
+		list_del_init(&data->pages);
+
+		data->inode = inode;
+		data->cred = ctx->cred;
+		data->args.fh = NFS_FH(inode);
+		data->args.context = ctx;
+		data->args.offset = file_offset;
+		data->args.pgbase = pgbase;
+		data->args.pages = &pages[curpage];
+		data->args.count = bytes;
+		data->res.fattr = &data->fattr;
+		data->res.eof = 0;
+		data->res.count = bytes;
+
+		NFS_PROTO(inode)->read_setup(data);
+
+		data->task.tk_cookie = (unsigned long) inode;
+		data->task.tk_calldata = data;
+		data->task.tk_release = nfs_readdata_release;
+		data->complete = nfs_direct_read_result;
+
+		lock_kernel();
+		rpc_execute(&data->task);
+		unlock_kernel();
+
+		dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+				data->task.tk_pid,
+				inode->i_sb->s_id,
+				(long long)NFS_FILEID(inode),
+				bytes,
+				(unsigned long long)data->args.offset);
+
+		file_offset += bytes;
+		pgbase += bytes;
+		curpage += pgbase >> PAGE_SHIFT;
+		pgbase &= ~PAGE_MASK;
+
+		count -= bytes;
+	} while (count != 0);
+}
+
+/**
+ * nfs_direct_read_wait - wait for I/O completion for direct reads
+ * @dreq: request on which we are to wait
+ * @intr: whether or not this wait can be interrupted
+ *
+ * Collects and returns the final error value/byte-count.
+ */
+static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
+{
+	int result = 0;
+
+	if (intr) {
+		result = wait_event_interruptible(dreq->wait,
+					(atomic_read(&dreq->complete) == 0));
+	} else {
+		wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
+	}
+
+	if (!result)
+		result = atomic_read(&dreq->error);
+	if (!result)
+		result = atomic_read(&dreq->count);
+
+	kref_put(&dreq->kref, nfs_direct_req_release);
+	return (ssize_t) result;
+}
+
+/**
+ * nfs_direct_read_seg - Read in one iov segment.  Generate separate
+ *                        read RPCs for each "rsize" bytes.
+ * @inode: target inode
+ * @ctx: target file open context
+ * @user_addr: starting address of this segment of user's buffer
+ * @count: size of this segment
+ * @file_offset: offset in file to begin the operation
+ * @pages: array of addresses of page structs defining user's buffer
+ * @nr_pages: number of pages in the array
+ *
+ */
+static ssize_t nfs_direct_read_seg(struct inode *inode,
+		struct nfs_open_context *ctx, unsigned long user_addr,
+		size_t count, loff_t file_offset, struct page **pages,
+		unsigned int nr_pages)
+{
+	ssize_t result;
+	sigset_t oldset;
+	struct rpc_clnt *clnt = NFS_CLIENT(inode);
+	struct nfs_direct_req *dreq;
+
+	dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
+	if (!dreq)
+		return -ENOMEM;
+
+	dreq->pages = pages;
+	dreq->npages = nr_pages;
+
+	rpc_clnt_sigmask(clnt, &oldset);
+	nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
+				 file_offset);
+	result = nfs_direct_read_wait(dreq, clnt->cl_intr);
+	rpc_clnt_sigunmask(clnt, &oldset);
+
+	return result;
+}
+
+/**
+ * nfs_direct_read - For each iov segment, map the user's buffer
+ *                   then generate read RPCs.
+ * @inode: target inode
+ * @ctx: target file open context
+ * @iov: array of vectors that define I/O buffer
+ * file_offset: offset in file to begin the operation
+ * nr_segs: size of iovec array
+ *
+ * We've already pushed out any non-direct writes so that this read
+ * will see them when we read from the server.
+ */
+static ssize_t
+nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
+		const struct iovec *iov, loff_t file_offset,
+		unsigned long nr_segs)
+{
+	ssize_t tot_bytes = 0;
+	unsigned long seg = 0;
+
+	while ((seg < nr_segs) && (tot_bytes >= 0)) {
+		ssize_t result;
+		int page_count;
+		struct page **pages;
+		const struct iovec *vec = &iov[seg++];
+		unsigned long user_addr = (unsigned long) vec->iov_base;
+		size_t size = vec->iov_len;
+
+                page_count = nfs_get_user_pages(READ, user_addr, size, &pages);
+                if (page_count < 0) {
+                        nfs_free_user_pages(pages, 0, 0);
+			if (tot_bytes > 0)
+				break;
+                        return page_count;
+                }
+
+		result = nfs_direct_read_seg(inode, ctx, user_addr, size,
+				file_offset, pages, page_count);
+
+		if (result <= 0) {
+			if (tot_bytes > 0)
+				break;
+			return result;
+		}
+		tot_bytes += result;
+		file_offset += result;
+		if (result < size)
+			break;
+	}
+
+	return tot_bytes;
+}
+
+/**
+ * nfs_direct_write_seg - Write out one iov segment.  Generate separate
+ *                        write RPCs for each "wsize" bytes, then commit.
+ * @inode: target inode
+ * @ctx: target file open context
+ * user_addr: starting address of this segment of user's buffer
+ * count: size of this segment
+ * file_offset: offset in file to begin the operation
+ * @pages: array of addresses of page structs defining user's buffer
+ * nr_pages: size of pages array
+ */
+static ssize_t nfs_direct_write_seg(struct inode *inode,
+		struct nfs_open_context *ctx, unsigned long user_addr,
+		size_t count, loff_t file_offset, struct page **pages,
+		int nr_pages)
+{
+	const unsigned int wsize = NFS_SERVER(inode)->wsize;
+	size_t request;
+	int curpage, need_commit;
+	ssize_t result, tot_bytes;
+	struct nfs_writeverf first_verf;
+	struct nfs_write_data *wdata;
+
+	wdata = nfs_writedata_alloc();
+	if (!wdata)
+		return -ENOMEM;
+
+	wdata->inode = inode;
+	wdata->cred = ctx->cred;
+	wdata->args.fh = NFS_FH(inode);
+	wdata->args.context = ctx;
+	wdata->args.stable = NFS_UNSTABLE;
+	if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
+		wdata->args.stable = NFS_FILE_SYNC;
+	wdata->res.fattr = &wdata->fattr;
+	wdata->res.verf = &wdata->verf;
+
+	nfs_begin_data_update(inode);
+retry:
+	need_commit = 0;
+	tot_bytes = 0;
+	curpage = 0;
+	request = count;
+	wdata->args.pgbase = user_addr & ~PAGE_MASK;
+	wdata->args.offset = file_offset;
+	do {
+		wdata->args.count = request;
+		if (wdata->args.count > wsize)
+			wdata->args.count = wsize;
+		wdata->args.pages = &pages[curpage];
+
+		dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
+			wdata->args.count, (long long) wdata->args.offset,
+			user_addr + tot_bytes, wdata->args.pgbase, curpage);
+
+		lock_kernel();
+		result = NFS_PROTO(inode)->write(wdata);
+		unlock_kernel();
+
+		if (result <= 0) {
+			if (tot_bytes > 0)
+				break;
+			goto out;
+		}
+
+		if (tot_bytes == 0)
+			memcpy(&first_verf.verifier, &wdata->verf.verifier,
+						sizeof(first_verf.verifier));
+		if (wdata->verf.committed != NFS_FILE_SYNC) {
+			need_commit = 1;
+			if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
+					sizeof(first_verf.verifier)));
+				goto sync_retry;
+		}
+
+		tot_bytes += result;
+
+		/* in case of a short write: stop now, let the app recover */
+		if (result < wdata->args.count)
+			break;
+
+		wdata->args.offset += result;
+		wdata->args.pgbase += result;
+		curpage += wdata->args.pgbase >> PAGE_SHIFT;
+		wdata->args.pgbase &= ~PAGE_MASK;
+		request -= result;
+	} while (request != 0);
+
+	/*
+	 * Commit data written so far, even in the event of an error
+	 */
+	if (need_commit) {
+		wdata->args.count = tot_bytes;
+		wdata->args.offset = file_offset;
+
+		lock_kernel();
+		result = NFS_PROTO(inode)->commit(wdata);
+		unlock_kernel();
+
+		if (result < 0 || memcmp(&first_verf.verifier,
+					 &wdata->verf.verifier,
+					 sizeof(first_verf.verifier)) != 0)
+			goto sync_retry;
+	}
+	result = tot_bytes;
+
+out:
+	nfs_end_data_update_defer(inode);
+	nfs_writedata_free(wdata);
+	return result;
+
+sync_retry:
+	wdata->args.stable = NFS_FILE_SYNC;
+	goto retry;
+}
+
+/**
+ * nfs_direct_write - For each iov segment, map the user's buffer
+ *                    then generate write and commit RPCs.
+ * @inode: target inode
+ * @ctx: target file open context
+ * @iov: array of vectors that define I/O buffer
+ * file_offset: offset in file to begin the operation
+ * nr_segs: size of iovec array
+ *
+ * Upon return, generic_file_direct_IO invalidates any cached pages
+ * that non-direct readers might access, so they will pick up these
+ * writes immediately.
+ */
+static ssize_t nfs_direct_write(struct inode *inode,
+		struct nfs_open_context *ctx, const struct iovec *iov,
+		loff_t file_offset, unsigned long nr_segs)
+{
+	ssize_t tot_bytes = 0;
+	unsigned long seg = 0;
+
+	while ((seg < nr_segs) && (tot_bytes >= 0)) {
+		ssize_t result;
+		int page_count;
+		struct page **pages;
+		const struct iovec *vec = &iov[seg++];
+		unsigned long user_addr = (unsigned long) vec->iov_base;
+		size_t size = vec->iov_len;
+
+                page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages);
+                if (page_count < 0) {
+                        nfs_free_user_pages(pages, 0, 0);
+			if (tot_bytes > 0)
+				break;
+                        return page_count;
+                }
+
+		result = nfs_direct_write_seg(inode, ctx, user_addr, size,
+				file_offset, pages, page_count);
+		nfs_free_user_pages(pages, page_count, 0);
+
+		if (result <= 0) {
+			if (tot_bytes > 0)
+				break;
+			return result;
+		}
+		tot_bytes += result;
+		file_offset += result;
+		if (result < size)
+			break;
+	}
+	return tot_bytes;
+}
+
+/**
+ * nfs_direct_IO - NFS address space operation for direct I/O
+ * rw: direction (read or write)
+ * @iocb: target I/O control block
+ * @iov: array of vectors that define I/O buffer
+ * file_offset: offset in file to begin the operation
+ * nr_segs: size of iovec array
+ *
+ */
+ssize_t
+nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+		loff_t file_offset, unsigned long nr_segs)
+{
+	ssize_t result = -EINVAL;
+	struct file *file = iocb->ki_filp;
+	struct nfs_open_context *ctx;
+	struct dentry *dentry = file->f_dentry;
+	struct inode *inode = dentry->d_inode;
+
+	/*
+	 * No support for async yet
+	 */
+	if (!is_sync_kiocb(iocb))
+		return result;
+
+	ctx = (struct nfs_open_context *)file->private_data;
+	switch (rw) {
+	case READ:
+		dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n",
+				dentry->d_name.name, file_offset, nr_segs);
+
+		result = nfs_direct_read(inode, ctx, iov,
+						file_offset, nr_segs);
+		break;
+	case WRITE:
+		dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n",
+				dentry->d_name.name, file_offset, nr_segs);
+
+		result = nfs_direct_write(inode, ctx, iov,
+						file_offset, nr_segs);
+		break;
+	default:
+		break;
+	}
+	return result;
+}
+
+/**
+ * nfs_file_direct_read - file direct read operation for NFS files
+ * @iocb: target I/O control block
+ * @buf: user's buffer into which to read data
+ * count: number of bytes to read
+ * pos: byte offset in file where reading starts
+ *
+ * We use this function for direct reads instead of calling
+ * generic_file_aio_read() in order to avoid gfar's check to see if
+ * the request starts before the end of the file.  For that check
+ * to work, we must generate a GETATTR before each direct read, and
+ * even then there is a window between the GETATTR and the subsequent
+ * READ where the file size could change.  So our preference is simply
+ * to do all reads the application wants, and the server will take
+ * care of managing the end of file boundary.
+ * 
+ * This function also eliminates unnecessarily updating the file's
+ * atime locally, as the NFS server sets the file's atime, and this
+ * client must read the updated atime from the server back into its
+ * cache.
+ */
+ssize_t
+nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+{
+	ssize_t retval = -EINVAL;
+	loff_t *ppos = &iocb->ki_pos;
+	struct file *file = iocb->ki_filp;
+	struct nfs_open_context *ctx =
+			(struct nfs_open_context *) file->private_data;
+	struct dentry *dentry = file->f_dentry;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	struct iovec iov = {
+		.iov_base = buf,
+		.iov_len = count,
+	};
+
+	dprintk("nfs: direct read(%s/%s, %lu@%lu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		(unsigned long) count, (unsigned long) pos);
+
+	if (!is_sync_kiocb(iocb))
+		goto out;
+	if (count < 0)
+		goto out;
+	retval = -EFAULT;
+	if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len))
+		goto out;
+	retval = 0;
+	if (!count)
+		goto out;
+
+	if (mapping->nrpages) {
+		retval = filemap_fdatawrite(mapping);
+		if (retval == 0)
+			retval = nfs_wb_all(inode);
+		if (retval == 0)
+			retval = filemap_fdatawait(mapping);
+		if (retval)
+			goto out;
+	}
+
+	retval = nfs_direct_read(inode, ctx, &iov, pos, 1);
+	if (retval > 0)
+		*ppos = pos + retval;
+
+out:
+	return retval;
+}
+
+/**
+ * nfs_file_direct_write - file direct write operation for NFS files
+ * @iocb: target I/O control block
+ * @buf: user's buffer from which to write data
+ * count: number of bytes to write
+ * pos: byte offset in file where writing starts
+ *
+ * We use this function for direct writes instead of calling
+ * generic_file_aio_write() in order to avoid taking the inode
+ * semaphore and updating the i_size.  The NFS server will set
+ * the new i_size and this client must read the updated size
+ * back into its cache.  We let the server do generic write
+ * parameter checking and report problems.
+ *
+ * We also avoid an unnecessary invocation of generic_osync_inode(),
+ * as it is fairly meaningless to sync the metadata of an NFS file.
+ *
+ * We eliminate local atime updates, see direct read above.
+ *
+ * We avoid unnecessary page cache invalidations for normal cached
+ * readers of this file.
+ *
+ * Note that O_APPEND is not supported for NFS direct writes, as there
+ * is no atomic O_APPEND write facility in the NFS protocol.
+ */
+ssize_t
+nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+{
+	ssize_t retval = -EINVAL;
+	loff_t *ppos = &iocb->ki_pos;
+	unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+	struct file *file = iocb->ki_filp;
+	struct nfs_open_context *ctx =
+			(struct nfs_open_context *) file->private_data;
+	struct dentry *dentry = file->f_dentry;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	struct iovec iov = {
+		.iov_base = (char __user *)buf,
+		.iov_len = count,
+	};
+
+	dfprintk(VFS, "nfs: direct write(%s/%s(%ld), %lu@%lu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		inode->i_ino, (unsigned long) count, (unsigned long) pos);
+
+	if (!is_sync_kiocb(iocb))
+		goto out;
+	if (count < 0)
+		goto out;
+        if (pos < 0)
+		goto out;
+	retval = -EFAULT;
+	if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
+		goto out;
+        if (file->f_error) {
+                retval = file->f_error;
+                file->f_error = 0;
+                goto out;
+        }
+	retval = -EFBIG;
+	if (limit != RLIM_INFINITY) {
+		if (pos >= limit) {
+			send_sig(SIGXFSZ, current, 0);
+			goto out;
+		}
+		if (count > limit - (unsigned long) pos)
+			count = limit - (unsigned long) pos;
+	}
+	retval = 0;
+	if (!count)
+		goto out;
+
+	if (mapping->nrpages) {
+		retval = filemap_fdatawrite(mapping);
+		if (retval == 0)
+			retval = nfs_wb_all(inode);
+		if (retval == 0)
+			retval = filemap_fdatawait(mapping);
+		if (retval)
+			goto out;
+	}
+
+	retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
+	if (mapping->nrpages)
+		invalidate_inode_pages2(mapping);
+	if (retval > 0)
+		*ppos = pos + retval;
+
+out:
+	return retval;
+}
+
+int nfs_init_directcache(void)
+{
+	nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
+						sizeof(struct nfs_direct_req),
+						0, SLAB_RECLAIM_ACCOUNT,
+						NULL, NULL);
+	if (nfs_direct_cachep == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void nfs_destroy_directcache(void)
+{
+	if (kmem_cache_destroy(nfs_direct_cachep))
+		printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
+}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
new file mode 100644
index 00000000000..f06eee6dcff
--- /dev/null
+++ b/fs/nfs/file.c
@@ -0,0 +1,484 @@
+/*
+ *  linux/fs/nfs/file.c
+ *
+ *  Copyright (C) 1992  Rick Sladkey
+ *
+ *  Changes Copyright (C) 1994 by Florian La Roche
+ *   - Do not copy data too often around in the kernel.
+ *   - In nfs_file_read the return value of kmalloc wasn't checked.
+ *   - Put in a better version of read look-ahead buffering. Original idea
+ *     and implementation by Wai S Kok elekokws@ee.nus.sg.
+ *
+ *  Expire cache on write to a file by Wai S Kok (Oct 1994).
+ *
+ *  Total rewrite of read side for new NFS buffer cache.. Linus.
+ *
+ *  nfs regular file handling functions
+ */
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include "delegation.h"
+
+#define NFSDBG_FACILITY		NFSDBG_FILE
+
+static int nfs_file_open(struct inode *, struct file *);
+static int nfs_file_release(struct inode *, struct file *);
+static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
+static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
+static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t);
+static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t);
+static int  nfs_file_flush(struct file *);
+static int  nfs_fsync(struct file *, struct dentry *dentry, int datasync);
+static int nfs_check_flags(int flags);
+static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
+static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
+
+struct file_operations nfs_file_operations = {
+	.llseek		= remote_llseek,
+	.read		= do_sync_read,
+	.write		= do_sync_write,
+	.aio_read		= nfs_file_read,
+	.aio_write		= nfs_file_write,
+	.mmap		= nfs_file_mmap,
+	.open		= nfs_file_open,
+	.flush		= nfs_file_flush,
+	.release	= nfs_file_release,
+	.fsync		= nfs_fsync,
+	.lock		= nfs_lock,
+	.flock		= nfs_flock,
+	.sendfile	= nfs_file_sendfile,
+	.check_flags	= nfs_check_flags,
+};
+
+struct inode_operations nfs_file_inode_operations = {
+	.permission	= nfs_permission,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+};
+
+/* Hack for future NFS swap support */
+#ifndef IS_SWAPFILE
+# define IS_SWAPFILE(inode)	(0)
+#endif
+
+static int nfs_check_flags(int flags)
+{
+	if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Open file
+ */
+static int
+nfs_file_open(struct inode *inode, struct file *filp)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	int (*open)(struct inode *, struct file *);
+	int res;
+
+	res = nfs_check_flags(filp->f_flags);
+	if (res)
+		return res;
+
+	lock_kernel();
+	/* Do NFSv4 open() call */
+	if ((open = server->rpc_ops->file_open) != NULL)
+		res = open(inode, filp);
+	unlock_kernel();
+	return res;
+}
+
+static int
+nfs_file_release(struct inode *inode, struct file *filp)
+{
+	/* Ensure that dirty pages are flushed out with the right creds */
+	if (filp->f_mode & FMODE_WRITE)
+		filemap_fdatawrite(filp->f_mapping);
+	return NFS_PROTO(inode)->file_release(inode, filp);
+}
+
+/*
+ * Flush all dirty pages, and check for write errors.
+ *
+ */
+static int
+nfs_file_flush(struct file *file)
+{
+	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+	struct inode	*inode = file->f_dentry->d_inode;
+	int		status;
+
+	dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+
+	if ((file->f_mode & FMODE_WRITE) == 0)
+		return 0;
+	lock_kernel();
+	/* Ensure that data+attribute caches are up to date after close() */
+	status = nfs_wb_all(inode);
+	if (!status) {
+		status = ctx->error;
+		ctx->error = 0;
+		if (!status && !nfs_have_delegation(inode, FMODE_READ))
+			__nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	}
+	unlock_kernel();
+	return status;
+}
+
+static ssize_t
+nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
+{
+	struct dentry * dentry = iocb->ki_filp->f_dentry;
+	struct inode * inode = dentry->d_inode;
+	ssize_t result;
+
+#ifdef CONFIG_NFS_DIRECTIO
+	if (iocb->ki_filp->f_flags & O_DIRECT)
+		return nfs_file_direct_read(iocb, buf, count, pos);
+#endif
+
+	dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		(unsigned long) count, (unsigned long) pos);
+
+	result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (!result)
+		result = generic_file_aio_read(iocb, buf, count, pos);
+	return result;
+}
+
+static ssize_t
+nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
+		read_actor_t actor, void *target)
+{
+	struct dentry *dentry = filp->f_dentry;
+	struct inode *inode = dentry->d_inode;
+	ssize_t res;
+
+	dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		(unsigned long) count, (unsigned long long) *ppos);
+
+	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (!res)
+		res = generic_file_sendfile(filp, ppos, count, actor, target);
+	return res;
+}
+
+static int
+nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
+{
+	struct dentry *dentry = file->f_dentry;
+	struct inode *inode = dentry->d_inode;
+	int	status;
+
+	dfprintk(VFS, "nfs: mmap(%s/%s)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name);
+
+	status = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (!status)
+		status = generic_file_mmap(file, vma);
+	return status;
+}
+
+/*
+ * Flush any dirty pages for this process, and check for write errors.
+ * The return status from this call provides a reliable indication of
+ * whether any write errors occurred for this process.
+ */
+static int
+nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+	struct inode *inode = dentry->d_inode;
+	int status;
+
+	dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+
+	lock_kernel();
+	status = nfs_wb_all(inode);
+	if (!status) {
+		status = ctx->error;
+		ctx->error = 0;
+	}
+	unlock_kernel();
+	return status;
+}
+
+/*
+ * This does the "real" work of the write. The generic routine has
+ * allocated the page, locked it, done all the page alignment stuff
+ * calculations etc. Now we should just copy the data from user
+ * space and write it back to the real medium..
+ *
+ * If the writer ends up delaying the write, the writer needs to
+ * increment the page use counts until he is done with the page.
+ */
+static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+{
+	return nfs_flush_incompatible(file, page);
+}
+
+static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+{
+	long status;
+
+	lock_kernel();
+	status = nfs_updatepage(file, page, offset, to-offset);
+	unlock_kernel();
+	return status;
+}
+
+struct address_space_operations nfs_file_aops = {
+	.readpage = nfs_readpage,
+	.readpages = nfs_readpages,
+	.set_page_dirty = __set_page_dirty_nobuffers,
+	.writepage = nfs_writepage,
+	.writepages = nfs_writepages,
+	.prepare_write = nfs_prepare_write,
+	.commit_write = nfs_commit_write,
+#ifdef CONFIG_NFS_DIRECTIO
+	.direct_IO = nfs_direct_IO,
+#endif
+};
+
+/* 
+ * Write to a file (through the page cache).
+ */
+static ssize_t
+nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+{
+	struct dentry * dentry = iocb->ki_filp->f_dentry;
+	struct inode * inode = dentry->d_inode;
+	ssize_t result;
+
+#ifdef CONFIG_NFS_DIRECTIO
+	if (iocb->ki_filp->f_flags & O_DIRECT)
+		return nfs_file_direct_write(iocb, buf, count, pos);
+#endif
+
+	dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		inode->i_ino, (unsigned long) count, (unsigned long) pos);
+
+	result = -EBUSY;
+	if (IS_SWAPFILE(inode))
+		goto out_swapfile;
+	result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (result)
+		goto out;
+
+	result = count;
+	if (!count)
+		goto out;
+
+	result = generic_file_aio_write(iocb, buf, count, pos);
+out:
+	return result;
+
+out_swapfile:
+	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
+	goto out;
+}
+
+static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode *inode = filp->f_mapping->host;
+	int status = 0;
+
+	lock_kernel();
+	/* Use local locking if mounted with "-onolock" */
+	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
+		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+	else {
+		struct file_lock *cfl = posix_test_lock(filp, fl);
+
+		fl->fl_type = F_UNLCK;
+		if (cfl != NULL)
+			memcpy(fl, cfl, sizeof(*fl));
+	}
+	unlock_kernel();
+	return status;
+}
+
+static int do_vfs_lock(struct file *file, struct file_lock *fl)
+{
+	int res = 0;
+	switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
+		case FL_POSIX:
+			res = posix_lock_file_wait(file, fl);
+			break;
+		case FL_FLOCK:
+			res = flock_lock_file_wait(file, fl);
+			break;
+		default:
+			BUG();
+	}
+	if (res < 0)
+		printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n",
+				__FUNCTION__);
+	return res;
+}
+
+static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode *inode = filp->f_mapping->host;
+	sigset_t oldset;
+	int status;
+
+	rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
+	/*
+	 * Flush all pending writes before doing anything
+	 * with locks..
+	 */
+	filemap_fdatawrite(filp->f_mapping);
+	down(&inode->i_sem);
+	nfs_wb_all(inode);
+	up(&inode->i_sem);
+	filemap_fdatawait(filp->f_mapping);
+
+	/* NOTE: special case
+	 * 	If we're signalled while cleaning up locks on process exit, we
+	 * 	still need to complete the unlock.
+	 */
+	lock_kernel();
+	/* Use local locking if mounted with "-onolock" */
+	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
+		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+	else
+		status = do_vfs_lock(filp, fl);
+	unlock_kernel();
+	rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
+	return status;
+}
+
+static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode *inode = filp->f_mapping->host;
+	sigset_t oldset;
+	int status;
+
+	rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
+	/*
+	 * Flush all pending writes before doing anything
+	 * with locks..
+	 */
+	status = filemap_fdatawrite(filp->f_mapping);
+	if (status == 0) {
+		down(&inode->i_sem);
+		status = nfs_wb_all(inode);
+		up(&inode->i_sem);
+		if (status == 0)
+			status = filemap_fdatawait(filp->f_mapping);
+	}
+	if (status < 0)
+		goto out;
+
+	lock_kernel();
+	/* Use local locking if mounted with "-onolock" */
+	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) {
+		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+		/* If we were signalled we still need to ensure that
+		 * we clean up any state on the server. We therefore
+		 * record the lock call as having succeeded in order to
+		 * ensure that locks_remove_posix() cleans it out when
+		 * the process exits.
+		 */
+		if (status == -EINTR || status == -ERESTARTSYS)
+			do_vfs_lock(filp, fl);
+	} else
+		status = do_vfs_lock(filp, fl);
+	unlock_kernel();
+	if (status < 0)
+		goto out;
+	/*
+	 * Make sure we clear the cache whenever we try to get the lock.
+	 * This makes locking act as a cache coherency point.
+	 */
+	filemap_fdatawrite(filp->f_mapping);
+	down(&inode->i_sem);
+	nfs_wb_all(inode);	/* we may have slept */
+	up(&inode->i_sem);
+	filemap_fdatawait(filp->f_mapping);
+	nfs_zap_caches(inode);
+out:
+	rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
+	return status;
+}
+
+/*
+ * Lock a (portion of) a file
+ */
+static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode * inode = filp->f_mapping->host;
+
+	dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n",
+			inode->i_sb->s_id, inode->i_ino,
+			fl->fl_type, fl->fl_flags,
+			(long long)fl->fl_start, (long long)fl->fl_end);
+
+	if (!inode)
+		return -EINVAL;
+
+	/* No mandatory locks over NFS */
+	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+		return -ENOLCK;
+
+	if (IS_GETLK(cmd))
+		return do_getlk(filp, cmd, fl);
+	if (fl->fl_type == F_UNLCK)
+		return do_unlk(filp, cmd, fl);
+	return do_setlk(filp, cmd, fl);
+}
+
+/*
+ * Lock a (portion of) a file
+ */
+static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode * inode = filp->f_mapping->host;
+
+	dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n",
+			inode->i_sb->s_id, inode->i_ino,
+			fl->fl_type, fl->fl_flags);
+
+	if (!inode)
+		return -EINVAL;
+
+	/*
+	 * No BSD flocks over NFS allowed.
+	 * Note: we could try to fake a POSIX lock request here by
+	 * using ((u32) filp | 0x80000000) or some such as the pid.
+	 * Not sure whether that would be unique, though, or whether
+	 * that would break in other places.
+	 */
+	if (!(fl->fl_flags & FL_FLOCK))
+		return -ENOLCK;
+
+	/* We're simulating flock() locks using posix locks on the server */
+	fl->fl_owner = (fl_owner_t)filp;
+	fl->fl_start = 0;
+	fl->fl_end = OFFSET_MAX;
+
+	if (fl->fl_type == F_UNLCK)
+		return do_unlk(filp, cmd, fl);
+	return do_setlk(filp, cmd, fl);
+}
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
new file mode 100644
index 00000000000..b74c4e3a64e
--- /dev/null
+++ b/fs/nfs/idmap.c
@@ -0,0 +1,498 @@
+/*
+ * fs/nfs/idmap.c
+ *
+ *  UID and GID to name mapping for clients.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Marius Aamodt Eriksen <marius@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/sched.h>
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/workqueue.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+
+#include <linux/nfs_fs_sb.h>
+#include <linux/nfs_fs.h>
+
+#include <linux/nfs_idmap.h>
+
+#define IDMAP_HASH_SZ          128
+
+struct idmap_hashent {
+	__u32 ih_id;
+	int ih_namelen;
+	char ih_name[IDMAP_NAMESZ];
+};
+
+struct idmap_hashtable {
+	__u8 h_type;
+	struct idmap_hashent h_entries[IDMAP_HASH_SZ];
+};
+
+struct idmap {
+	char                  idmap_path[48];
+	struct dentry        *idmap_dentry;
+	wait_queue_head_t     idmap_wq;
+	struct idmap_msg      idmap_im;
+	struct semaphore      idmap_lock;    /* Serializes upcalls */
+	struct semaphore      idmap_im_lock; /* Protects the hashtable */
+	struct idmap_hashtable idmap_user_hash;
+	struct idmap_hashtable idmap_group_hash;
+};
+
+static ssize_t   idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *,
+		     char __user *, size_t);
+static ssize_t   idmap_pipe_downcall(struct file *, const char __user *,
+		     size_t);
+void             idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
+
+static unsigned int fnvhash32(const void *, size_t);
+
+static struct rpc_pipe_ops idmap_upcall_ops = {
+        .upcall         = idmap_pipe_upcall,
+        .downcall       = idmap_pipe_downcall,
+        .destroy_msg    = idmap_pipe_destroy_msg,
+};
+
+void
+nfs_idmap_new(struct nfs4_client *clp)
+{
+	struct idmap *idmap;
+
+	if (clp->cl_idmap != NULL)
+		return;
+        if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
+                return;
+
+	memset(idmap, 0, sizeof(*idmap));
+
+	snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
+	    "%s/idmap", clp->cl_rpcclient->cl_pathname);
+
+        idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path,
+	    idmap, &idmap_upcall_ops, 0);
+        if (IS_ERR(idmap->idmap_dentry)) {
+		kfree(idmap);
+		return;
+	}
+
+        init_MUTEX(&idmap->idmap_lock);
+        init_MUTEX(&idmap->idmap_im_lock);
+	init_waitqueue_head(&idmap->idmap_wq);
+	idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
+	idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
+
+	clp->cl_idmap = idmap;
+}
+
+void
+nfs_idmap_delete(struct nfs4_client *clp)
+{
+	struct idmap *idmap = clp->cl_idmap;
+
+	if (!idmap)
+		return;
+	rpc_unlink(idmap->idmap_path);
+	clp->cl_idmap = NULL;
+	kfree(idmap);
+}
+
+/*
+ * Helper routines for manipulating the hashtable
+ */
+static inline struct idmap_hashent *
+idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len)
+{
+	return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ];
+}
+
+static struct idmap_hashent *
+idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len)
+{
+	struct idmap_hashent *he = idmap_name_hash(h, name, len);
+
+	if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0)
+		return NULL;
+	return he;
+}
+
+static inline struct idmap_hashent *
+idmap_id_hash(struct idmap_hashtable* h, __u32 id)
+{
+	return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ];
+}
+
+static struct idmap_hashent *
+idmap_lookup_id(struct idmap_hashtable *h, __u32 id)
+{
+	struct idmap_hashent *he = idmap_id_hash(h, id);
+	if (he->ih_id != id || he->ih_namelen == 0)
+		return NULL;
+	return he;
+}
+
+/*
+ * Routines for allocating new entries in the hashtable.
+ * For now, we just have 1 entry per bucket, so it's all
+ * pretty trivial.
+ */
+static inline struct idmap_hashent *
+idmap_alloc_name(struct idmap_hashtable *h, char *name, unsigned len)
+{
+	return idmap_name_hash(h, name, len);
+}
+
+static inline struct idmap_hashent *
+idmap_alloc_id(struct idmap_hashtable *h, __u32 id)
+{
+	return idmap_id_hash(h, id);
+}
+
+static void
+idmap_update_entry(struct idmap_hashent *he, const char *name,
+		size_t namelen, __u32 id)
+{
+	he->ih_id = id;
+	memcpy(he->ih_name, name, namelen);
+	he->ih_name[namelen] = '\0';
+	he->ih_namelen = namelen;
+}
+
+/*
+ * Name -> ID
+ */
+static int
+nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
+		const char *name, size_t namelen, __u32 *id)
+{
+	struct rpc_pipe_msg msg;
+	struct idmap_msg *im;
+	struct idmap_hashent *he;
+	DECLARE_WAITQUEUE(wq, current);
+	int ret = -EIO;
+
+	im = &idmap->idmap_im;
+
+	/*
+	 * String sanity checks
+	 * Note that the userland daemon expects NUL terminated strings
+	 */
+	for (;;) {
+		if (namelen == 0)
+			return -EINVAL;
+		if (name[namelen-1] != '\0')
+			break;
+		namelen--;
+	}
+	if (namelen >= IDMAP_NAMESZ)
+		return -EINVAL;
+
+	down(&idmap->idmap_lock);
+	down(&idmap->idmap_im_lock);
+
+	he = idmap_lookup_name(h, name, namelen);
+	if (he != NULL) {
+		*id = he->ih_id;
+		ret = 0;
+		goto out;
+	}
+
+	memset(im, 0, sizeof(*im));
+	memcpy(im->im_name, name, namelen);
+
+	im->im_type = h->h_type;
+	im->im_conv = IDMAP_CONV_NAMETOID;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.data = im;
+	msg.len = sizeof(*im);
+
+	add_wait_queue(&idmap->idmap_wq, &wq);
+	if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) {
+		remove_wait_queue(&idmap->idmap_wq, &wq);
+		goto out;
+	}
+
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	up(&idmap->idmap_im_lock);
+	schedule();
+	current->state = TASK_RUNNING;
+	remove_wait_queue(&idmap->idmap_wq, &wq);
+	down(&idmap->idmap_im_lock);
+
+	if (im->im_status & IDMAP_STATUS_SUCCESS) {
+		*id = im->im_id;
+		ret = 0;
+	}
+
+ out:
+	memset(im, 0, sizeof(*im));
+	up(&idmap->idmap_im_lock);
+	up(&idmap->idmap_lock);
+	return (ret);
+}
+
+/*
+ * ID -> Name
+ */
+static int
+nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
+		__u32 id, char *name)
+{
+	struct rpc_pipe_msg msg;
+	struct idmap_msg *im;
+	struct idmap_hashent *he;
+	DECLARE_WAITQUEUE(wq, current);
+	int ret = -EIO;
+	unsigned int len;
+
+	im = &idmap->idmap_im;
+
+	down(&idmap->idmap_lock);
+	down(&idmap->idmap_im_lock);
+
+	he = idmap_lookup_id(h, id);
+	if (he != 0) {
+		memcpy(name, he->ih_name, he->ih_namelen);
+		ret = he->ih_namelen;
+		goto out;
+	}
+
+	memset(im, 0, sizeof(*im));
+	im->im_type = h->h_type;
+	im->im_conv = IDMAP_CONV_IDTONAME;
+	im->im_id = id;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.data = im;
+	msg.len = sizeof(*im);
+
+	add_wait_queue(&idmap->idmap_wq, &wq);
+
+	if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) {
+		remove_wait_queue(&idmap->idmap_wq, &wq);
+		goto out;
+	}
+
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	up(&idmap->idmap_im_lock);
+	schedule();
+	current->state = TASK_RUNNING;
+	remove_wait_queue(&idmap->idmap_wq, &wq);
+	down(&idmap->idmap_im_lock);
+
+	if (im->im_status & IDMAP_STATUS_SUCCESS) {
+		if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
+			goto out;
+		memcpy(name, im->im_name, len);
+		ret = len;
+	}
+
+ out:
+	memset(im, 0, sizeof(*im));
+	up(&idmap->idmap_im_lock);
+	up(&idmap->idmap_lock);
+	return ret;
+}
+
+/* RPC pipefs upcall/downcall routines */
+static ssize_t
+idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
+    char __user *dst, size_t buflen)
+{
+        char *data = (char *)msg->data + msg->copied;
+        ssize_t mlen = msg->len - msg->copied;
+        ssize_t left;
+
+        if (mlen > buflen)
+                mlen = buflen;
+
+        left = copy_to_user(dst, data, mlen);
+	if (left < 0) {
+		msg->errno = left;
+		return left;
+	}
+	mlen -= left;
+	msg->copied += mlen;
+	msg->errno = 0;
+        return mlen;
+}
+
+static ssize_t
+idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+{
+        struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
+	struct idmap *idmap = (struct idmap *)rpci->private;
+	struct idmap_msg im_in, *im = &idmap->idmap_im;
+	struct idmap_hashtable *h;
+	struct idmap_hashent *he = NULL;
+	int namelen_in;
+	int ret;
+
+        if (mlen != sizeof(im_in))
+                return (-ENOSPC);
+
+        if (copy_from_user(&im_in, src, mlen) != 0)
+		return (-EFAULT);
+
+	down(&idmap->idmap_im_lock);
+
+	ret = mlen;
+	im->im_status = im_in.im_status;
+	/* If we got an error, terminate now, and wake up pending upcalls */
+	if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) {
+		wake_up(&idmap->idmap_wq);
+		goto out;
+	}
+
+	/* Sanity checking of strings */
+	ret = -EINVAL;
+	namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ);
+	if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ)
+		goto out;
+
+	switch (im_in.im_type) {
+		case IDMAP_TYPE_USER:
+			h = &idmap->idmap_user_hash;
+			break;
+		case IDMAP_TYPE_GROUP:
+			h = &idmap->idmap_group_hash;
+			break;
+		default:
+			goto out;
+	}
+
+	switch (im_in.im_conv) {
+	case IDMAP_CONV_IDTONAME:
+		/* Did we match the current upcall? */
+		if (im->im_conv == IDMAP_CONV_IDTONAME
+				&& im->im_type == im_in.im_type
+				&& im->im_id == im_in.im_id) {
+			/* Yes: copy string, including the terminating '\0'  */
+			memcpy(im->im_name, im_in.im_name, namelen_in);
+			im->im_name[namelen_in] = '\0';
+			wake_up(&idmap->idmap_wq);
+		}
+		he = idmap_alloc_id(h, im_in.im_id);
+		break;
+	case IDMAP_CONV_NAMETOID:
+		/* Did we match the current upcall? */
+		if (im->im_conv == IDMAP_CONV_NAMETOID
+				&& im->im_type == im_in.im_type
+				&& strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in
+				&& memcmp(im->im_name, im_in.im_name, namelen_in) == 0) {
+			im->im_id = im_in.im_id;
+			wake_up(&idmap->idmap_wq);
+		}
+		he = idmap_alloc_name(h, im_in.im_name, namelen_in);
+		break;
+	default:
+		goto out;
+	}
+
+	/* If the entry is valid, also copy it to the cache */
+	if (he != NULL)
+		idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
+	ret = mlen;
+out:
+	up(&idmap->idmap_im_lock);
+	return ret;
+}
+
+void
+idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
+{
+	struct idmap_msg *im = msg->data;
+	struct idmap *idmap = container_of(im, struct idmap, idmap_im); 
+
+	if (msg->errno >= 0)
+		return;
+	down(&idmap->idmap_im_lock);
+	im->im_status = IDMAP_STATUS_LOOKUPFAIL;
+	wake_up(&idmap->idmap_wq);
+	up(&idmap->idmap_im_lock);
+}
+
+/* 
+ * Fowler/Noll/Vo hash
+ *    http://www.isthe.com/chongo/tech/comp/fnv/
+ */
+
+#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */
+#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */
+
+static unsigned int fnvhash32(const void *buf, size_t buflen)
+{
+	const unsigned char *p, *end = (const unsigned char *)buf + buflen;
+	unsigned int hash = FNV_1_32;
+
+	for (p = buf; p < end; p++) {
+		hash *= FNV_P_32;
+		hash ^= (unsigned int)*p;
+	}
+
+	return (hash);
+}
+
+int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+{
+	struct idmap *idmap = clp->cl_idmap;
+
+	return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
+}
+
+int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+{
+	struct idmap *idmap = clp->cl_idmap;
+
+	return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
+}
+
+int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf)
+{
+	struct idmap *idmap = clp->cl_idmap;
+
+	return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
+}
+int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf)
+{
+	struct idmap *idmap = clp->cl_idmap;
+
+	return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
+}
+
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
new file mode 100644
index 00000000000..6345f26e87e
--- /dev/null
+++ b/fs/nfs/inode.c
@@ -0,0 +1,2003 @@
+/*
+ *  linux/fs/nfs/inode.c
+ *
+ *  Copyright (C) 1992  Rick Sladkey
+ *
+ *  nfs inode and superblock handling functions
+ *
+ *  Modularised by Alan Cox <Alan.Cox@linux.org>, while hacking some
+ *  experimental NFS changes. Modularisation taken straight from SYS5 fs.
+ *
+ *  Change to nfs_read_super() to permit NFS mounts to multi-homed hosts.
+ *  J.S.Peatfield@damtp.cam.ac.uk
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/nfs_idmap.h>
+#include <linux/vfs.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include "delegation.h"
+
+#define NFSDBG_FACILITY		NFSDBG_VFS
+#define NFS_PARANOIA 1
+
+/* Maximum number of readahead requests
+ * FIXME: this should really be a sysctl so that users may tune it to suit
+ *        their needs. People that do NFS over a slow network, might for
+ *        instance want to reduce it to something closer to 1 for improved
+ *        interactive response.
+ */
+#define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
+
+static void nfs_invalidate_inode(struct inode *);
+static int nfs_update_inode(struct inode *, struct nfs_fattr *, unsigned long);
+
+static struct inode *nfs_alloc_inode(struct super_block *sb);
+static void nfs_destroy_inode(struct inode *);
+static int nfs_write_inode(struct inode *,int);
+static void nfs_delete_inode(struct inode *);
+static void nfs_clear_inode(struct inode *);
+static void nfs_umount_begin(struct super_block *);
+static int  nfs_statfs(struct super_block *, struct kstatfs *);
+static int  nfs_show_options(struct seq_file *, struct vfsmount *);
+
+static struct rpc_program	nfs_program;
+
+static struct super_operations nfs_sops = { 
+	.alloc_inode	= nfs_alloc_inode,
+	.destroy_inode	= nfs_destroy_inode,
+	.write_inode	= nfs_write_inode,
+	.delete_inode	= nfs_delete_inode,
+	.statfs		= nfs_statfs,
+	.clear_inode	= nfs_clear_inode,
+	.umount_begin	= nfs_umount_begin,
+	.show_options	= nfs_show_options,
+};
+
+/*
+ * RPC cruft for NFS
+ */
+static struct rpc_stat		nfs_rpcstat = {
+	.program		= &nfs_program
+};
+static struct rpc_version *	nfs_version[] = {
+	NULL,
+	NULL,
+	&nfs_version2,
+#if defined(CONFIG_NFS_V3)
+	&nfs_version3,
+#elif defined(CONFIG_NFS_V4)
+	NULL,
+#endif
+#if defined(CONFIG_NFS_V4)
+	&nfs_version4,
+#endif
+};
+
+static struct rpc_program	nfs_program = {
+	.name			= "nfs",
+	.number			= NFS_PROGRAM,
+	.nrvers			= sizeof(nfs_version) / sizeof(nfs_version[0]),
+	.version		= nfs_version,
+	.stats			= &nfs_rpcstat,
+	.pipe_dir_name		= "/nfs",
+};
+
+static inline unsigned long
+nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
+{
+	return nfs_fileid_to_ino_t(fattr->fileid);
+}
+
+static int
+nfs_write_inode(struct inode *inode, int sync)
+{
+	int flags = sync ? FLUSH_WAIT : 0;
+	int ret;
+
+	ret = nfs_commit_inode(inode, 0, 0, flags);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+static void
+nfs_delete_inode(struct inode * inode)
+{
+	dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+
+	nfs_wb_all(inode);
+	/*
+	 * The following should never happen...
+	 */
+	if (nfs_have_writebacks(inode)) {
+		printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino);
+	}
+
+	clear_inode(inode);
+}
+
+/*
+ * For the moment, the only task for the NFS clear_inode method is to
+ * release the mmap credential
+ */
+static void
+nfs_clear_inode(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct rpc_cred *cred;
+
+	nfs_wb_all(inode);
+	BUG_ON (!list_empty(&nfsi->open_files));
+	cred = nfsi->cache_access.cred;
+	if (cred)
+		put_rpccred(cred);
+	BUG_ON(atomic_read(&nfsi->data_updates) != 0);
+}
+
+void
+nfs_umount_begin(struct super_block *sb)
+{
+	struct nfs_server *server = NFS_SB(sb);
+	struct rpc_clnt	*rpc;
+
+	/* -EIO all pending I/O */
+	if ((rpc = server->client) != NULL)
+		rpc_killall_tasks(rpc);
+}
+
+
+static inline unsigned long
+nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
+{
+	/* make sure blocksize is a power of two */
+	if ((bsize & (bsize - 1)) || nrbitsp) {
+		unsigned char	nrbits;
+
+		for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--)
+			;
+		bsize = 1 << nrbits;
+		if (nrbitsp)
+			*nrbitsp = nrbits;
+	}
+
+	return bsize;
+}
+
+/*
+ * Calculate the number of 512byte blocks used.
+ */
+static inline unsigned long
+nfs_calc_block_size(u64 tsize)
+{
+	loff_t used = (tsize + 511) >> 9;
+	return (used > ULONG_MAX) ? ULONG_MAX : used;
+}
+
+/*
+ * Compute and set NFS server blocksize
+ */
+static inline unsigned long
+nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
+{
+	if (bsize < 1024)
+		bsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
+	else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE)
+		bsize = NFS_MAX_FILE_IO_BUFFER_SIZE;
+
+	return nfs_block_bits(bsize, nrbitsp);
+}
+
+/*
+ * Obtain the root inode of the file system.
+ */
+static struct inode *
+nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
+{
+	struct nfs_server	*server = NFS_SB(sb);
+	struct inode *rooti;
+	int			error;
+
+	error = server->rpc_ops->getroot(server, rootfh, fsinfo);
+	if (error < 0) {
+		dprintk("nfs_get_root: getattr error = %d\n", -error);
+		return ERR_PTR(error);
+	}
+
+	rooti = nfs_fhget(sb, rootfh, fsinfo->fattr);
+	if (!rooti)
+		return ERR_PTR(-ENOMEM);
+	return rooti;
+}
+
+/*
+ * Do NFS version-independent mount processing, and sanity checking
+ */
+static int
+nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
+{
+	struct nfs_server	*server;
+	struct inode		*root_inode;
+	struct nfs_fattr	fattr;
+	struct nfs_fsinfo	fsinfo = {
+					.fattr = &fattr,
+				};
+	struct nfs_pathconf pathinfo = {
+			.fattr = &fattr,
+	};
+	int no_root_error = 0;
+	unsigned long max_rpc_payload;
+
+	/* We probably want something more informative here */
+	snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
+
+	server = NFS_SB(sb);
+
+	sb->s_magic      = NFS_SUPER_MAGIC;
+
+	root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
+	/* Did getting the root inode fail? */
+	if (IS_ERR(root_inode)) {
+		no_root_error = PTR_ERR(root_inode);
+		goto out_no_root;
+	}
+	sb->s_root = d_alloc_root(root_inode);
+	if (!sb->s_root) {
+		no_root_error = -ENOMEM;
+		goto out_no_root;
+	}
+	sb->s_root->d_op = server->rpc_ops->dentry_ops;
+
+	/* Get some general file system info */
+	if (server->namelen == 0 &&
+	    server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
+		server->namelen = pathinfo.max_namelen;
+	/* Work out a lot of parameters */
+	if (server->rsize == 0)
+		server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
+	if (server->wsize == 0)
+		server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
+
+	if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
+		server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
+	if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
+		server->wsize = nfs_block_size(fsinfo.wtmax, NULL);
+
+	max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
+	if (server->rsize > max_rpc_payload)
+		server->rsize = max_rpc_payload;
+	if (server->wsize > max_rpc_payload)
+		server->wsize = max_rpc_payload;
+
+	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	if (server->rpages > NFS_READ_MAXIOV) {
+		server->rpages = NFS_READ_MAXIOV;
+		server->rsize = server->rpages << PAGE_CACHE_SHIFT;
+	}
+
+	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+        if (server->wpages > NFS_WRITE_MAXIOV) {
+		server->wpages = NFS_WRITE_MAXIOV;
+                server->wsize = server->wpages << PAGE_CACHE_SHIFT;
+	}
+
+	if (sb->s_blocksize == 0)
+		sb->s_blocksize = nfs_block_bits(server->wsize,
+							 &sb->s_blocksize_bits);
+	server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL);
+
+	server->dtsize = nfs_block_size(fsinfo.dtpref, NULL);
+	if (server->dtsize > PAGE_CACHE_SIZE)
+		server->dtsize = PAGE_CACHE_SIZE;
+	if (server->dtsize > server->rsize)
+		server->dtsize = server->rsize;
+
+	if (server->flags & NFS_MOUNT_NOAC) {
+		server->acregmin = server->acregmax = 0;
+		server->acdirmin = server->acdirmax = 0;
+		sb->s_flags |= MS_SYNCHRONOUS;
+	}
+	server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
+
+	sb->s_maxbytes = fsinfo.maxfilesize;
+	if (sb->s_maxbytes > MAX_LFS_FILESIZE) 
+		sb->s_maxbytes = MAX_LFS_FILESIZE; 
+
+	server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
+	server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
+
+	/* We're airborne Set socket buffersize */
+	rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
+	return 0;
+	/* Yargs. It didn't work out. */
+out_no_root:
+	dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error);
+	if (!IS_ERR(root_inode))
+		iput(root_inode);
+	return no_root_error;
+}
+
+/*
+ * Create an RPC client handle.
+ */
+static struct rpc_clnt *
+nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
+{
+	struct rpc_timeout	timeparms;
+	struct rpc_xprt		*xprt = NULL;
+	struct rpc_clnt		*clnt = NULL;
+	int			tcp   = (data->flags & NFS_MOUNT_TCP);
+
+	/* Initialize timeout values */
+	timeparms.to_initval = data->timeo * HZ / 10;
+	timeparms.to_retries = data->retrans;
+	timeparms.to_maxval  = tcp ? RPC_MAX_TCP_TIMEOUT : RPC_MAX_UDP_TIMEOUT;
+	timeparms.to_exponential = 1;
+
+	if (!timeparms.to_initval)
+		timeparms.to_initval = (tcp ? 600 : 11) * HZ / 10;
+	if (!timeparms.to_retries)
+		timeparms.to_retries = 5;
+
+	/* create transport and client */
+	xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP,
+				 &server->addr, &timeparms);
+	if (IS_ERR(xprt)) {
+		printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
+		return (struct rpc_clnt *)xprt;
+	}
+	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
+				 server->rpc_ops->version, data->pseudoflavor);
+	if (IS_ERR(clnt)) {
+		printk(KERN_WARNING "NFS: cannot create RPC client.\n");
+		goto out_fail;
+	}
+
+	clnt->cl_intr     = 1;
+	clnt->cl_softrtry = 1;
+	clnt->cl_chatty   = 1;
+
+	return clnt;
+
+out_fail:
+	xprt_destroy(xprt);
+	return clnt;
+}
+
+/*
+ * The way this works is that the mount process passes a structure
+ * in the data argument which contains the server's IP address
+ * and the root file handle obtained from the server's mount
+ * daemon. We stash these away in the private superblock fields.
+ */
+static int
+nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
+{
+	struct nfs_server	*server;
+	rpc_authflavor_t	authflavor;
+
+	server           = NFS_SB(sb);
+	sb->s_blocksize_bits = 0;
+	sb->s_blocksize = 0;
+	if (data->bsize)
+		sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
+	if (data->rsize)
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize)
+		server->wsize = nfs_block_size(data->wsize, NULL);
+	server->flags    = data->flags & NFS_MOUNT_FLAGMASK;
+
+	server->acregmin = data->acregmin*HZ;
+	server->acregmax = data->acregmax*HZ;
+	server->acdirmin = data->acdirmin*HZ;
+	server->acdirmax = data->acdirmax*HZ;
+
+	/* Start lockd here, before we might error out */
+	if (!(server->flags & NFS_MOUNT_NONLM))
+		lockd_up();
+
+	server->namelen  = data->namlen;
+	server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
+	if (!server->hostname)
+		return -ENOMEM;
+	strcpy(server->hostname, data->hostname);
+
+	/* Check NFS protocol revision and initialize RPC op vector
+	 * and file handle pool. */
+	if (server->flags & NFS_MOUNT_VER3) {
+#ifdef CONFIG_NFS_V3
+		server->rpc_ops = &nfs_v3_clientops;
+		server->caps |= NFS_CAP_READDIRPLUS;
+		if (data->version < 4) {
+			printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n");
+			return -EIO;
+		}
+#else
+		printk(KERN_NOTICE "NFS: NFSv3 not supported.\n");
+		return -EIO;
+#endif
+	} else {
+		server->rpc_ops = &nfs_v2_clientops;
+	}
+
+	/* Fill in pseudoflavor for mount version < 5 */
+	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
+		data->pseudoflavor = RPC_AUTH_UNIX;
+	authflavor = data->pseudoflavor;	/* save for sb_init() */
+	/* XXX maybe we want to add a server->pseudoflavor field */
+
+	/* Create RPC client handles */
+	server->client = nfs_create_client(server, data);
+	if (IS_ERR(server->client))
+		return PTR_ERR(server->client);
+	/* RFC 2623, sec 2.3.2 */
+	if (authflavor != RPC_AUTH_UNIX) {
+		server->client_sys = rpc_clone_client(server->client);
+		if (IS_ERR(server->client_sys))
+			return PTR_ERR(server->client_sys);
+		if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys))
+			return -ENOMEM;
+	} else {
+		atomic_inc(&server->client->cl_count);
+		server->client_sys = server->client;
+	}
+
+	if (server->flags & NFS_MOUNT_VER3) {
+		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
+			server->namelen = NFS3_MAXNAMLEN;
+		sb->s_time_gran = 1;
+	} else {
+		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
+			server->namelen = NFS2_MAXNAMLEN;
+	}
+
+	sb->s_op = &nfs_sops;
+	return nfs_sb_init(sb, authflavor);
+}
+
+static int
+nfs_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+	struct nfs_server *server = NFS_SB(sb);
+	unsigned char blockbits;
+	unsigned long blockres;
+	struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode);
+	struct nfs_fattr fattr;
+	struct nfs_fsstat res = {
+			.fattr = &fattr,
+	};
+	int error;
+
+	lock_kernel();
+
+	error = server->rpc_ops->statfs(server, rootfh, &res);
+	buf->f_type = NFS_SUPER_MAGIC;
+	if (error < 0)
+		goto out_err;
+
+	/*
+	 * Current versions of glibc do not correctly handle the
+	 * case where f_frsize != f_bsize.  Eventually we want to
+	 * report the value of wtmult in this field.
+	 */
+	buf->f_frsize = sb->s_blocksize;
+
+	/*
+	 * On most *nix systems, f_blocks, f_bfree, and f_bavail
+	 * are reported in units of f_frsize.  Linux hasn't had
+	 * an f_frsize field in its statfs struct until recently,
+	 * thus historically Linux's sys_statfs reports these
+	 * fields in units of f_bsize.
+	 */
+	buf->f_bsize = sb->s_blocksize;
+	blockbits = sb->s_blocksize_bits;
+	blockres = (1 << blockbits) - 1;
+	buf->f_blocks = (res.tbytes + blockres) >> blockbits;
+	buf->f_bfree = (res.fbytes + blockres) >> blockbits;
+	buf->f_bavail = (res.abytes + blockres) >> blockbits;
+
+	buf->f_files = res.tfiles;
+	buf->f_ffree = res.afiles;
+
+	buf->f_namelen = server->namelen;
+ out:
+	unlock_kernel();
+
+	return 0;
+
+ out_err:
+	printk(KERN_WARNING "nfs_statfs: statfs error = %d\n", -error);
+	buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1;
+	goto out;
+
+}
+
+static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	static struct proc_nfs_info {
+		int flag;
+		char *str;
+		char *nostr;
+	} nfs_info[] = {
+		{ NFS_MOUNT_SOFT, ",soft", ",hard" },
+		{ NFS_MOUNT_INTR, ",intr", "" },
+		{ NFS_MOUNT_POSIX, ",posix", "" },
+		{ NFS_MOUNT_TCP, ",tcp", ",udp" },
+		{ NFS_MOUNT_NOCTO, ",nocto", "" },
+		{ NFS_MOUNT_NOAC, ",noac", "" },
+		{ NFS_MOUNT_NONLM, ",nolock", ",lock" },
+		{ 0, NULL, NULL }
+	};
+	struct proc_nfs_info *nfs_infop;
+	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+
+	seq_printf(m, ",v%d", nfss->rpc_ops->version);
+	seq_printf(m, ",rsize=%d", nfss->rsize);
+	seq_printf(m, ",wsize=%d", nfss->wsize);
+	if (nfss->acregmin != 3*HZ)
+		seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
+	if (nfss->acregmax != 60*HZ)
+		seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
+	if (nfss->acdirmin != 30*HZ)
+		seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
+	if (nfss->acdirmax != 60*HZ)
+		seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
+	for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
+		if (nfss->flags & nfs_infop->flag)
+			seq_puts(m, nfs_infop->str);
+		else
+			seq_puts(m, nfs_infop->nostr);
+	}
+	seq_puts(m, ",addr=");
+	seq_escape(m, nfss->hostname, " \t\n\\");
+	return 0;
+}
+
+/*
+ * Invalidate the local caches
+ */
+void
+nfs_zap_caches(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int mode = inode->i_mode;
+
+	NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
+	NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
+
+	memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
+	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
+		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
+	else
+		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
+}
+
+/*
+ * Invalidate, but do not unhash, the inode
+ */
+static void
+nfs_invalidate_inode(struct inode *inode)
+{
+	umode_t save_mode = inode->i_mode;
+
+	make_bad_inode(inode);
+	inode->i_mode = save_mode;
+	nfs_zap_caches(inode);
+}
+
+struct nfs_find_desc {
+	struct nfs_fh		*fh;
+	struct nfs_fattr	*fattr;
+};
+
+/*
+ * In NFSv3 we can have 64bit inode numbers. In order to support
+ * this, and re-exported directories (also seen in NFSv2)
+ * we are forced to allow 2 different inodes to have the same
+ * i_ino.
+ */
+static int
+nfs_find_actor(struct inode *inode, void *opaque)
+{
+	struct nfs_find_desc	*desc = (struct nfs_find_desc *)opaque;
+	struct nfs_fh		*fh = desc->fh;
+	struct nfs_fattr	*fattr = desc->fattr;
+
+	if (NFS_FILEID(inode) != fattr->fileid)
+		return 0;
+	if (nfs_compare_fh(NFS_FH(inode), fh))
+		return 0;
+	if (is_bad_inode(inode) || NFS_STALE(inode))
+		return 0;
+	return 1;
+}
+
+static int
+nfs_init_locked(struct inode *inode, void *opaque)
+{
+	struct nfs_find_desc	*desc = (struct nfs_find_desc *)opaque;
+	struct nfs_fattr	*fattr = desc->fattr;
+
+	NFS_FILEID(inode) = fattr->fileid;
+	nfs_copy_fh(NFS_FH(inode), desc->fh);
+	return 0;
+}
+
+/* Don't use READDIRPLUS on directories that we believe are too large */
+#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE)
+
+/*
+ * This is our front-end to iget that looks up inodes by file handle
+ * instead of inode number.
+ */
+struct inode *
+nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+	struct nfs_find_desc desc = {
+		.fh	= fh,
+		.fattr	= fattr
+	};
+	struct inode *inode = NULL;
+	unsigned long hash;
+
+	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+		goto out_no_inode;
+
+	if (!fattr->nlink) {
+		printk("NFS: Buggy server - nlink == 0!\n");
+		goto out_no_inode;
+	}
+
+	hash = nfs_fattr_to_ino_t(fattr);
+
+	if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc)))
+		goto out_no_inode;
+
+	if (inode->i_state & I_NEW) {
+		struct nfs_inode *nfsi = NFS_I(inode);
+
+		/* We set i_ino for the few things that still rely on it,
+		 * such as stat(2) */
+		inode->i_ino = hash;
+
+		/* We can't support update_atime(), since the server will reset it */
+		inode->i_flags |= S_NOATIME|S_NOCMTIME;
+		inode->i_mode = fattr->mode;
+		/* Why so? Because we want revalidate for devices/FIFOs, and
+		 * that's precisely what we have in nfs_file_inode_operations.
+		 */
+		inode->i_op = &nfs_file_inode_operations;
+		if (S_ISREG(inode->i_mode)) {
+			inode->i_fop = &nfs_file_operations;
+			inode->i_data.a_ops = &nfs_file_aops;
+			inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
+		} else if (S_ISDIR(inode->i_mode)) {
+			inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops;
+			inode->i_fop = &nfs_dir_operations;
+			if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
+			    && fattr->size <= NFS_LIMIT_READDIRPLUS)
+				NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS;
+		} else if (S_ISLNK(inode->i_mode))
+			inode->i_op = &nfs_symlink_inode_operations;
+		else
+			init_special_inode(inode, inode->i_mode, fattr->rdev);
+
+		nfsi->read_cache_jiffies = fattr->timestamp;
+		inode->i_atime = fattr->atime;
+		inode->i_mtime = fattr->mtime;
+		inode->i_ctime = fattr->ctime;
+		if (fattr->valid & NFS_ATTR_FATTR_V4)
+			nfsi->change_attr = fattr->change_attr;
+		inode->i_size = nfs_size_to_loff_t(fattr->size);
+		inode->i_nlink = fattr->nlink;
+		inode->i_uid = fattr->uid;
+		inode->i_gid = fattr->gid;
+		if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) {
+			/*
+			 * report the blocks in 512byte units
+			 */
+			inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
+			inode->i_blksize = inode->i_sb->s_blocksize;
+		} else {
+			inode->i_blocks = fattr->du.nfs2.blocks;
+			inode->i_blksize = fattr->du.nfs2.blocksize;
+		}
+		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
+		nfsi->attrtimeo_timestamp = jiffies;
+		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+		nfsi->cache_access.cred = NULL;
+
+		unlock_new_inode(inode);
+	} else
+		nfs_refresh_inode(inode, fattr);
+	dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n",
+		inode->i_sb->s_id,
+		(long long)NFS_FILEID(inode),
+		atomic_read(&inode->i_count));
+
+out:
+	return inode;
+
+out_no_inode:
+	printk("nfs_fhget: iget failed\n");
+	goto out;
+}
+
+#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET)
+
+int
+nfs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	struct nfs_fattr fattr;
+	int error;
+
+	if (attr->ia_valid & ATTR_SIZE) {
+		if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
+			attr->ia_valid &= ~ATTR_SIZE;
+	}
+
+	/* Optimization: if the end result is no change, don't RPC */
+	attr->ia_valid &= NFS_VALID_ATTRS;
+	if (attr->ia_valid == 0)
+		return 0;
+
+	lock_kernel();
+	nfs_begin_data_update(inode);
+	/* Write all dirty data if we're changing file permissions or size */
+	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
+		if (filemap_fdatawrite(inode->i_mapping) == 0)
+			filemap_fdatawait(inode->i_mapping);
+		nfs_wb_all(inode);
+	}
+	error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
+	if (error == 0) {
+		nfs_refresh_inode(inode, &fattr);
+		if ((attr->ia_valid & ATTR_MODE) != 0) {
+			int mode;
+			mode = inode->i_mode & ~S_IALLUGO;
+			mode |= attr->ia_mode & S_IALLUGO;
+			inode->i_mode = mode;
+		}
+		if ((attr->ia_valid & ATTR_UID) != 0)
+			inode->i_uid = attr->ia_uid;
+		if ((attr->ia_valid & ATTR_GID) != 0)
+			inode->i_gid = attr->ia_gid;
+		if ((attr->ia_valid & ATTR_SIZE) != 0) {
+			inode->i_size = attr->ia_size;
+			vmtruncate(inode, attr->ia_size);
+		}
+	}
+	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
+		NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
+	nfs_end_data_update(inode);
+	unlock_kernel();
+	return error;
+}
+
+/*
+ * Wait for the inode to get unlocked.
+ * (Used for NFS_INO_LOCKED and NFS_INO_REVALIDATING).
+ */
+static int
+nfs_wait_on_inode(struct inode *inode, int flag)
+{
+	struct rpc_clnt	*clnt = NFS_CLIENT(inode);
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	int error;
+	if (!(NFS_FLAGS(inode) & flag))
+		return 0;
+	atomic_inc(&inode->i_count);
+	error = nfs_wait_event(clnt, nfsi->nfs_i_wait,
+				!(NFS_FLAGS(inode) & flag));
+	iput(inode);
+	return error;
+}
+
+int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME;
+	int err;
+
+	if (__IS_FLG(inode, MS_NOATIME))
+		need_atime = 0;
+	else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode))
+		need_atime = 0;
+	/* We may force a getattr if the user cares about atime */
+	if (need_atime)
+		err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	else
+		err = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (!err)
+		generic_fillattr(inode, stat);
+	return err;
+}
+
+struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred)
+{
+	struct nfs_open_context *ctx;
+
+	ctx = (struct nfs_open_context *)kmalloc(sizeof(*ctx), GFP_KERNEL);
+	if (ctx != NULL) {
+		atomic_set(&ctx->count, 1);
+		ctx->dentry = dget(dentry);
+		ctx->cred = get_rpccred(cred);
+		ctx->state = NULL;
+		ctx->lockowner = current->files;
+		ctx->error = 0;
+		init_waitqueue_head(&ctx->waitq);
+	}
+	return ctx;
+}
+
+struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
+{
+	if (ctx != NULL)
+		atomic_inc(&ctx->count);
+	return ctx;
+}
+
+void put_nfs_open_context(struct nfs_open_context *ctx)
+{
+	if (atomic_dec_and_test(&ctx->count)) {
+		if (!list_empty(&ctx->list)) {
+			struct inode *inode = ctx->dentry->d_inode;
+			spin_lock(&inode->i_lock);
+			list_del(&ctx->list);
+			spin_unlock(&inode->i_lock);
+		}
+		if (ctx->state != NULL)
+			nfs4_close_state(ctx->state, ctx->mode);
+		if (ctx->cred != NULL)
+			put_rpccred(ctx->cred);
+		dput(ctx->dentry);
+		kfree(ctx);
+	}
+}
+
+/*
+ * Ensure that mmap has a recent RPC credential for use when writing out
+ * shared pages
+ */
+void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	filp->private_data = get_nfs_open_context(ctx);
+	spin_lock(&inode->i_lock);
+	list_add(&ctx->list, &nfsi->open_files);
+	spin_unlock(&inode->i_lock);
+}
+
+struct nfs_open_context *nfs_find_open_context(struct inode *inode, int mode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_open_context *pos, *ctx = NULL;
+
+	spin_lock(&inode->i_lock);
+	list_for_each_entry(pos, &nfsi->open_files, list) {
+		if ((pos->mode & mode) == mode) {
+			ctx = get_nfs_open_context(pos);
+			break;
+		}
+	}
+	spin_unlock(&inode->i_lock);
+	return ctx;
+}
+
+void nfs_file_clear_open_context(struct file *filp)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data;
+
+	if (ctx) {
+		filp->private_data = NULL;
+		spin_lock(&inode->i_lock);
+		list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
+		spin_unlock(&inode->i_lock);
+		put_nfs_open_context(ctx);
+	}
+}
+
+/*
+ * These allocate and release file read/write context information.
+ */
+int nfs_open(struct inode *inode, struct file *filp)
+{
+	struct nfs_open_context *ctx;
+	struct rpc_cred *cred;
+
+	cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
+	if (IS_ERR(cred))
+		return PTR_ERR(cred);
+	ctx = alloc_nfs_open_context(filp->f_dentry, cred);
+	put_rpccred(cred);
+	if (ctx == NULL)
+		return -ENOMEM;
+	ctx->mode = filp->f_mode;
+	nfs_file_set_open_context(filp, ctx);
+	put_nfs_open_context(ctx);
+	if ((filp->f_mode & FMODE_WRITE) != 0)
+		nfs_begin_data_update(inode);
+	return 0;
+}
+
+int nfs_release(struct inode *inode, struct file *filp)
+{
+	if ((filp->f_mode & FMODE_WRITE) != 0)
+		nfs_end_data_update(inode);
+	nfs_file_clear_open_context(filp);
+	return 0;
+}
+
+/*
+ * This function is called whenever some part of NFS notices that
+ * the cached attributes have to be refreshed.
+ */
+int
+__nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+{
+	int		 status = -ESTALE;
+	struct nfs_fattr fattr;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	unsigned long verifier;
+	unsigned int flags;
+
+	dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
+		inode->i_sb->s_id, (long long)NFS_FILEID(inode));
+
+	lock_kernel();
+	if (!inode || is_bad_inode(inode))
+ 		goto out_nowait;
+	if (NFS_STALE(inode))
+ 		goto out_nowait;
+
+	while (NFS_REVALIDATING(inode)) {
+		status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING);
+		if (status < 0)
+			goto out_nowait;
+		if (NFS_ATTRTIMEO(inode) == 0)
+			continue;
+		if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME))
+			continue;
+		status = NFS_STALE(inode) ? -ESTALE : 0;
+		goto out_nowait;
+	}
+	NFS_FLAGS(inode) |= NFS_INO_REVALIDATING;
+
+	/* Protect against RPC races by saving the change attribute */
+	verifier = nfs_save_change_attribute(inode);
+	status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
+	if (status != 0) {
+		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
+			 inode->i_sb->s_id,
+			 (long long)NFS_FILEID(inode), status);
+		if (status == -ESTALE) {
+			nfs_zap_caches(inode);
+			if (!S_ISDIR(inode->i_mode))
+				NFS_FLAGS(inode) |= NFS_INO_STALE;
+		}
+		goto out;
+	}
+
+	status = nfs_update_inode(inode, &fattr, verifier);
+	if (status) {
+		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
+			 inode->i_sb->s_id,
+			 (long long)NFS_FILEID(inode), status);
+		goto out;
+	}
+	flags = nfsi->flags;
+	/*
+	 * We may need to keep the attributes marked as invalid if
+	 * we raced with nfs_end_attr_update().
+	 */
+	if (verifier == nfsi->cache_change_attribute)
+		nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
+	/* Do the page cache invalidation */
+	if (flags & NFS_INO_INVALID_DATA) {
+		if (S_ISREG(inode->i_mode)) {
+			if (filemap_fdatawrite(inode->i_mapping) == 0)
+				filemap_fdatawait(inode->i_mapping);
+			nfs_wb_all(inode);
+		}
+		nfsi->flags &= ~NFS_INO_INVALID_DATA;
+		invalidate_inode_pages2(inode->i_mapping);
+		memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
+		dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
+				inode->i_sb->s_id,
+				(long long)NFS_FILEID(inode));
+		/* This ensures we revalidate dentries */
+		nfsi->cache_change_attribute++;
+	}
+	dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
+		inode->i_sb->s_id,
+		(long long)NFS_FILEID(inode));
+
+out:
+	NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING;
+	wake_up(&nfsi->nfs_i_wait);
+ out_nowait:
+	unlock_kernel();
+	return status;
+}
+
+int nfs_attribute_timeout(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	if (nfs_have_delegation(inode, FMODE_READ))
+		return 0;
+	return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo);
+}
+
+/**
+ * nfs_revalidate_inode - Revalidate the inode attributes
+ * @server - pointer to nfs_server struct
+ * @inode - pointer to inode struct
+ *
+ * Updates inode attribute information by retrieving the data from the server.
+ */
+int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+{
+	if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
+			&& !nfs_attribute_timeout(inode))
+		return NFS_STALE(inode) ? -ESTALE : 0;
+	return __nfs_revalidate_inode(server, inode);
+}
+
+/**
+ * nfs_begin_data_update
+ * @inode - pointer to inode
+ * Declare that a set of operations will update file data on the server
+ */
+void nfs_begin_data_update(struct inode *inode)
+{
+	atomic_inc(&NFS_I(inode)->data_updates);
+}
+
+/**
+ * nfs_end_data_update
+ * @inode - pointer to inode
+ * Declare end of the operations that will update file data
+ * This will mark the inode as immediately needing revalidation
+ * of its attribute cache.
+ */
+void nfs_end_data_update(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	if (!nfs_have_delegation(inode, FMODE_READ)) {
+		/* Mark the attribute cache for revalidation */
+		nfsi->flags |= NFS_INO_INVALID_ATTR;
+		/* Directories and symlinks: invalidate page cache too */
+		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+			nfsi->flags |= NFS_INO_INVALID_DATA;
+	}
+	nfsi->cache_change_attribute ++;
+	atomic_dec(&nfsi->data_updates);
+}
+
+/**
+ * nfs_end_data_update_defer
+ * @inode - pointer to inode
+ * Declare end of the operations that will update file data
+ * This will defer marking the inode as needing revalidation
+ * unless there are no other pending updates.
+ */
+void nfs_end_data_update_defer(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	if (atomic_dec_and_test(&nfsi->data_updates)) {
+		/* Mark the attribute cache for revalidation */
+		nfsi->flags |= NFS_INO_INVALID_ATTR;
+		/* Directories and symlinks: invalidate page cache too */
+		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+			nfsi->flags |= NFS_INO_INVALID_DATA;
+		nfsi->cache_change_attribute ++;
+	}
+}
+
+/**
+ * nfs_refresh_inode - verify consistency of the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * Verifies the attribute cache. If we have just changed the attributes,
+ * so that fattr carries weak cache consistency data, then it may
+ * also update the ctime/mtime/change_attribute.
+ */
+int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	loff_t cur_size, new_isize;
+	int data_unstable;
+
+	/* Do we hold a delegation? */
+	if (nfs_have_delegation(inode, FMODE_READ))
+		return 0;
+
+	/* Are we in the process of updating data on the server? */
+	data_unstable = nfs_caches_unstable(inode);
+
+	if (fattr->valid & NFS_ATTR_FATTR_V4) {
+		if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0
+				&& nfsi->change_attr == fattr->pre_change_attr)
+			nfsi->change_attr = fattr->change_attr;
+		if (!data_unstable && nfsi->change_attr != fattr->change_attr)
+			nfsi->flags |= NFS_INO_INVALID_ATTR;
+	}
+
+	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+		return 0;
+
+	/* Has the inode gone and changed behind our back? */
+	if (nfsi->fileid != fattr->fileid
+			|| (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
+		return -EIO;
+
+	cur_size = i_size_read(inode);
+ 	new_isize = nfs_size_to_loff_t(fattr->size);
+
+	/* If we have atomic WCC data, we may update some attributes */
+	if ((fattr->valid & NFS_ATTR_WCC) != 0) {
+		if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
+			memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+		if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime))
+			memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
+	}
+
+	/* Verify a few of the more important attributes */
+	if (!data_unstable) {
+		if (!timespec_equal(&inode->i_mtime, &fattr->mtime)
+				|| cur_size != new_isize)
+			nfsi->flags |= NFS_INO_INVALID_ATTR;
+	} else if (S_ISREG(inode->i_mode) && new_isize > cur_size)
+			nfsi->flags |= NFS_INO_INVALID_ATTR;
+
+	/* Have any file permissions changed? */
+	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
+			|| inode->i_uid != fattr->uid
+			|| inode->i_gid != fattr->gid)
+		nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
+
+	/* Has the link count changed? */
+	if (inode->i_nlink != fattr->nlink)
+		nfsi->flags |= NFS_INO_INVALID_ATTR;
+
+	if (!timespec_equal(&inode->i_atime, &fattr->atime))
+		nfsi->flags |= NFS_INO_INVALID_ATIME;
+
+	nfsi->read_cache_jiffies = fattr->timestamp;
+	return 0;
+}
+
+/*
+ * Many nfs protocol calls return the new file attributes after
+ * an operation.  Here we update the inode to reflect the state
+ * of the server's inode.
+ *
+ * This is a bit tricky because we have to make sure all dirty pages
+ * have been sent off to the server before calling invalidate_inode_pages.
+ * To make sure no other process adds more write requests while we try
+ * our best to flush them, we make them sleep during the attribute refresh.
+ *
+ * A very similar scenario holds for the dir cache.
+ */
+static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	__u64		new_size;
+	loff_t		new_isize;
+	unsigned int	invalid = 0;
+	loff_t		cur_isize;
+	int data_unstable;
+
+	dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
+			__FUNCTION__, inode->i_sb->s_id, inode->i_ino,
+			atomic_read(&inode->i_count), fattr->valid);
+
+	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+		return 0;
+
+	if (nfsi->fileid != fattr->fileid) {
+		printk(KERN_ERR "%s: inode number mismatch\n"
+		       "expected (%s/0x%Lx), got (%s/0x%Lx)\n",
+		       __FUNCTION__,
+		       inode->i_sb->s_id, (long long)nfsi->fileid,
+		       inode->i_sb->s_id, (long long)fattr->fileid);
+		goto out_err;
+	}
+
+	/*
+	 * Make sure the inode's type hasn't changed.
+	 */
+	if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
+		goto out_changed;
+
+	/*
+	 * Update the read time so we don't revalidate too often.
+	 */
+	nfsi->read_cache_jiffies = fattr->timestamp;
+
+	/* Are we racing with known updates of the metadata on the server? */
+	data_unstable = ! nfs_verify_change_attribute(inode, verifier);
+
+	/* Check if the file size agrees */
+	new_size = fattr->size;
+ 	new_isize = nfs_size_to_loff_t(fattr->size);
+	cur_isize = i_size_read(inode);
+	if (cur_isize != new_size) {
+#ifdef NFS_DEBUG_VERBOSE
+		printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino);
+#endif
+		/*
+		 * If we have pending writebacks, things can get
+		 * messy.
+		 */
+		if (S_ISREG(inode->i_mode) && data_unstable) {
+			if (new_isize > cur_isize) {
+				inode->i_size = new_isize;
+				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+			}
+		} else {
+			inode->i_size = new_isize;
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+		}
+	}
+
+	/*
+	 * Note: we don't check inode->i_mtime since pipes etc.
+	 *       can change this value in VFS without requiring a
+	 *	 cache revalidation.
+	 */
+	if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
+		memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
+#ifdef NFS_DEBUG_VERBOSE
+		printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino);
+#endif
+		if (!data_unstable)
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+	}
+
+	if ((fattr->valid & NFS_ATTR_FATTR_V4)
+	    && nfsi->change_attr != fattr->change_attr) {
+#ifdef NFS_DEBUG_VERBOSE
+		printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n",
+		       inode->i_sb->s_id, inode->i_ino);
+#endif
+		nfsi->change_attr = fattr->change_attr;
+		if (!data_unstable)
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
+	}
+
+	memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+	memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
+
+	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
+	    inode->i_uid != fattr->uid ||
+	    inode->i_gid != fattr->gid)
+		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
+
+	inode->i_mode = fattr->mode;
+	inode->i_nlink = fattr->nlink;
+	inode->i_uid = fattr->uid;
+	inode->i_gid = fattr->gid;
+
+	if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) {
+		/*
+		 * report the blocks in 512byte units
+		 */
+		inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
+		inode->i_blksize = inode->i_sb->s_blocksize;
+ 	} else {
+ 		inode->i_blocks = fattr->du.nfs2.blocks;
+ 		inode->i_blksize = fattr->du.nfs2.blocksize;
+ 	}
+
+	/* Update attrtimeo value if we're out of the unstable period */
+	if (invalid & NFS_INO_INVALID_ATTR) {
+		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
+		nfsi->attrtimeo_timestamp = jiffies;
+	} else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
+		if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
+			nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
+		nfsi->attrtimeo_timestamp = jiffies;
+	}
+	/* Don't invalidate the data if we were to blame */
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
+				|| S_ISLNK(inode->i_mode)))
+		invalid &= ~NFS_INO_INVALID_DATA;
+	if (!nfs_have_delegation(inode, FMODE_READ))
+		nfsi->flags |= invalid;
+
+	return 0;
+ out_changed:
+	/*
+	 * Big trouble! The inode has become a different object.
+	 */
+#ifdef NFS_PARANOIA
+	printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n",
+			__FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode);
+#endif
+	/*
+	 * No need to worry about unhashing the dentry, as the
+	 * lookup validation will know that the inode is bad.
+	 * (But we fall through to invalidate the caches.)
+	 */
+	nfs_invalidate_inode(inode);
+ out_err:
+	NFS_FLAGS(inode) |= NFS_INO_STALE;
+	return -ESTALE;
+}
+
+/*
+ * File system information
+ */
+
+static int nfs_set_super(struct super_block *s, void *data)
+{
+	s->s_fs_info = data;
+	return set_anon_super(s, data);
+}
+ 
+static int nfs_compare_super(struct super_block *sb, void *data)
+{
+	struct nfs_server *server = data;
+	struct nfs_server *old = NFS_SB(sb);
+
+	if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr)
+		return 0;
+	if (old->addr.sin_port != server->addr.sin_port)
+		return 0;
+	return !nfs_compare_fh(&old->fh, &server->fh);
+}
+
+static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data)
+{
+	int error;
+	struct nfs_server *server;
+	struct super_block *s;
+	struct nfs_fh *root;
+	struct nfs_mount_data *data = raw_data;
+
+	if (!data) {
+		printk("nfs_read_super: missing data argument\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+	memset(server, 0, sizeof(struct nfs_server));
+	/* Zero out the NFS state stuff */
+	init_nfsv4_state(server);
+
+	if (data->version != NFS_MOUNT_VERSION) {
+		printk("nfs warning: mount version %s than kernel\n",
+			data->version < NFS_MOUNT_VERSION ? "older" : "newer");
+		if (data->version < 2)
+			data->namlen = 0;
+		if (data->version < 3)
+			data->bsize  = 0;
+		if (data->version < 4) {
+			data->flags &= ~NFS_MOUNT_VER3;
+			data->root.size = NFS2_FHSIZE;
+			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+		}
+		if (data->version < 5)
+			data->flags &= ~NFS_MOUNT_SECFLAVOUR;
+	}
+
+	root = &server->fh;
+	if (data->flags & NFS_MOUNT_VER3)
+		root->size = data->root.size;
+	else
+		root->size = NFS2_FHSIZE;
+	if (root->size > sizeof(root->data)) {
+		printk("nfs_get_sb: invalid root filehandle\n");
+		kfree(server);
+		return ERR_PTR(-EINVAL);
+	}
+	memcpy(root->data, data->root.data, root->size);
+
+	/* We now require that the mount process passes the remote address */
+	memcpy(&server->addr, &data->addr, sizeof(server->addr));
+	if (server->addr.sin_addr.s_addr == INADDR_ANY) {
+		printk("NFS: mount program didn't pass remote address!\n");
+		kfree(server);
+		return ERR_PTR(-EINVAL);
+	}
+
+	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
+
+	if (IS_ERR(s) || s->s_root) {
+		kfree(server);
+		return s;
+	}
+
+	s->s_flags = flags;
+
+	/* Fire up rpciod if not yet running */
+	if (rpciod_up() != 0) {
+		printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
+		kfree(server);
+		return ERR_PTR(-EIO);
+	}
+
+	error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+	if (error) {
+		up_write(&s->s_umount);
+		deactivate_super(s);
+		return ERR_PTR(error);
+	}
+	s->s_flags |= MS_ACTIVE;
+	return s;
+}
+
+static void nfs_kill_super(struct super_block *s)
+{
+	struct nfs_server *server = NFS_SB(s);
+
+	kill_anon_super(s);
+
+	if (server->client != NULL && !IS_ERR(server->client))
+		rpc_shutdown_client(server->client);
+	if (server->client_sys != NULL && !IS_ERR(server->client_sys))
+		rpc_shutdown_client(server->client_sys);
+
+	if (!(server->flags & NFS_MOUNT_NONLM))
+		lockd_down();	/* release rpc.lockd */
+
+	rpciod_down();		/* release rpciod */
+
+	if (server->hostname != NULL)
+		kfree(server->hostname);
+	kfree(server);
+}
+
+static struct file_system_type nfs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "nfs",
+	.get_sb		= nfs_get_sb,
+	.kill_sb	= nfs_kill_super,
+	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
+#ifdef CONFIG_NFS_V4
+
+static void nfs4_clear_inode(struct inode *);
+
+
+static struct super_operations nfs4_sops = { 
+	.alloc_inode	= nfs_alloc_inode,
+	.destroy_inode	= nfs_destroy_inode,
+	.write_inode	= nfs_write_inode,
+	.delete_inode	= nfs_delete_inode,
+	.statfs		= nfs_statfs,
+	.clear_inode	= nfs4_clear_inode,
+	.umount_begin	= nfs_umount_begin,
+	.show_options	= nfs_show_options,
+};
+
+/*
+ * Clean out any remaining NFSv4 state that might be left over due
+ * to open() calls that passed nfs_atomic_lookup, but failed to call
+ * nfs_open().
+ */
+static void nfs4_clear_inode(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	/* If we are holding a delegation, return it! */
+	if (nfsi->delegation != NULL)
+		nfs_inode_return_delegation(inode);
+	/* First call standard NFS clear_inode() code */
+	nfs_clear_inode(inode);
+	/* Now clear out any remaining state */
+	while (!list_empty(&nfsi->open_states)) {
+		struct nfs4_state *state;
+		
+		state = list_entry(nfsi->open_states.next,
+				struct nfs4_state,
+				inode_states);
+		dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n",
+				__FUNCTION__,
+				inode->i_sb->s_id,
+				(long long)NFS_FILEID(inode),
+				state);
+		BUG_ON(atomic_read(&state->count) != 1);
+		nfs4_close_state(state, state->state);
+	}
+}
+
+
+static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
+{
+	struct nfs_server *server;
+	struct nfs4_client *clp = NULL;
+	struct rpc_xprt *xprt = NULL;
+	struct rpc_clnt *clnt = NULL;
+	struct rpc_timeout timeparms;
+	rpc_authflavor_t authflavour;
+	int proto, err = -EIO;
+
+	sb->s_blocksize_bits = 0;
+	sb->s_blocksize = 0;
+	server = NFS_SB(sb);
+	if (data->rsize != 0)
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize != 0)
+		server->wsize = nfs_block_size(data->wsize, NULL);
+	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+	server->caps = NFS_CAP_ATOMIC_OPEN;
+
+	server->acregmin = data->acregmin*HZ;
+	server->acregmax = data->acregmax*HZ;
+	server->acdirmin = data->acdirmin*HZ;
+	server->acdirmax = data->acdirmax*HZ;
+
+	server->rpc_ops = &nfs_v4_clientops;
+	/* Initialize timeout values */
+
+	timeparms.to_initval = data->timeo * HZ / 10;
+	timeparms.to_retries = data->retrans;
+	timeparms.to_exponential = 1;
+	if (!timeparms.to_retries)
+		timeparms.to_retries = 5;
+
+	proto = data->proto;
+	/* Which IP protocol do we use? */
+	switch (proto) {
+	case IPPROTO_TCP:
+		timeparms.to_maxval  = RPC_MAX_TCP_TIMEOUT;
+		if (!timeparms.to_initval)
+			timeparms.to_initval = 600 * HZ / 10;
+		break;
+	case IPPROTO_UDP:
+		timeparms.to_maxval  = RPC_MAX_UDP_TIMEOUT;
+		if (!timeparms.to_initval)
+			timeparms.to_initval = 11 * HZ / 10;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	clp = nfs4_get_client(&server->addr.sin_addr);
+	if (!clp) {
+		printk(KERN_WARNING "NFS: failed to create NFS4 client.\n");
+		return -EIO;
+	}
+
+	/* Now create transport and client */
+	authflavour = RPC_AUTH_UNIX;
+	if (data->auth_flavourlen != 0) {
+		if (data->auth_flavourlen > 1)
+			printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n");
+		if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
+			err = -EFAULT;
+			goto out_fail;
+		}
+	}
+
+	down_write(&clp->cl_sem);
+	if (clp->cl_rpcclient == NULL) {
+		xprt = xprt_create_proto(proto, &server->addr, &timeparms);
+		if (IS_ERR(xprt)) {
+			up_write(&clp->cl_sem);
+			printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
+			err = PTR_ERR(xprt);
+			goto out_fail;
+		}
+		clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
+				server->rpc_ops->version, authflavour);
+		if (IS_ERR(clnt)) {
+			up_write(&clp->cl_sem);
+			printk(KERN_WARNING "NFS: cannot create RPC client.\n");
+			xprt_destroy(xprt);
+			err = PTR_ERR(clnt);
+			goto out_fail;
+		}
+		clnt->cl_intr     = 1;
+		clnt->cl_softrtry = 1;
+		clnt->cl_chatty   = 1;
+		clp->cl_rpcclient = clnt;
+		clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0);
+		if (IS_ERR(clp->cl_cred)) {
+			up_write(&clp->cl_sem);
+			err = PTR_ERR(clp->cl_cred);
+			clp->cl_cred = NULL;
+			goto out_fail;
+		}
+		memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
+		nfs_idmap_new(clp);
+	}
+	if (list_empty(&clp->cl_superblocks)) {
+		err = nfs4_init_client(clp);
+		if (err != 0) {
+			up_write(&clp->cl_sem);
+			goto out_fail;
+		}
+	}
+	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
+	clnt = rpc_clone_client(clp->cl_rpcclient);
+	if (!IS_ERR(clnt))
+			server->nfs4_state = clp;
+	up_write(&clp->cl_sem);
+	clp = NULL;
+
+	if (IS_ERR(clnt)) {
+		printk(KERN_WARNING "NFS: cannot create RPC client.\n");
+		return PTR_ERR(clnt);
+	}
+
+	server->client    = clnt;
+
+	if (server->nfs4_state->cl_idmap == NULL) {
+		printk(KERN_WARNING "NFS: failed to create idmapper.\n");
+		return -ENOMEM;
+	}
+
+	if (clnt->cl_auth->au_flavor != authflavour) {
+		if (rpcauth_create(authflavour, clnt) == NULL) {
+			printk(KERN_WARNING "NFS: couldn't create credcache!\n");
+			return -ENOMEM;
+		}
+	}
+
+	sb->s_time_gran = 1;
+
+	sb->s_op = &nfs4_sops;
+	err = nfs_sb_init(sb, authflavour);
+	if (err == 0)
+		return 0;
+out_fail:
+	if (clp)
+		nfs4_put_client(clp);
+	return err;
+}
+
+static int nfs4_compare_super(struct super_block *sb, void *data)
+{
+	struct nfs_server *server = data;
+	struct nfs_server *old = NFS_SB(sb);
+
+	if (strcmp(server->hostname, old->hostname) != 0)
+		return 0;
+	if (strcmp(server->mnt_path, old->mnt_path) != 0)
+		return 0;
+	return 1;
+}
+
+static void *
+nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
+{
+	void *p = NULL;
+
+	if (!src->len)
+		return ERR_PTR(-EINVAL);
+	if (src->len < maxlen)
+		maxlen = src->len;
+	if (dst == NULL) {
+		p = dst = kmalloc(maxlen + 1, GFP_KERNEL);
+		if (p == NULL)
+			return ERR_PTR(-ENOMEM);
+	}
+	if (copy_from_user(dst, src->data, maxlen)) {
+		if (p != NULL)
+			kfree(p);
+		return ERR_PTR(-EFAULT);
+	}
+	dst[maxlen] = '\0';
+	return dst;
+}
+
+static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data)
+{
+	int error;
+	struct nfs_server *server;
+	struct super_block *s;
+	struct nfs4_mount_data *data = raw_data;
+	void *p;
+
+	if (!data) {
+		printk("nfs_read_super: missing data argument\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+	memset(server, 0, sizeof(struct nfs_server));
+	/* Zero out the NFS state stuff */
+	init_nfsv4_state(server);
+
+	if (data->version != NFS4_MOUNT_VERSION) {
+		printk("nfs warning: mount version %s than kernel\n",
+			data->version < NFS4_MOUNT_VERSION ? "older" : "newer");
+	}
+
+	p = nfs_copy_user_string(NULL, &data->hostname, 256);
+	if (IS_ERR(p))
+		goto out_err;
+	server->hostname = p;
+
+	p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
+	if (IS_ERR(p))
+		goto out_err;
+	server->mnt_path = p;
+
+	p = nfs_copy_user_string(server->ip_addr, &data->client_addr,
+			sizeof(server->ip_addr) - 1);
+	if (IS_ERR(p))
+		goto out_err;
+
+	/* We now require that the mount process passes the remote address */
+	if (data->host_addrlen != sizeof(server->addr)) {
+		s = ERR_PTR(-EINVAL);
+		goto out_free;
+	}
+	if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) {
+		s = ERR_PTR(-EFAULT);
+		goto out_free;
+	}
+	if (server->addr.sin_family != AF_INET ||
+	    server->addr.sin_addr.s_addr == INADDR_ANY) {
+		printk("NFS: mount program didn't pass remote IP address!\n");
+		s = ERR_PTR(-EINVAL);
+		goto out_free;
+	}
+
+	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
+
+	if (IS_ERR(s) || s->s_root)
+		goto out_free;
+
+	s->s_flags = flags;
+
+	/* Fire up rpciod if not yet running */
+	if (rpciod_up() != 0) {
+		printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
+		s = ERR_PTR(-EIO);
+		goto out_free;
+	}
+
+	error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+	if (error) {
+		up_write(&s->s_umount);
+		deactivate_super(s);
+		return ERR_PTR(error);
+	}
+	s->s_flags |= MS_ACTIVE;
+	return s;
+out_err:
+	s = (struct super_block *)p;
+out_free:
+	if (server->mnt_path)
+		kfree(server->mnt_path);
+	if (server->hostname)
+		kfree(server->hostname);
+	kfree(server);
+	return s;
+}
+
+static void nfs4_kill_super(struct super_block *sb)
+{
+	struct nfs_server *server = NFS_SB(sb);
+
+	nfs_return_all_delegations(sb);
+	kill_anon_super(sb);
+
+	nfs4_renewd_prepare_shutdown(server);
+
+	if (server->client != NULL && !IS_ERR(server->client))
+		rpc_shutdown_client(server->client);
+	rpciod_down();		/* release rpciod */
+
+	destroy_nfsv4_state(server);
+
+	if (server->hostname != NULL)
+		kfree(server->hostname);
+	kfree(server);
+}
+
+static struct file_system_type nfs4_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "nfs4",
+	.get_sb		= nfs4_get_sb,
+	.kill_sb	= nfs4_kill_super,
+	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
+#define nfs4_init_once(nfsi) \
+	do { \
+		INIT_LIST_HEAD(&(nfsi)->open_states); \
+		nfsi->delegation = NULL; \
+		nfsi->delegation_state = 0; \
+		init_rwsem(&nfsi->rwsem); \
+	} while(0)
+#define register_nfs4fs() register_filesystem(&nfs4_fs_type)
+#define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type)
+#else
+#define nfs4_init_once(nfsi) \
+	do { } while (0)
+#define register_nfs4fs() (0)
+#define unregister_nfs4fs()
+#endif
+
+extern int nfs_init_nfspagecache(void);
+extern void nfs_destroy_nfspagecache(void);
+extern int nfs_init_readpagecache(void);
+extern void nfs_destroy_readpagecache(void);
+extern int nfs_init_writepagecache(void);
+extern void nfs_destroy_writepagecache(void);
+#ifdef CONFIG_NFS_DIRECTIO
+extern int nfs_init_directcache(void);
+extern void nfs_destroy_directcache(void);
+#endif
+
+static kmem_cache_t * nfs_inode_cachep;
+
+static struct inode *nfs_alloc_inode(struct super_block *sb)
+{
+	struct nfs_inode *nfsi;
+	nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL);
+	if (!nfsi)
+		return NULL;
+	nfsi->flags = 0;
+	return &nfsi->vfs_inode;
+}
+
+static void nfs_destroy_inode(struct inode *inode)
+{
+	kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
+}
+
+static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+{
+	struct nfs_inode *nfsi = (struct nfs_inode *) foo;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR) {
+		inode_init_once(&nfsi->vfs_inode);
+		spin_lock_init(&nfsi->req_lock);
+		INIT_LIST_HEAD(&nfsi->dirty);
+		INIT_LIST_HEAD(&nfsi->commit);
+		INIT_LIST_HEAD(&nfsi->open_files);
+		INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
+		atomic_set(&nfsi->data_updates, 0);
+		nfsi->ndirty = 0;
+		nfsi->ncommit = 0;
+		nfsi->npages = 0;
+		init_waitqueue_head(&nfsi->nfs_i_wait);
+		nfs4_init_once(nfsi);
+	}
+}
+ 
+int nfs_init_inodecache(void)
+{
+	nfs_inode_cachep = kmem_cache_create("nfs_inode_cache",
+					     sizeof(struct nfs_inode),
+					     0, SLAB_RECLAIM_ACCOUNT,
+					     init_once, NULL);
+	if (nfs_inode_cachep == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void nfs_destroy_inodecache(void)
+{
+	if (kmem_cache_destroy(nfs_inode_cachep))
+		printk(KERN_INFO "nfs_inode_cache: not all structures were freed\n");
+}
+
+/*
+ * Initialize NFS
+ */
+static int __init init_nfs_fs(void)
+{
+	int err;
+
+	err = nfs_init_nfspagecache();
+	if (err)
+		goto out4;
+
+	err = nfs_init_inodecache();
+	if (err)
+		goto out3;
+
+	err = nfs_init_readpagecache();
+	if (err)
+		goto out2;
+
+	err = nfs_init_writepagecache();
+	if (err)
+		goto out1;
+
+#ifdef CONFIG_NFS_DIRECTIO
+	err = nfs_init_directcache();
+	if (err)
+		goto out0;
+#endif
+
+#ifdef CONFIG_PROC_FS
+	rpc_proc_register(&nfs_rpcstat);
+#endif
+        err = register_filesystem(&nfs_fs_type);
+	if (err)
+		goto out;
+	if ((err = register_nfs4fs()) != 0)
+		goto out;
+	return 0;
+out:
+#ifdef CONFIG_PROC_FS
+	rpc_proc_unregister("nfs");
+#endif
+	nfs_destroy_writepagecache();
+#ifdef CONFIG_NFS_DIRECTIO
+out0:
+	nfs_destroy_directcache();
+#endif
+out1:
+	nfs_destroy_readpagecache();
+out2:
+	nfs_destroy_inodecache();
+out3:
+	nfs_destroy_nfspagecache();
+out4:
+	return err;
+}
+
+static void __exit exit_nfs_fs(void)
+{
+#ifdef CONFIG_NFS_DIRECTIO
+	nfs_destroy_directcache();
+#endif
+	nfs_destroy_writepagecache();
+	nfs_destroy_readpagecache();
+	nfs_destroy_inodecache();
+	nfs_destroy_nfspagecache();
+#ifdef CONFIG_PROC_FS
+	rpc_proc_unregister("nfs");
+#endif
+	unregister_filesystem(&nfs_fs_type);
+	unregister_nfs4fs();
+}
+
+/* Not quite true; I just maintain it */
+MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
+MODULE_LICENSE("GPL");
+
+module_init(init_nfs_fs)
+module_exit(exit_nfs_fs)
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
new file mode 100644
index 00000000000..9d3ddad96d9
--- /dev/null
+++ b/fs/nfs/mount_clnt.c
@@ -0,0 +1,183 @@
+/*
+ * linux/fs/nfs/mount_clnt.c
+ *
+ * MOUNT client to support NFSroot.
+ *
+ * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/uio.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/nfs_fs.h>
+
+#ifdef RPC_DEBUG
+# define NFSDBG_FACILITY	NFSDBG_ROOT
+#endif
+
+/*
+#define MOUNT_PROGRAM		100005
+#define MOUNT_VERSION		1
+#define MOUNT_MNT		1
+#define MOUNT_UMNT		3
+ */
+
+static struct rpc_clnt *	mnt_create(char *, struct sockaddr_in *,
+								int, int);
+static struct rpc_program	mnt_program;
+
+struct mnt_fhstatus {
+	unsigned int		status;
+	struct nfs_fh *		fh;
+};
+
+/*
+ * Obtain an NFS file handle for the given host and path
+ */
+int
+nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
+		int version, int protocol)
+{
+	struct rpc_clnt		*mnt_clnt;
+	struct mnt_fhstatus	result = {
+		.fh		= fh
+	};
+	char			hostname[32];
+	int			status;
+	int			call;
+
+	dprintk("NFS:      nfs_mount(%08x:%s)\n",
+			(unsigned)ntohl(addr->sin_addr.s_addr), path);
+
+	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr));
+	mnt_clnt = mnt_create(hostname, addr, version, protocol);
+	if (IS_ERR(mnt_clnt))
+		return PTR_ERR(mnt_clnt);
+
+	call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT;
+	status = rpc_call(mnt_clnt, call, path, &result, 0);
+	return status < 0? status : (result.status? -EACCES : 0);
+}
+
+static struct rpc_clnt *
+mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
+		int protocol)
+{
+	struct rpc_xprt	*xprt;
+	struct rpc_clnt	*clnt;
+
+	xprt = xprt_create_proto(protocol, srvaddr, NULL);
+	if (IS_ERR(xprt))
+		return (struct rpc_clnt *)xprt;
+
+	clnt = rpc_create_client(xprt, hostname,
+				&mnt_program, version,
+				RPC_AUTH_UNIX);
+	if (IS_ERR(clnt)) {
+		xprt_destroy(xprt);
+	} else {
+		clnt->cl_softrtry = 1;
+		clnt->cl_chatty   = 1;
+		clnt->cl_oneshot  = 1;
+		clnt->cl_intr = 1;
+	}
+	return clnt;
+}
+
+/*
+ * XDR encode/decode functions for MOUNT
+ */
+static int
+xdr_encode_dirpath(struct rpc_rqst *req, u32 *p, const char *path)
+{
+	p = xdr_encode_string(p, path);
+
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+static int
+xdr_decode_fhstatus(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res)
+{
+	struct nfs_fh *fh = res->fh;
+
+	if ((res->status = ntohl(*p++)) == 0) {
+		fh->size = NFS2_FHSIZE;
+		memcpy(fh->data, p, NFS2_FHSIZE);
+	}
+	return 0;
+}
+
+static int
+xdr_decode_fhstatus3(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res)
+{
+	struct nfs_fh *fh = res->fh;
+
+	if ((res->status = ntohl(*p++)) == 0) {
+		int size = ntohl(*p++);
+		if (size <= NFS3_FHSIZE) {
+			fh->size = size;
+			memcpy(fh->data, p, size);
+		} else
+			res->status = -EBADHANDLE;
+	}
+	return 0;
+}
+
+#define MNT_dirpath_sz		(1 + 256)
+#define MNT_fhstatus_sz		(1 + 8)
+
+static struct rpc_procinfo	mnt_procedures[] = {
+[MNTPROC_MNT] = {
+	  .p_proc		= MNTPROC_MNT,
+	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,	
+	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus,
+	  .p_bufsiz		= MNT_dirpath_sz << 2,
+	},
+};
+
+static struct rpc_procinfo mnt3_procedures[] = {
+[MOUNTPROC3_MNT] = {
+	  .p_proc		= MOUNTPROC3_MNT,
+	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,
+	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus3,
+	  .p_bufsiz		= MNT_dirpath_sz << 2,
+	},
+};
+
+
+static struct rpc_version	mnt_version1 = {
+		.number		= 1,
+		.nrprocs 	= 2,
+		.procs 		= mnt_procedures
+};
+
+static struct rpc_version       mnt_version3 = {
+		.number		= 3,
+		.nrprocs	= 2,
+		.procs		= mnt3_procedures
+};
+
+static struct rpc_version *	mnt_version[] = {
+	NULL,
+	&mnt_version1,
+	NULL,
+	&mnt_version3,
+};
+
+static struct rpc_stat		mnt_stats;
+
+static struct rpc_program	mnt_program = {
+	.name		= "mount",
+	.number		= NFS_MNT_PROGRAM,
+	.nrvers		= sizeof(mnt_version)/sizeof(mnt_version[0]),
+	.version	= mnt_version,
+	.stats		= &mnt_stats,
+};
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
new file mode 100644
index 00000000000..d91b69044a4
--- /dev/null
+++ b/fs/nfs/nfs2xdr.c
@@ -0,0 +1,711 @@
+/*
+ * linux/fs/nfs/nfs2xdr.c
+ *
+ * XDR functions to encode/decode NFS RPC arguments and results.
+ *
+ * Copyright (C) 1992, 1993, 1994  Rick Sladkey
+ * Copyright (C) 1996 Olaf Kirch
+ * 04 Aug 1998  Ion Badulescu <ionut@cs.columbia.edu>
+ * 		FIFO's need special handling in NFSv2
+ */
+
+#include <linux/param.h>
+#include <linux/time.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs_fs.h>
+
+#define NFSDBG_FACILITY		NFSDBG_XDR
+/* #define NFS_PARANOIA 1 */
+
+extern int			nfs_stat_to_errno(int stat);
+
+/* Mapping from NFS error code to "errno" error code. */
+#define errno_NFSERR_IO		EIO
+
+/*
+ * Declare the space requirements for NFS arguments and replies as
+ * number of 32bit-words
+ */
+#define NFS_fhandle_sz		(8)
+#define NFS_sattr_sz		(8)
+#define NFS_filename_sz		(1+(NFS2_MAXNAMLEN>>2))
+#define NFS_path_sz		(1+(NFS2_MAXPATHLEN>>2))
+#define NFS_fattr_sz		(17)
+#define NFS_info_sz		(5)
+#define NFS_entry_sz		(NFS_filename_sz+3)
+
+#define NFS_diropargs_sz	(NFS_fhandle_sz+NFS_filename_sz)
+#define NFS_sattrargs_sz	(NFS_fhandle_sz+NFS_sattr_sz)
+#define NFS_readlinkargs_sz	(NFS_fhandle_sz)
+#define NFS_readargs_sz		(NFS_fhandle_sz+3)
+#define NFS_writeargs_sz	(NFS_fhandle_sz+4)
+#define NFS_createargs_sz	(NFS_diropargs_sz+NFS_sattr_sz)
+#define NFS_renameargs_sz	(NFS_diropargs_sz+NFS_diropargs_sz)
+#define NFS_linkargs_sz		(NFS_fhandle_sz+NFS_diropargs_sz)
+#define NFS_symlinkargs_sz	(NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz)
+#define NFS_readdirargs_sz	(NFS_fhandle_sz+2)
+
+#define NFS_attrstat_sz		(1+NFS_fattr_sz)
+#define NFS_diropres_sz		(1+NFS_fhandle_sz+NFS_fattr_sz)
+#define NFS_readlinkres_sz	(2)
+#define NFS_readres_sz		(1+NFS_fattr_sz+1)
+#define NFS_writeres_sz         (NFS_attrstat_sz)
+#define NFS_stat_sz		(1)
+#define NFS_readdirres_sz	(1)
+#define NFS_statfsres_sz	(1+NFS_info_sz)
+
+/*
+ * Common NFS XDR functions as inlines
+ */
+static inline u32 *
+xdr_encode_fhandle(u32 *p, struct nfs_fh *fhandle)
+{
+	memcpy(p, fhandle->data, NFS2_FHSIZE);
+	return p + XDR_QUADLEN(NFS2_FHSIZE);
+}
+
+static inline u32 *
+xdr_decode_fhandle(u32 *p, struct nfs_fh *fhandle)
+{
+	/* NFSv2 handles have a fixed length */
+	fhandle->size = NFS2_FHSIZE;
+	memcpy(fhandle->data, p, NFS2_FHSIZE);
+	return p + XDR_QUADLEN(NFS2_FHSIZE);
+}
+
+static inline u32*
+xdr_encode_time(u32 *p, struct timespec *timep)
+{
+	*p++ = htonl(timep->tv_sec);
+	/* Convert nanoseconds into microseconds */
+	*p++ = htonl(timep->tv_nsec ? timep->tv_nsec / 1000 : 0);
+	return p;
+}
+
+static inline u32*
+xdr_encode_current_server_time(u32 *p, struct timespec *timep)
+{
+	/*
+	 * Passing the invalid value useconds=1000000 is a
+	 * Sun convention for "set to current server time".
+	 * It's needed to make permissions checks for the
+	 * "touch" program across v2 mounts to Solaris and
+	 * Irix boxes work correctly. See description of
+	 * sattr in section 6.1 of "NFS Illustrated" by
+	 * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5
+	 */
+	*p++ = htonl(timep->tv_sec);
+	*p++ = htonl(1000000);
+	return p;
+}
+
+static inline u32*
+xdr_decode_time(u32 *p, struct timespec *timep)
+{
+	timep->tv_sec = ntohl(*p++);
+	/* Convert microseconds into nanoseconds */
+	timep->tv_nsec = ntohl(*p++) * 1000;
+	return p;
+}
+
+static u32 *
+xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
+{
+	u32 rdev;
+	fattr->type = (enum nfs_ftype) ntohl(*p++);
+	fattr->mode = ntohl(*p++);
+	fattr->nlink = ntohl(*p++);
+	fattr->uid = ntohl(*p++);
+	fattr->gid = ntohl(*p++);
+	fattr->size = ntohl(*p++);
+	fattr->du.nfs2.blocksize = ntohl(*p++);
+	rdev = ntohl(*p++);
+	fattr->du.nfs2.blocks = ntohl(*p++);
+	fattr->fsid_u.nfs3 = ntohl(*p++);
+	fattr->fileid = ntohl(*p++);
+	p = xdr_decode_time(p, &fattr->atime);
+	p = xdr_decode_time(p, &fattr->mtime);
+	p = xdr_decode_time(p, &fattr->ctime);
+	fattr->valid |= NFS_ATTR_FATTR;
+	fattr->rdev = new_decode_dev(rdev);
+	if (fattr->type == NFCHR && rdev == NFS2_FIFO_DEV) {
+		fattr->type = NFFIFO;
+		fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
+		fattr->rdev = 0;
+	}
+	fattr->timestamp = jiffies;
+	return p;
+}
+
+#define SATTR(p, attr, flag, field) \
+        *p++ = (attr->ia_valid & flag) ? htonl(attr->field) : ~(u32) 0
+static inline u32 *
+xdr_encode_sattr(u32 *p, struct iattr *attr)
+{
+	SATTR(p, attr, ATTR_MODE, ia_mode);
+	SATTR(p, attr, ATTR_UID, ia_uid);
+	SATTR(p, attr, ATTR_GID, ia_gid);
+	SATTR(p, attr, ATTR_SIZE, ia_size);
+
+	if (attr->ia_valid & ATTR_ATIME_SET) {
+		p = xdr_encode_time(p, &attr->ia_atime);
+	} else if (attr->ia_valid & ATTR_ATIME) {
+		p = xdr_encode_current_server_time(p, &attr->ia_atime);
+	} else {
+		*p++ = ~(u32) 0;
+		*p++ = ~(u32) 0;
+	}
+
+	if (attr->ia_valid & ATTR_MTIME_SET) {
+		p = xdr_encode_time(p, &attr->ia_mtime);
+	} else if (attr->ia_valid & ATTR_MTIME) {
+		p = xdr_encode_current_server_time(p, &attr->ia_mtime);
+	} else {
+		*p++ = ~(u32) 0;	
+		*p++ = ~(u32) 0;
+	}
+  	return p;
+}
+#undef SATTR
+
+/*
+ * NFS encode functions
+ */
+/*
+ * Encode file handle argument
+ * GETATTR, READLINK, STATFS
+ */
+static int
+nfs_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh)
+{
+	p = xdr_encode_fhandle(p, fh);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode SETATTR arguments
+ */
+static int
+nfs_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs_sattrargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_sattr(p, args->sattr);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode directory ops argument
+ * LOOKUP, REMOVE, RMDIR
+ */
+static int
+nfs_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs_diropargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_array(p, args->name, args->len);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Arguments to a READ call. Since we read data directly into the page
+ * cache, we also set up the reply iovec here so that iov[1] points
+ * exactly to the page we want to fetch.
+ */
+static int
+nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args)
+{
+	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	unsigned int replen;
+	u32 offset = (u32)args->offset;
+	u32 count = args->count;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	*p++ = htonl(offset);
+	*p++ = htonl(count);
+	*p++ = htonl(count);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Inline the page array */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen,
+			 args->pages, args->pgbase, count);
+	return 0;
+}
+
+/*
+ * Decode READ reply
+ */
+static int
+nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
+{
+	struct kvec *iov = req->rq_rcv_buf.head;
+	int	status, count, recvd, hdrlen;
+
+	if ((status = ntohl(*p++)))
+		return -nfs_stat_to_errno(status);
+	p = xdr_decode_fattr(p, res->fattr);
+
+	count = ntohl(*p++);
+	res->eof = 0;
+	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+	if (iov->iov_len < hdrlen) {
+		printk(KERN_WARNING "NFS: READ reply header overflowed:"
+				"length %d > %Zu\n", hdrlen, iov->iov_len);
+		return -errno_NFSERR_IO;
+	} else if (iov->iov_len != hdrlen) {
+		dprintk("NFS: READ header is short. iovec will be shifted.\n");
+		xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen);
+	}
+
+	recvd = req->rq_rcv_buf.len - hdrlen;
+	if (count > recvd) {
+		printk(KERN_WARNING "NFS: server cheating in read reply: "
+			"count %d > recvd %d\n", count, recvd);
+		count = recvd;
+	}
+
+	dprintk("RPC:      readres OK count %d\n", count);
+	if (count < res->count)
+		res->count = count;
+
+	return count;
+}
+
+
+/*
+ * Write arguments. Splice the buffer to be written into the iovec.
+ */
+static int
+nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
+{
+	struct xdr_buf *sndbuf = &req->rq_snd_buf;
+	u32 offset = (u32)args->offset;
+	u32 count = args->count;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	*p++ = htonl(offset);
+	*p++ = htonl(offset);
+	*p++ = htonl(count);
+	*p++ = htonl(count);
+	sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+
+	/* Copy the page array */
+	xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
+	return 0;
+}
+
+/*
+ * Encode create arguments
+ * CREATE, MKDIR
+ */
+static int
+nfs_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs_createargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_array(p, args->name, args->len);
+	p = xdr_encode_sattr(p, args->sattr);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode RENAME arguments
+ */
+static int
+nfs_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs_renameargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fromfh);
+	p = xdr_encode_array(p, args->fromname, args->fromlen);
+	p = xdr_encode_fhandle(p, args->tofh);
+	p = xdr_encode_array(p, args->toname, args->tolen);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode LINK arguments
+ */
+static int
+nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fromfh);
+	p = xdr_encode_fhandle(p, args->tofh);
+	p = xdr_encode_array(p, args->toname, args->tolen);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode SYMLINK arguments
+ */
+static int
+nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fromfh);
+	p = xdr_encode_array(p, args->fromname, args->fromlen);
+	p = xdr_encode_array(p, args->topath, args->tolen);
+	p = xdr_encode_sattr(p, args->sattr);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode arguments to readdir call
+ */
+static int
+nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args)
+{
+	struct rpc_task	*task = req->rq_task;
+	struct rpc_auth	*auth = task->tk_auth;
+	unsigned int replen;
+	u32 count = args->count;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	*p++ = htonl(args->cookie);
+	*p++ = htonl(count); /* see above */
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Inline the page array */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count);
+	return 0;
+}
+
+/*
+ * Decode the result of a readdir call.
+ * We're not really decoding anymore, we just leave the buffer untouched
+ * and only check that it is syntactically correct.
+ * The real decoding happens in nfs_decode_entry below, called directly
+ * from nfs_readdir for each entry.
+ */
+static int
+nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, void *dummy)
+{
+	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+	struct kvec *iov = rcvbuf->head;
+	struct page **page;
+	int hdrlen, recvd;
+	int status, nr;
+	unsigned int len, pglen;
+	u32 *end, *entry, *kaddr;
+
+	if ((status = ntohl(*p++)))
+		return -nfs_stat_to_errno(status);
+
+	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+	if (iov->iov_len < hdrlen) {
+		printk(KERN_WARNING "NFS: READDIR reply header overflowed:"
+				"length %d > %Zu\n", hdrlen, iov->iov_len);
+		return -errno_NFSERR_IO;
+	} else if (iov->iov_len != hdrlen) {
+		dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
+		xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
+	}
+
+	pglen = rcvbuf->page_len;
+	recvd = rcvbuf->len - hdrlen;
+	if (pglen > recvd)
+		pglen = recvd;
+	page = rcvbuf->pages;
+	kaddr = p = (u32 *)kmap_atomic(*page, KM_USER0);
+	end = (u32 *)((char *)p + pglen);
+	entry = p;
+	for (nr = 0; *p++; nr++) {
+		if (p + 2 > end)
+			goto short_pkt;
+		p++; /* fileid */
+		len = ntohl(*p++);
+		p += XDR_QUADLEN(len) + 1;	/* name plus cookie */
+		if (len > NFS2_MAXNAMLEN) {
+			printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n",
+						len);
+			goto err_unmap;
+		}
+		if (p + 2 > end)
+			goto short_pkt;
+		entry = p;
+	}
+	if (!nr && (entry[0] != 0 || entry[1] == 0))
+		goto short_pkt;
+ out:
+	kunmap_atomic(kaddr, KM_USER0);
+	return nr;
+ short_pkt:
+	entry[0] = entry[1] = 0;
+	/* truncate listing ? */
+	if (!nr) {
+		printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
+		entry[1] = 1;
+	}
+	goto out;
+err_unmap:
+	nr = -errno_NFSERR_IO;
+	goto out;
+}
+
+u32 *
+nfs_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
+{
+	if (!*p++) {
+		if (!*p)
+			return ERR_PTR(-EAGAIN);
+		entry->eof = 1;
+		return ERR_PTR(-EBADCOOKIE);
+	}
+
+	entry->ino	  = ntohl(*p++);
+	entry->len	  = ntohl(*p++);
+	entry->name	  = (const char *) p;
+	p		 += XDR_QUADLEN(entry->len);
+	entry->prev_cookie	  = entry->cookie;
+	entry->cookie	  = ntohl(*p++);
+	entry->eof	  = !p[0] && p[1];
+
+	return p;
+}
+
+/*
+ * NFS XDR decode functions
+ */
+/*
+ * Decode simple status reply
+ */
+static int
+nfs_xdr_stat(struct rpc_rqst *req, u32 *p, void *dummy)
+{
+	int	status;
+
+	if ((status = ntohl(*p++)) != 0)
+		status = -nfs_stat_to_errno(status);
+	return status;
+}
+
+/*
+ * Decode attrstat reply
+ * GETATTR, SETATTR, WRITE
+ */
+static int
+nfs_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+{
+	int	status;
+
+	if ((status = ntohl(*p++)))
+		return -nfs_stat_to_errno(status);
+	xdr_decode_fattr(p, fattr);
+	return 0;
+}
+
+/*
+ * Decode diropres reply
+ * LOOKUP, CREATE, MKDIR
+ */
+static int
+nfs_xdr_diropres(struct rpc_rqst *req, u32 *p, struct nfs_diropok *res)
+{
+	int	status;
+
+	if ((status = ntohl(*p++)))
+		return -nfs_stat_to_errno(status);
+	p = xdr_decode_fhandle(p, res->fh);
+	xdr_decode_fattr(p, res->fattr);
+	return 0;
+}
+
+/*
+ * Encode READLINK args
+ */
+static int
+nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlinkargs *args)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	unsigned int replen;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Inline the page array */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen);
+	return 0;
+}
+
+/*
+ * Decode READLINK reply
+ */
+static int
+nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, void *dummy)
+{
+	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+	struct kvec *iov = rcvbuf->head;
+	int hdrlen, len, recvd;
+	char	*kaddr;
+	int	status;
+
+	if ((status = ntohl(*p++)))
+		return -nfs_stat_to_errno(status);
+	/* Convert length of symlink */
+	len = ntohl(*p++);
+	if (len >= rcvbuf->page_len || len <= 0) {
+		dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
+		return -ENAMETOOLONG;
+	}
+	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+	if (iov->iov_len < hdrlen) {
+		printk(KERN_WARNING "NFS: READLINK reply header overflowed:"
+				"length %d > %Zu\n", hdrlen, iov->iov_len);
+		return -errno_NFSERR_IO;
+	} else if (iov->iov_len != hdrlen) {
+		dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
+		xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
+	}
+	recvd = req->rq_rcv_buf.len - hdrlen;
+	if (recvd < len) {
+		printk(KERN_WARNING "NFS: server cheating in readlink reply: "
+				"count %u > recvd %u\n", len, recvd);
+		return -EIO;
+	}
+
+	/* NULL terminate the string we got */
+	kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0);
+	kaddr[len+rcvbuf->page_base] = '\0';
+	kunmap_atomic(kaddr, KM_USER0);
+	return 0;
+}
+
+/*
+ * Decode WRITE reply
+ */
+static int
+nfs_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
+{
+	res->verf->committed = NFS_FILE_SYNC;
+	return nfs_xdr_attrstat(req, p, res->fattr);
+}
+
+/*
+ * Decode STATFS reply
+ */
+static int
+nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs2_fsstat *res)
+{
+	int	status;
+
+	if ((status = ntohl(*p++)))
+		return -nfs_stat_to_errno(status);
+
+	res->tsize  = ntohl(*p++);
+	res->bsize  = ntohl(*p++);
+	res->blocks = ntohl(*p++);
+	res->bfree  = ntohl(*p++);
+	res->bavail = ntohl(*p++);
+	return 0;
+}
+
+/*
+ * We need to translate between nfs status return values and
+ * the local errno values which may not be the same.
+ */
+static struct {
+	int stat;
+	int errno;
+} nfs_errtbl[] = {
+	{ NFS_OK,		0		},
+	{ NFSERR_PERM,		EPERM		},
+	{ NFSERR_NOENT,		ENOENT		},
+	{ NFSERR_IO,		errno_NFSERR_IO	},
+	{ NFSERR_NXIO,		ENXIO		},
+/*	{ NFSERR_EAGAIN,	EAGAIN		}, */
+	{ NFSERR_ACCES,		EACCES		},
+	{ NFSERR_EXIST,		EEXIST		},
+	{ NFSERR_XDEV,		EXDEV		},
+	{ NFSERR_NODEV,		ENODEV		},
+	{ NFSERR_NOTDIR,	ENOTDIR		},
+	{ NFSERR_ISDIR,		EISDIR		},
+	{ NFSERR_INVAL,		EINVAL		},
+	{ NFSERR_FBIG,		EFBIG		},
+	{ NFSERR_NOSPC,		ENOSPC		},
+	{ NFSERR_ROFS,		EROFS		},
+	{ NFSERR_MLINK,		EMLINK		},
+	{ NFSERR_NAMETOOLONG,	ENAMETOOLONG	},
+	{ NFSERR_NOTEMPTY,	ENOTEMPTY	},
+	{ NFSERR_DQUOT,		EDQUOT		},
+	{ NFSERR_STALE,		ESTALE		},
+	{ NFSERR_REMOTE,	EREMOTE		},
+#ifdef EWFLUSH
+	{ NFSERR_WFLUSH,	EWFLUSH		},
+#endif
+	{ NFSERR_BADHANDLE,	EBADHANDLE	},
+	{ NFSERR_NOT_SYNC,	ENOTSYNC	},
+	{ NFSERR_BAD_COOKIE,	EBADCOOKIE	},
+	{ NFSERR_NOTSUPP,	ENOTSUPP	},
+	{ NFSERR_TOOSMALL,	ETOOSMALL	},
+	{ NFSERR_SERVERFAULT,	ESERVERFAULT	},
+	{ NFSERR_BADTYPE,	EBADTYPE	},
+	{ NFSERR_JUKEBOX,	EJUKEBOX	},
+	{ -1,			EIO		}
+};
+
+/*
+ * Convert an NFS error code to a local one.
+ * This one is used jointly by NFSv2 and NFSv3.
+ */
+int
+nfs_stat_to_errno(int stat)
+{
+	int i;
+
+	for (i = 0; nfs_errtbl[i].stat != -1; i++) {
+		if (nfs_errtbl[i].stat == stat)
+			return nfs_errtbl[i].errno;
+	}
+	printk(KERN_ERR "nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
+	return nfs_errtbl[i].errno;
+}
+
+#ifndef MAX
+# define MAX(a, b)	(((a) > (b))? (a) : (b))
+#endif
+
+#define PROC(proc, argtype, restype, timer)				\
+[NFSPROC_##proc] = {							\
+	.p_proc	    =  NFSPROC_##proc,					\
+	.p_encode   =  (kxdrproc_t) nfs_xdr_##argtype,			\
+	.p_decode   =  (kxdrproc_t) nfs_xdr_##restype,			\
+	.p_bufsiz   =  MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2,	\
+	.p_timer    =  timer						\
+	}
+struct rpc_procinfo	nfs_procedures[] = {
+    PROC(GETATTR,	fhandle,	attrstat, 1),
+    PROC(SETATTR,	sattrargs,	attrstat, 0),
+    PROC(LOOKUP,	diropargs,	diropres, 2),
+    PROC(READLINK,	readlinkargs,	readlinkres, 3),
+    PROC(READ,		readargs,	readres, 3),
+    PROC(WRITE,		writeargs,	writeres, 4),
+    PROC(CREATE,	createargs,	diropres, 0),
+    PROC(REMOVE,	diropargs,	stat, 0),
+    PROC(RENAME,	renameargs,	stat, 0),
+    PROC(LINK,		linkargs,	stat, 0),
+    PROC(SYMLINK,	symlinkargs,	stat, 0),
+    PROC(MKDIR,		createargs,	diropres, 0),
+    PROC(RMDIR,		diropargs,	stat, 0),
+    PROC(READDIR,	readdirargs,	readdirres, 3),
+    PROC(STATFS,	fhandle,	statfsres, 0),
+};
+
+struct rpc_version		nfs_version2 = {
+	.number			= 2,
+	.nrprocs		= sizeof(nfs_procedures)/sizeof(nfs_procedures[0]),
+	.procs			= nfs_procedures
+};
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
new file mode 100644
index 00000000000..3878494dfc2
--- /dev/null
+++ b/fs/nfs/nfs3proc.c
@@ -0,0 +1,859 @@
+/*
+ *  linux/fs/nfs/nfs3proc.c
+ *
+ *  Client-side NFSv3 procedures stubs.
+ *
+ *  Copyright (C) 1997, Olaf Kirch
+ */
+
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs3.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+
+#define NFSDBG_FACILITY		NFSDBG_PROC
+
+extern struct rpc_procinfo nfs3_procedures[];
+
+/* A wrapper to handle the EJUKEBOX error message */
+static int
+nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
+{
+	sigset_t oldset;
+	int res;
+	rpc_clnt_sigmask(clnt, &oldset);
+	do {
+		res = rpc_call_sync(clnt, msg, flags);
+		if (res != -EJUKEBOX)
+			break;
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
+		res = -ERESTARTSYS;
+	} while (!signalled());
+	rpc_clnt_sigunmask(clnt, &oldset);
+	return res;
+}
+
+static inline int
+nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
+{
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[proc],
+		.rpc_argp	= argp,
+		.rpc_resp	= resp,
+	};
+	return nfs3_rpc_wrapper(clnt, &msg, flags);
+}
+
+#define rpc_call(clnt, proc, argp, resp, flags) \
+		nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags)
+#define rpc_call_sync(clnt, msg, flags) \
+		nfs3_rpc_wrapper(clnt, msg, flags)
+
+static int
+nfs3_async_handle_jukebox(struct rpc_task *task)
+{
+	if (task->tk_status != -EJUKEBOX)
+		return 0;
+	task->tk_status = 0;
+	rpc_restart_call(task);
+	rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
+	return 1;
+}
+
+/*
+ * Bare-bones access to getattr: this is for nfs_read_super.
+ */
+static int
+nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_fsinfo *info)
+{
+	int	status;
+
+	dprintk("%s: call  fsinfo\n", __FUNCTION__);
+	info->fattr->valid = 0;
+	status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
+	dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
+	if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
+		status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
+		dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
+	}
+	return status;
+}
+
+/*
+ * One function for each procedure in the NFS protocol.
+ */
+static int
+nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_fattr *fattr)
+{
+	int	status;
+
+	dprintk("NFS call  getattr\n");
+	fattr->valid = 0;
+	status = rpc_call(server->client, NFS3PROC_GETATTR,
+			  fhandle, fattr, 0);
+	dprintk("NFS reply getattr: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
+			struct iattr *sattr)
+{
+	struct inode *inode = dentry->d_inode;
+	struct nfs3_sattrargs	arg = {
+		.fh		= NFS_FH(inode),
+		.sattr		= sattr,
+	};
+	int	status;
+
+	dprintk("NFS call  setattr\n");
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
+	dprintk("NFS reply setattr: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_lookup(struct inode *dir, struct qstr *name,
+		 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs_fattr	dir_attr;
+	struct nfs3_diropargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= name->name,
+		.len		= name->len
+	};
+	struct nfs3_diropres	res = {
+		.dir_attr	= &dir_attr,
+		.fh		= fhandle,
+		.fattr		= fattr
+	};
+	int			status;
+
+	dprintk("NFS call  lookup %s\n", name->name);
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0);
+	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))
+		status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
+			 fhandle, fattr, 0);
+	dprintk("NFS reply lookup: %d\n", status);
+	if (status >= 0)
+		status = nfs_refresh_inode(dir, &dir_attr);
+	return status;
+}
+
+static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+{
+	struct nfs_fattr	fattr;
+	struct nfs3_accessargs	arg = {
+		.fh		= NFS_FH(inode),
+	};
+	struct nfs3_accessres	res = {
+		.fattr		= &fattr,
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_ACCESS],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+		.rpc_cred	= entry->cred
+	};
+	int mode = entry->mask;
+	int status;
+
+	dprintk("NFS call  access\n");
+	fattr.valid = 0;
+
+	if (mode & MAY_READ)
+		arg.access |= NFS3_ACCESS_READ;
+	if (S_ISDIR(inode->i_mode)) {
+		if (mode & MAY_WRITE)
+			arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE;
+		if (mode & MAY_EXEC)
+			arg.access |= NFS3_ACCESS_LOOKUP;
+	} else {
+		if (mode & MAY_WRITE)
+			arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND;
+		if (mode & MAY_EXEC)
+			arg.access |= NFS3_ACCESS_EXECUTE;
+	}
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	nfs_refresh_inode(inode, &fattr);
+	if (status == 0) {
+		entry->mask = 0;
+		if (res.access & NFS3_ACCESS_READ)
+			entry->mask |= MAY_READ;
+		if (res.access & (NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE))
+			entry->mask |= MAY_WRITE;
+		if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
+			entry->mask |= MAY_EXEC;
+	}
+	dprintk("NFS reply access: %d\n", status);
+	return status;
+}
+
+static int nfs3_proc_readlink(struct inode *inode, struct page *page,
+		unsigned int pgbase, unsigned int pglen)
+{
+	struct nfs_fattr	fattr;
+	struct nfs3_readlinkargs args = {
+		.fh		= NFS_FH(inode),
+		.pgbase		= pgbase,
+		.pglen		= pglen,
+		.pages		= &page
+	};
+	int			status;
+
+	dprintk("NFS call  readlink\n");
+	fattr.valid = 0;
+	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK,
+			  &args, &fattr, 0);
+	nfs_refresh_inode(inode, &fattr);
+	dprintk("NFS reply readlink: %d\n", status);
+	return status;
+}
+
+static int nfs3_proc_read(struct nfs_read_data *rdata)
+{
+	int			flags = rdata->flags;
+	struct inode *		inode = rdata->inode;
+	struct nfs_fattr *	fattr = rdata->res.fattr;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_READ],
+		.rpc_argp	= &rdata->args,
+		.rpc_resp	= &rdata->res,
+		.rpc_cred	= rdata->cred,
+	};
+	int			status;
+
+	dprintk("NFS call  read %d @ %Ld\n", rdata->args.count,
+			(long long) rdata->args.offset);
+	fattr->valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
+	if (status >= 0)
+		nfs_refresh_inode(inode, fattr);
+	dprintk("NFS reply read: %d\n", status);
+	return status;
+}
+
+static int nfs3_proc_write(struct nfs_write_data *wdata)
+{
+	int			rpcflags = wdata->flags;
+	struct inode *		inode = wdata->inode;
+	struct nfs_fattr *	fattr = wdata->res.fattr;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_WRITE],
+		.rpc_argp	= &wdata->args,
+		.rpc_resp	= &wdata->res,
+		.rpc_cred	= wdata->cred,
+	};
+	int			status;
+
+	dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
+			(long long) wdata->args.offset);
+	fattr->valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags);
+	if (status >= 0)
+		nfs_refresh_inode(inode, fattr);
+	dprintk("NFS reply write: %d\n", status);
+	return status < 0? status : wdata->res.count;
+}
+
+static int nfs3_proc_commit(struct nfs_write_data *cdata)
+{
+	struct inode *		inode = cdata->inode;
+	struct nfs_fattr *	fattr = cdata->res.fattr;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_COMMIT],
+		.rpc_argp	= &cdata->args,
+		.rpc_resp	= &cdata->res,
+		.rpc_cred	= cdata->cred,
+	};
+	int			status;
+
+	dprintk("NFS call  commit %d @ %Ld\n", cdata->args.count,
+			(long long) cdata->args.offset);
+	fattr->valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	if (status >= 0)
+		nfs_refresh_inode(inode, fattr);
+	dprintk("NFS reply commit: %d\n", status);
+	return status;
+}
+
+/*
+ * Create a regular file.
+ * For now, we don't implement O_EXCL.
+ */
+static int
+nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+		 int flags)
+{
+	struct nfs_fh		fhandle;
+	struct nfs_fattr	fattr;
+	struct nfs_fattr	dir_attr;
+	struct nfs3_createargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
+		.sattr		= sattr,
+	};
+	struct nfs3_diropres	res = {
+		.dir_attr	= &dir_attr,
+		.fh		= &fhandle,
+		.fattr		= &fattr
+	};
+	int			status;
+
+	dprintk("NFS call  create %s\n", dentry->d_name.name);
+	arg.createmode = NFS3_CREATE_UNCHECKED;
+	if (flags & O_EXCL) {
+		arg.createmode  = NFS3_CREATE_EXCLUSIVE;
+		arg.verifier[0] = jiffies;
+		arg.verifier[1] = current->pid;
+	}
+
+again:
+	dir_attr.valid = 0;
+	fattr.valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+
+	/* If the server doesn't support the exclusive creation semantics,
+	 * try again with simple 'guarded' mode. */
+	if (status == NFSERR_NOTSUPP) {
+		switch (arg.createmode) {
+			case NFS3_CREATE_EXCLUSIVE:
+				arg.createmode = NFS3_CREATE_GUARDED;
+				break;
+
+			case NFS3_CREATE_GUARDED:
+				arg.createmode = NFS3_CREATE_UNCHECKED;
+				break;
+
+			case NFS3_CREATE_UNCHECKED:
+				goto out;
+		}
+		goto again;
+	}
+
+	if (status == 0)
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	if (status != 0)
+		goto out;
+
+	/* When we created the file with exclusive semantics, make
+	 * sure we set the attributes afterwards. */
+	if (arg.createmode == NFS3_CREATE_EXCLUSIVE) {
+		dprintk("NFS call  setattr (post-create)\n");
+
+		if (!(sattr->ia_valid & ATTR_ATIME_SET))
+			sattr->ia_valid |= ATTR_ATIME;
+		if (!(sattr->ia_valid & ATTR_MTIME_SET))
+			sattr->ia_valid |= ATTR_MTIME;
+
+		/* Note: we could use a guarded setattr here, but I'm
+		 * not sure this buys us anything (and I'd have
+		 * to revamp the NFSv3 XDR code) */
+		status = nfs3_proc_setattr(dentry, &fattr, sattr);
+		nfs_refresh_inode(dentry->d_inode, &fattr);
+		dprintk("NFS reply setattr (post-create): %d\n", status);
+	}
+out:
+	dprintk("NFS reply create: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_remove(struct inode *dir, struct qstr *name)
+{
+	struct nfs_fattr	dir_attr;
+	struct nfs3_diropargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= name->name,
+		.len		= name->len
+	};
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_REMOVE],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &dir_attr,
+	};
+	int			status;
+
+	dprintk("NFS call  remove %s\n", name->name);
+	dir_attr.valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	dprintk("NFS reply remove: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name)
+{
+	struct unlinkxdr {
+		struct nfs3_diropargs arg;
+		struct nfs_fattr res;
+	} *ptr;
+
+	ptr = (struct unlinkxdr *)kmalloc(sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+	ptr->arg.fh = NFS_FH(dir->d_inode);
+	ptr->arg.name = name->name;
+	ptr->arg.len = name->len;
+	ptr->res.valid = 0;
+	msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE];
+	msg->rpc_argp = &ptr->arg;
+	msg->rpc_resp = &ptr->res;
+	return 0;
+}
+
+static int
+nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
+{
+	struct rpc_message *msg = &task->tk_msg;
+	struct nfs_fattr	*dir_attr;
+
+	if (nfs3_async_handle_jukebox(task))
+		return 1;
+	if (msg->rpc_argp) {
+		dir_attr = (struct nfs_fattr*)msg->rpc_resp;
+		nfs_refresh_inode(dir->d_inode, dir_attr);
+		kfree(msg->rpc_argp);
+	}
+	return 0;
+}
+
+static int
+nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
+		 struct inode *new_dir, struct qstr *new_name)
+{
+	struct nfs_fattr	old_dir_attr, new_dir_attr;
+	struct nfs3_renameargs	arg = {
+		.fromfh		= NFS_FH(old_dir),
+		.fromname	= old_name->name,
+		.fromlen	= old_name->len,
+		.tofh		= NFS_FH(new_dir),
+		.toname		= new_name->name,
+		.tolen		= new_name->len
+	};
+	struct nfs3_renameres	res = {
+		.fromattr	= &old_dir_attr,
+		.toattr		= &new_dir_attr
+	};
+	int			status;
+
+	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
+	old_dir_attr.valid = 0;
+	new_dir_attr.valid = 0;
+	status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
+	nfs_refresh_inode(old_dir, &old_dir_attr);
+	nfs_refresh_inode(new_dir, &new_dir_attr);
+	dprintk("NFS reply rename: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+{
+	struct nfs_fattr	dir_attr, fattr;
+	struct nfs3_linkargs	arg = {
+		.fromfh		= NFS_FH(inode),
+		.tofh		= NFS_FH(dir),
+		.toname		= name->name,
+		.tolen		= name->len
+	};
+	struct nfs3_linkres	res = {
+		.dir_attr	= &dir_attr,
+		.fattr		= &fattr
+	};
+	int			status;
+
+	dprintk("NFS call  link %s\n", name->name);
+	dir_attr.valid = 0;
+	fattr.valid = 0;
+	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	nfs_refresh_inode(inode, &fattr);
+	dprintk("NFS reply link: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
+		  struct iattr *sattr, struct nfs_fh *fhandle,
+		  struct nfs_fattr *fattr)
+{
+	struct nfs_fattr	dir_attr;
+	struct nfs3_symlinkargs	arg = {
+		.fromfh		= NFS_FH(dir),
+		.fromname	= name->name,
+		.fromlen	= name->len,
+		.topath		= path->name,
+		.tolen		= path->len,
+		.sattr		= sattr
+	};
+	struct nfs3_diropres	res = {
+		.dir_attr	= &dir_attr,
+		.fh		= fhandle,
+		.fattr		= fattr
+	};
+	int			status;
+
+	if (path->len > NFS3_MAXPATHLEN)
+		return -ENAMETOOLONG;
+	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	dprintk("NFS reply symlink: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
+{
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr, dir_attr;
+	struct nfs3_mkdirargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
+		.sattr		= sattr
+	};
+	struct nfs3_diropres	res = {
+		.dir_attr	= &dir_attr,
+		.fh		= &fhandle,
+		.fattr		= &fattr
+	};
+	int			status;
+
+	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
+	dir_attr.valid = 0;
+	fattr.valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	if (status == 0)
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	dprintk("NFS reply mkdir: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
+{
+	struct nfs_fattr	dir_attr;
+	struct nfs3_diropargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= name->name,
+		.len		= name->len
+	};
+	int			status;
+
+	dprintk("NFS call  rmdir %s\n", name->name);
+	dir_attr.valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	dprintk("NFS reply rmdir: %d\n", status);
+	return status;
+}
+
+/*
+ * The READDIR implementation is somewhat hackish - we pass the user buffer
+ * to the encode function, which installs it in the receive iovec.
+ * The decode function itself doesn't perform any decoding, it just makes
+ * sure the reply is syntactically correct.
+ *
+ * Also note that this implementation handles both plain readdir and
+ * readdirplus.
+ */
+static int
+nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+		  u64 cookie, struct page *page, unsigned int count, int plus)
+{
+	struct inode		*dir = dentry->d_inode;
+	struct nfs_fattr	dir_attr;
+	u32			*verf = NFS_COOKIEVERF(dir);
+	struct nfs3_readdirargs	arg = {
+		.fh		= NFS_FH(dir),
+		.cookie		= cookie,
+		.verf		= {verf[0], verf[1]},
+		.plus		= plus,
+		.count		= count,
+		.pages		= &page
+	};
+	struct nfs3_readdirres	res = {
+		.dir_attr	= &dir_attr,
+		.verf		= verf,
+		.plus		= plus
+	};
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_READDIR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+		.rpc_cred	= cred
+	};
+	int			status;
+
+	lock_kernel();
+
+	if (plus)
+		msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
+
+	dprintk("NFS call  readdir%s %d\n",
+			plus? "plus" : "", (unsigned int) cookie);
+
+	dir_attr.valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	dprintk("NFS reply readdir: %d\n", status);
+	unlock_kernel();
+	return status;
+}
+
+static int
+nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+		dev_t rdev)
+{
+	struct nfs_fh fh;
+	struct nfs_fattr fattr, dir_attr;
+	struct nfs3_mknodargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
+		.sattr		= sattr,
+		.rdev		= rdev
+	};
+	struct nfs3_diropres	res = {
+		.dir_attr	= &dir_attr,
+		.fh		= &fh,
+		.fattr		= &fattr
+	};
+	int status;
+
+	switch (sattr->ia_mode & S_IFMT) {
+	case S_IFBLK:	arg.type = NF3BLK;  break;
+	case S_IFCHR:	arg.type = NF3CHR;  break;
+	case S_IFIFO:	arg.type = NF3FIFO; break;
+	case S_IFSOCK:	arg.type = NF3SOCK; break;
+	default:	return -EINVAL;
+	}
+
+	dprintk("NFS call  mknod %s %u:%u\n", dentry->d_name.name,
+			MAJOR(rdev), MINOR(rdev));
+	dir_attr.valid = 0;
+	fattr.valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	if (status == 0)
+		status = nfs_instantiate(dentry, &fh, &fattr);
+	dprintk("NFS reply mknod: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsstat *stat)
+{
+	int	status;
+
+	dprintk("NFS call  fsstat\n");
+	stat->fattr->valid = 0;
+	status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
+	dprintk("NFS reply statfs: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsinfo *info)
+{
+	int	status;
+
+	dprintk("NFS call  fsinfo\n");
+	info->fattr->valid = 0;
+	status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
+	dprintk("NFS reply fsinfo: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_pathconf *info)
+{
+	int	status;
+
+	dprintk("NFS call  pathconf\n");
+	info->fattr->valid = 0;
+	status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
+	dprintk("NFS reply pathconf: %d\n", status);
+	return status;
+}
+
+extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
+
+static void
+nfs3_read_done(struct rpc_task *task)
+{
+	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+
+	if (nfs3_async_handle_jukebox(task))
+		return;
+	/* Call back common NFS readpage processing */
+	if (task->tk_status >= 0)
+		nfs_refresh_inode(data->inode, &data->fattr);
+	nfs_readpage_result(task);
+}
+
+static void
+nfs3_proc_read_setup(struct nfs_read_data *data)
+{
+	struct rpc_task		*task = &data->task;
+	struct inode		*inode = data->inode;
+	int			flags;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_READ],
+		.rpc_argp	= &data->args,
+		.rpc_resp	= &data->res,
+		.rpc_cred	= data->cred,
+	};
+
+	/* N.B. Do we need to test? Never called for swapfile inode */
+	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs3_read_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+static void
+nfs3_write_done(struct rpc_task *task)
+{
+	struct nfs_write_data *data;
+
+	if (nfs3_async_handle_jukebox(task))
+		return;
+	data = (struct nfs_write_data *)task->tk_calldata;
+	if (task->tk_status >= 0)
+		nfs_refresh_inode(data->inode, data->res.fattr);
+	nfs_writeback_done(task);
+}
+
+static void
+nfs3_proc_write_setup(struct nfs_write_data *data, int how)
+{
+	struct rpc_task		*task = &data->task;
+	struct inode		*inode = data->inode;
+	int			stable;
+	int			flags;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_WRITE],
+		.rpc_argp	= &data->args,
+		.rpc_resp	= &data->res,
+		.rpc_cred	= data->cred,
+	};
+
+	if (how & FLUSH_STABLE) {
+		if (!NFS_I(inode)->ncommit)
+			stable = NFS_FILE_SYNC;
+		else
+			stable = NFS_DATA_SYNC;
+	} else
+		stable = NFS_UNSTABLE;
+	data->args.stable = stable;
+
+	/* Set the initial flags for the task.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs3_write_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+static void
+nfs3_commit_done(struct rpc_task *task)
+{
+	struct nfs_write_data *data;
+
+	if (nfs3_async_handle_jukebox(task))
+		return;
+	data = (struct nfs_write_data *)task->tk_calldata;
+	if (task->tk_status >= 0)
+		nfs_refresh_inode(data->inode, data->res.fattr);
+	nfs_commit_done(task);
+}
+
+static void
+nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
+{
+	struct rpc_task		*task = &data->task;
+	struct inode		*inode = data->inode;
+	int			flags;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_COMMIT],
+		.rpc_argp	= &data->args,
+		.rpc_resp	= &data->res,
+		.rpc_cred	= data->cred,
+	};
+
+	/* Set the initial flags for the task.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs3_commit_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+static int
+nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
+}
+
+struct nfs_rpc_ops	nfs_v3_clientops = {
+	.version	= 3,			/* protocol version */
+	.dentry_ops	= &nfs_dentry_operations,
+	.dir_inode_ops	= &nfs_dir_inode_operations,
+	.getroot	= nfs3_proc_get_root,
+	.getattr	= nfs3_proc_getattr,
+	.setattr	= nfs3_proc_setattr,
+	.lookup		= nfs3_proc_lookup,
+	.access		= nfs3_proc_access,
+	.readlink	= nfs3_proc_readlink,
+	.read		= nfs3_proc_read,
+	.write		= nfs3_proc_write,
+	.commit		= nfs3_proc_commit,
+	.create		= nfs3_proc_create,
+	.remove		= nfs3_proc_remove,
+	.unlink_setup	= nfs3_proc_unlink_setup,
+	.unlink_done	= nfs3_proc_unlink_done,
+	.rename		= nfs3_proc_rename,
+	.link		= nfs3_proc_link,
+	.symlink	= nfs3_proc_symlink,
+	.mkdir		= nfs3_proc_mkdir,
+	.rmdir		= nfs3_proc_rmdir,
+	.readdir	= nfs3_proc_readdir,
+	.mknod		= nfs3_proc_mknod,
+	.statfs		= nfs3_proc_statfs,
+	.fsinfo		= nfs3_proc_fsinfo,
+	.pathconf	= nfs3_proc_pathconf,
+	.decode_dirent	= nfs3_decode_dirent,
+	.read_setup	= nfs3_proc_read_setup,
+	.write_setup	= nfs3_proc_write_setup,
+	.commit_setup	= nfs3_proc_commit_setup,
+	.file_open	= nfs_open,
+	.file_release	= nfs_release,
+	.lock		= nfs3_proc_lock,
+};
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
new file mode 100644
index 00000000000..a3593d47e5a
--- /dev/null
+++ b/fs/nfs/nfs3xdr.c
@@ -0,0 +1,1023 @@
+/*
+ * linux/fs/nfs/nfs3xdr.c
+ *
+ * XDR functions to encode/decode NFSv3 RPC arguments and results.
+ *
+ * Copyright (C) 1996, 1997 Olaf Kirch
+ */
+
+#include <linux/param.h>
+#include <linux/time.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
+#include <linux/kdev_t.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs3.h>
+#include <linux/nfs_fs.h>
+
+#define NFSDBG_FACILITY		NFSDBG_XDR
+
+/* Mapping from NFS error code to "errno" error code. */
+#define errno_NFSERR_IO		EIO
+
+extern int			nfs_stat_to_errno(int);
+
+/*
+ * Declare the space requirements for NFS arguments and replies as
+ * number of 32bit-words
+ */
+#define NFS3_fhandle_sz		(1+16)
+#define NFS3_fh_sz		(NFS3_fhandle_sz)	/* shorthand */
+#define NFS3_sattr_sz		(15)
+#define NFS3_filename_sz	(1+(NFS3_MAXNAMLEN>>2))
+#define NFS3_path_sz		(1+(NFS3_MAXPATHLEN>>2))
+#define NFS3_fattr_sz		(21)
+#define NFS3_wcc_attr_sz		(6)
+#define NFS3_pre_op_attr_sz	(1+NFS3_wcc_attr_sz)
+#define NFS3_post_op_attr_sz	(1+NFS3_fattr_sz)
+#define NFS3_wcc_data_sz		(NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz)
+#define NFS3_fsstat_sz		
+#define NFS3_fsinfo_sz		
+#define NFS3_pathconf_sz		
+#define NFS3_entry_sz		(NFS3_filename_sz+3)
+
+#define NFS3_sattrargs_sz	(NFS3_fh_sz+NFS3_sattr_sz+3)
+#define NFS3_diropargs_sz	(NFS3_fh_sz+NFS3_filename_sz)
+#define NFS3_accessargs_sz	(NFS3_fh_sz+1)
+#define NFS3_readlinkargs_sz	(NFS3_fh_sz)
+#define NFS3_readargs_sz	(NFS3_fh_sz+3)
+#define NFS3_writeargs_sz	(NFS3_fh_sz+5)
+#define NFS3_createargs_sz	(NFS3_diropargs_sz+NFS3_sattr_sz)
+#define NFS3_mkdirargs_sz	(NFS3_diropargs_sz+NFS3_sattr_sz)
+#define NFS3_symlinkargs_sz	(NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz)
+#define NFS3_mknodargs_sz	(NFS3_diropargs_sz+2+NFS3_sattr_sz)
+#define NFS3_renameargs_sz	(NFS3_diropargs_sz+NFS3_diropargs_sz)
+#define NFS3_linkargs_sz		(NFS3_fh_sz+NFS3_diropargs_sz)
+#define NFS3_readdirargs_sz	(NFS3_fh_sz+2)
+#define NFS3_commitargs_sz	(NFS3_fh_sz+3)
+
+#define NFS3_attrstat_sz	(1+NFS3_fattr_sz)
+#define NFS3_wccstat_sz		(1+NFS3_wcc_data_sz)
+#define NFS3_lookupres_sz	(1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
+#define NFS3_accessres_sz	(1+NFS3_post_op_attr_sz+1)
+#define NFS3_readlinkres_sz	(1+NFS3_post_op_attr_sz+1)
+#define NFS3_readres_sz		(1+NFS3_post_op_attr_sz+3)
+#define NFS3_writeres_sz	(1+NFS3_wcc_data_sz+4)
+#define NFS3_createres_sz	(1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
+#define NFS3_renameres_sz	(1+(2 * NFS3_wcc_data_sz))
+#define NFS3_linkres_sz		(1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
+#define NFS3_readdirres_sz	(1+NFS3_post_op_attr_sz+2)
+#define NFS3_fsstatres_sz	(1+NFS3_post_op_attr_sz+13)
+#define NFS3_fsinfores_sz	(1+NFS3_post_op_attr_sz+12)
+#define NFS3_pathconfres_sz	(1+NFS3_post_op_attr_sz+6)
+#define NFS3_commitres_sz	(1+NFS3_wcc_data_sz+2)
+
+/*
+ * Map file type to S_IFMT bits
+ */
+static struct {
+	unsigned int	mode;
+	unsigned int	nfs2type;
+} nfs_type2fmt[] = {
+      { 0,		NFNON	},
+      { S_IFREG,	NFREG	},
+      { S_IFDIR,	NFDIR	},
+      { S_IFBLK,	NFBLK	},
+      { S_IFCHR,	NFCHR	},
+      { S_IFLNK,	NFLNK	},
+      { S_IFSOCK,	NFSOCK	},
+      { S_IFIFO,	NFFIFO	},
+      { 0,		NFBAD	}
+};
+
+/*
+ * Common NFS XDR functions as inlines
+ */
+static inline u32 *
+xdr_encode_fhandle(u32 *p, struct nfs_fh *fh)
+{
+	return xdr_encode_array(p, fh->data, fh->size);
+}
+
+static inline u32 *
+xdr_decode_fhandle(u32 *p, struct nfs_fh *fh)
+{
+	if ((fh->size = ntohl(*p++)) <= NFS3_FHSIZE) {
+		memcpy(fh->data, p, fh->size);
+		return p + XDR_QUADLEN(fh->size);
+	}
+	return NULL;
+}
+
+/*
+ * Encode/decode time.
+ */
+static inline u32 *
+xdr_encode_time3(u32 *p, struct timespec *timep)
+{
+	*p++ = htonl(timep->tv_sec);
+	*p++ = htonl(timep->tv_nsec);
+	return p;
+}
+
+static inline u32 *
+xdr_decode_time3(u32 *p, struct timespec *timep)
+{
+	timep->tv_sec = ntohl(*p++);
+	timep->tv_nsec = ntohl(*p++);
+	return p;
+}
+
+static u32 *
+xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
+{
+	unsigned int	type, major, minor;
+	int		fmode;
+
+	type = ntohl(*p++);
+	if (type >= NF3BAD)
+		type = NF3BAD;
+	fmode = nfs_type2fmt[type].mode;
+	fattr->type = nfs_type2fmt[type].nfs2type;
+	fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode;
+	fattr->nlink = ntohl(*p++);
+	fattr->uid = ntohl(*p++);
+	fattr->gid = ntohl(*p++);
+	p = xdr_decode_hyper(p, &fattr->size);
+	p = xdr_decode_hyper(p, &fattr->du.nfs3.used);
+
+	/* Turn remote device info into Linux-specific dev_t */
+	major = ntohl(*p++);
+	minor = ntohl(*p++);
+	fattr->rdev = MKDEV(major, minor);
+	if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor)
+		fattr->rdev = 0;
+
+	p = xdr_decode_hyper(p, &fattr->fsid_u.nfs3);
+	p = xdr_decode_hyper(p, &fattr->fileid);
+	p = xdr_decode_time3(p, &fattr->atime);
+	p = xdr_decode_time3(p, &fattr->mtime);
+	p = xdr_decode_time3(p, &fattr->ctime);
+
+	/* Update the mode bits */
+	fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3);
+	fattr->timestamp = jiffies;
+	return p;
+}
+
+static inline u32 *
+xdr_encode_sattr(u32 *p, struct iattr *attr)
+{
+	if (attr->ia_valid & ATTR_MODE) {
+		*p++ = xdr_one;
+		*p++ = htonl(attr->ia_mode);
+	} else {
+		*p++ = xdr_zero;
+	}
+	if (attr->ia_valid & ATTR_UID) {
+		*p++ = xdr_one;
+		*p++ = htonl(attr->ia_uid);
+	} else {
+		*p++ = xdr_zero;
+	}
+	if (attr->ia_valid & ATTR_GID) {
+		*p++ = xdr_one;
+		*p++ = htonl(attr->ia_gid);
+	} else {
+		*p++ = xdr_zero;
+	}
+	if (attr->ia_valid & ATTR_SIZE) {
+		*p++ = xdr_one;
+		p = xdr_encode_hyper(p, (__u64) attr->ia_size);
+	} else {
+		*p++ = xdr_zero;
+	}
+	if (attr->ia_valid & ATTR_ATIME_SET) {
+		*p++ = xdr_two;
+		p = xdr_encode_time3(p, &attr->ia_atime);
+	} else if (attr->ia_valid & ATTR_ATIME) {
+		*p++ = xdr_one;
+	} else {
+		*p++ = xdr_zero;
+	}
+	if (attr->ia_valid & ATTR_MTIME_SET) {
+		*p++ = xdr_two;
+		p = xdr_encode_time3(p, &attr->ia_mtime);
+	} else if (attr->ia_valid & ATTR_MTIME) {
+		*p++ = xdr_one;
+	} else {
+		*p++ = xdr_zero;
+	}
+	return p;
+}
+
+static inline u32 *
+xdr_decode_wcc_attr(u32 *p, struct nfs_fattr *fattr)
+{
+	p = xdr_decode_hyper(p, &fattr->pre_size);
+	p = xdr_decode_time3(p, &fattr->pre_mtime);
+	p = xdr_decode_time3(p, &fattr->pre_ctime);
+	fattr->valid |= NFS_ATTR_WCC;
+	return p;
+}
+
+static inline u32 *
+xdr_decode_post_op_attr(u32 *p, struct nfs_fattr *fattr)
+{
+	if (*p++)
+		p = xdr_decode_fattr(p, fattr);
+	return p;
+}
+
+static inline u32 *
+xdr_decode_pre_op_attr(u32 *p, struct nfs_fattr *fattr)
+{
+	if (*p++)
+		return xdr_decode_wcc_attr(p, fattr);
+	return p;
+}
+
+
+static inline u32 *
+xdr_decode_wcc_data(u32 *p, struct nfs_fattr *fattr)
+{
+	p = xdr_decode_pre_op_attr(p, fattr);
+	return xdr_decode_post_op_attr(p, fattr);
+}
+
+/*
+ * NFS encode functions
+ */
+
+/*
+ * Encode file handle argument
+ */
+static int
+nfs3_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh)
+{
+	p = xdr_encode_fhandle(p, fh);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode SETATTR arguments
+ */
+static int
+nfs3_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs3_sattrargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_sattr(p, args->sattr);
+	*p++ = htonl(args->guard);
+	if (args->guard)
+		p = xdr_encode_time3(p, &args->guardtime);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode directory ops argument
+ */
+static int
+nfs3_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs3_diropargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_array(p, args->name, args->len);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode access() argument
+ */
+static int
+nfs3_xdr_accessargs(struct rpc_rqst *req, u32 *p, struct nfs3_accessargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	*p++ = htonl(args->access);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Arguments to a READ call. Since we read data directly into the page
+ * cache, we also set up the reply iovec here so that iov[1] points
+ * exactly to the page we want to fetch.
+ */
+static int
+nfs3_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args)
+{
+	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	unsigned int replen;
+	u32 count = args->count;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_hyper(p, args->offset);
+	*p++ = htonl(count);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Inline the page array */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen,
+			 args->pages, args->pgbase, count);
+	return 0;
+}
+
+/*
+ * Write arguments. Splice the buffer to be written into the iovec.
+ */
+static int
+nfs3_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
+{
+	struct xdr_buf *sndbuf = &req->rq_snd_buf;
+	u32 count = args->count;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_hyper(p, args->offset);
+	*p++ = htonl(count);
+	*p++ = htonl(args->stable);
+	*p++ = htonl(count);
+	sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+
+	/* Copy the page array */
+	xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
+	return 0;
+}
+
+/*
+ * Encode CREATE arguments
+ */
+static int
+nfs3_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs3_createargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_array(p, args->name, args->len);
+
+	*p++ = htonl(args->createmode);
+	if (args->createmode == NFS3_CREATE_EXCLUSIVE) {
+		*p++ = args->verifier[0];
+		*p++ = args->verifier[1];
+	} else
+		p = xdr_encode_sattr(p, args->sattr);
+
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode MKDIR arguments
+ */
+static int
+nfs3_xdr_mkdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_mkdirargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_array(p, args->name, args->len);
+	p = xdr_encode_sattr(p, args->sattr);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode SYMLINK arguments
+ */
+static int
+nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fromfh);
+	p = xdr_encode_array(p, args->fromname, args->fromlen);
+	p = xdr_encode_sattr(p, args->sattr);
+	p = xdr_encode_array(p, args->topath, args->tolen);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode MKNOD arguments
+ */
+static int
+nfs3_xdr_mknodargs(struct rpc_rqst *req, u32 *p, struct nfs3_mknodargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_array(p, args->name, args->len);
+	*p++ = htonl(args->type);
+	p = xdr_encode_sattr(p, args->sattr);
+	if (args->type == NF3CHR || args->type == NF3BLK) {
+		*p++ = htonl(MAJOR(args->rdev));
+		*p++ = htonl(MINOR(args->rdev));
+	}
+
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode RENAME arguments
+ */
+static int
+nfs3_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs3_renameargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fromfh);
+	p = xdr_encode_array(p, args->fromname, args->fromlen);
+	p = xdr_encode_fhandle(p, args->tofh);
+	p = xdr_encode_array(p, args->toname, args->tolen);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode LINK arguments
+ */
+static int
+nfs3_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs3_linkargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fromfh);
+	p = xdr_encode_fhandle(p, args->tofh);
+	p = xdr_encode_array(p, args->toname, args->tolen);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode arguments to readdir call
+ */
+static int
+nfs3_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_readdirargs *args)
+{
+	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	unsigned int replen;
+	u32 count = args->count;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_hyper(p, args->cookie);
+	*p++ = args->verf[0];
+	*p++ = args->verf[1];
+	if (args->plus) {
+		/* readdirplus: need dircount + buffer size.
+		 * We just make sure we make dircount big enough */
+		*p++ = htonl(count >> 3);
+	}
+	*p++ = htonl(count);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Inline the page array */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readdirres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count);
+	return 0;
+}
+
+/*
+ * Decode the result of a readdir call.
+ * We just check for syntactical correctness.
+ */
+static int
+nfs3_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs3_readdirres *res)
+{
+	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+	struct kvec *iov = rcvbuf->head;
+	struct page **page;
+	int hdrlen, recvd;
+	int status, nr;
+	unsigned int len, pglen;
+	u32 *entry, *end, *kaddr;
+
+	status = ntohl(*p++);
+	/* Decode post_op_attrs */
+	p = xdr_decode_post_op_attr(p, res->dir_attr);
+	if (status)
+		return -nfs_stat_to_errno(status);
+	/* Decode verifier cookie */
+	if (res->verf) {
+		res->verf[0] = *p++;
+		res->verf[1] = *p++;
+	} else {
+		p += 2;
+	}
+
+	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+	if (iov->iov_len < hdrlen) {
+		printk(KERN_WARNING "NFS: READDIR reply header overflowed:"
+				"length %d > %Zu\n", hdrlen, iov->iov_len);
+		return -errno_NFSERR_IO;
+	} else if (iov->iov_len != hdrlen) {
+		dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
+		xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
+	}
+
+	pglen = rcvbuf->page_len;
+	recvd = rcvbuf->len - hdrlen;
+	if (pglen > recvd)
+		pglen = recvd;
+	page = rcvbuf->pages;
+	kaddr = p = (u32 *)kmap_atomic(*page, KM_USER0);
+	end = (u32 *)((char *)p + pglen);
+	entry = p;
+	for (nr = 0; *p++; nr++) {
+		if (p + 3 > end)
+			goto short_pkt;
+		p += 2;				/* inode # */
+		len = ntohl(*p++);		/* string length */
+		p += XDR_QUADLEN(len) + 2;	/* name + cookie */
+		if (len > NFS3_MAXNAMLEN) {
+			printk(KERN_WARNING "NFS: giant filename in readdir (len %x)!\n",
+						len);
+			goto err_unmap;
+		}
+
+		if (res->plus) {
+			/* post_op_attr */
+			if (p + 2 > end)
+				goto short_pkt;
+			if (*p++) {
+				p += 21;
+				if (p + 1 > end)
+					goto short_pkt;
+			}
+			/* post_op_fh3 */
+			if (*p++) {
+				if (p + 1 > end)
+					goto short_pkt;
+				len = ntohl(*p++);
+				if (len > NFS3_FHSIZE) {
+					printk(KERN_WARNING "NFS: giant filehandle in "
+						"readdir (len %x)!\n", len);
+					goto err_unmap;
+				}
+				p += XDR_QUADLEN(len);
+			}
+		}
+
+		if (p + 2 > end)
+			goto short_pkt;
+		entry = p;
+	}
+	if (!nr && (entry[0] != 0 || entry[1] == 0))
+		goto short_pkt;
+ out:
+	kunmap_atomic(kaddr, KM_USER0);
+	return nr;
+ short_pkt:
+	entry[0] = entry[1] = 0;
+	/* truncate listing ? */
+	if (!nr) {
+		printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
+		entry[1] = 1;
+	}
+	goto out;
+err_unmap:
+	nr = -errno_NFSERR_IO;
+	goto out;
+}
+
+u32 *
+nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
+{
+	struct nfs_entry old = *entry;
+
+	if (!*p++) {
+		if (!*p)
+			return ERR_PTR(-EAGAIN);
+		entry->eof = 1;
+		return ERR_PTR(-EBADCOOKIE);
+	}
+
+	p = xdr_decode_hyper(p, &entry->ino);
+	entry->len  = ntohl(*p++);
+	entry->name = (const char *) p;
+	p += XDR_QUADLEN(entry->len);
+	entry->prev_cookie = entry->cookie;
+	p = xdr_decode_hyper(p, &entry->cookie);
+
+	if (plus) {
+		entry->fattr->valid = 0;
+		p = xdr_decode_post_op_attr(p, entry->fattr);
+		/* In fact, a post_op_fh3: */
+		if (*p++) {
+			p = xdr_decode_fhandle(p, entry->fh);
+			/* Ugh -- server reply was truncated */
+			if (p == NULL) {
+				dprintk("NFS: FH truncated\n");
+				*entry = old;
+				return ERR_PTR(-EAGAIN);
+			}
+		} else
+			memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
+	}
+
+	entry->eof = !p[0] && p[1];
+	return p;
+}
+
+/*
+ * Encode COMMIT arguments
+ */
+static int
+nfs3_xdr_commitargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_hyper(p, args->offset);
+	*p++ = htonl(args->count);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * NFS XDR decode functions
+ */
+
+/*
+ * Decode attrstat reply.
+ */
+static int
+nfs3_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+{
+	int	status;
+
+	if ((status = ntohl(*p++)))
+		return -nfs_stat_to_errno(status);
+	xdr_decode_fattr(p, fattr);
+	return 0;
+}
+
+/*
+ * Decode status+wcc_data reply
+ * SATTR, REMOVE, RMDIR
+ */
+static int
+nfs3_xdr_wccstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+{
+	int	status;
+
+	if ((status = ntohl(*p++)))
+		status = -nfs_stat_to_errno(status);
+	xdr_decode_wcc_data(p, fattr);
+	return status;
+}
+
+/*
+ * Decode LOOKUP reply
+ */
+static int
+nfs3_xdr_lookupres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res)
+{
+	int	status;
+
+	if ((status = ntohl(*p++))) {
+		status = -nfs_stat_to_errno(status);
+	} else {
+		if (!(p = xdr_decode_fhandle(p, res->fh)))
+			return -errno_NFSERR_IO;
+		p = xdr_decode_post_op_attr(p, res->fattr);
+	}
+	xdr_decode_post_op_attr(p, res->dir_attr);
+	return status;
+}
+
+/*
+ * Decode ACCESS reply
+ */
+static int
+nfs3_xdr_accessres(struct rpc_rqst *req, u32 *p, struct nfs3_accessres *res)
+{
+	int	status = ntohl(*p++);
+
+	p = xdr_decode_post_op_attr(p, res->fattr);
+	if (status)
+		return -nfs_stat_to_errno(status);
+	res->access = ntohl(*p++);
+	return 0;
+}
+
+static int
+nfs3_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_readlinkargs *args)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	unsigned int replen;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Inline the page array */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readlinkres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen);
+	return 0;
+}
+
+/*
+ * Decode READLINK reply
+ */
+static int
+nfs3_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+{
+	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+	struct kvec *iov = rcvbuf->head;
+	int hdrlen, len, recvd;
+	char	*kaddr;
+	int	status;
+
+	status = ntohl(*p++);
+	p = xdr_decode_post_op_attr(p, fattr);
+
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	/* Convert length of symlink */
+	len = ntohl(*p++);
+	if (len >= rcvbuf->page_len || len <= 0) {
+		dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
+		return -ENAMETOOLONG;
+	}
+
+	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+	if (iov->iov_len < hdrlen) {
+		printk(KERN_WARNING "NFS: READLINK reply header overflowed:"
+				"length %d > %Zu\n", hdrlen, iov->iov_len);
+		return -errno_NFSERR_IO;
+	} else if (iov->iov_len != hdrlen) {
+		dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
+		xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
+	}
+	recvd = req->rq_rcv_buf.len - hdrlen;
+	if (recvd < len) {
+		printk(KERN_WARNING "NFS: server cheating in readlink reply: "
+				"count %u > recvd %u\n", len, recvd);
+		return -EIO;
+	}
+
+	/* NULL terminate the string we got */
+	kaddr = (char*)kmap_atomic(rcvbuf->pages[0], KM_USER0);
+	kaddr[len+rcvbuf->page_base] = '\0';
+	kunmap_atomic(kaddr, KM_USER0);
+	return 0;
+}
+
+/*
+ * Decode READ reply
+ */
+static int
+nfs3_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
+{
+	struct kvec *iov = req->rq_rcv_buf.head;
+	int	status, count, ocount, recvd, hdrlen;
+
+	status = ntohl(*p++);
+	p = xdr_decode_post_op_attr(p, res->fattr);
+
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	/* Decode reply could and EOF flag. NFSv3 is somewhat redundant
+	 * in that it puts the count both in the res struct and in the
+	 * opaque data count. */
+	count    = ntohl(*p++);
+	res->eof = ntohl(*p++);
+	ocount   = ntohl(*p++);
+
+	if (ocount != count) {
+		printk(KERN_WARNING "NFS: READ count doesn't match RPC opaque count.\n");
+		return -errno_NFSERR_IO;
+	}
+
+	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+	if (iov->iov_len < hdrlen) {
+		printk(KERN_WARNING "NFS: READ reply header overflowed:"
+				"length %d > %Zu\n", hdrlen, iov->iov_len);
+       		return -errno_NFSERR_IO;
+	} else if (iov->iov_len != hdrlen) {
+		dprintk("NFS: READ header is short. iovec will be shifted.\n");
+		xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen);
+	}
+
+	recvd = req->rq_rcv_buf.len - hdrlen;
+	if (count > recvd) {
+		printk(KERN_WARNING "NFS: server cheating in read reply: "
+			"count %d > recvd %d\n", count, recvd);
+		count = recvd;
+		res->eof = 0;
+	}
+
+	if (count < res->count)
+		res->count = count;
+
+	return count;
+}
+
+/*
+ * Decode WRITE response
+ */
+static int
+nfs3_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
+{
+	int	status;
+
+	status = ntohl(*p++);
+	p = xdr_decode_wcc_data(p, res->fattr);
+
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	res->count = ntohl(*p++);
+	res->verf->committed = (enum nfs3_stable_how)ntohl(*p++);
+	res->verf->verifier[0] = *p++;
+	res->verf->verifier[1] = *p++;
+
+	return res->count;
+}
+
+/*
+ * Decode a CREATE response
+ */
+static int
+nfs3_xdr_createres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res)
+{
+	int	status;
+
+	status = ntohl(*p++);
+	if (status == 0) {
+		if (*p++) {
+			if (!(p = xdr_decode_fhandle(p, res->fh)))
+				return -errno_NFSERR_IO;
+			p = xdr_decode_post_op_attr(p, res->fattr);
+		} else {
+			memset(res->fh, 0, sizeof(*res->fh));
+			/* Do decode post_op_attr but set it to NULL */
+			p = xdr_decode_post_op_attr(p, res->fattr);
+			res->fattr->valid = 0;
+		}
+	} else {
+		status = -nfs_stat_to_errno(status);
+	}
+	p = xdr_decode_wcc_data(p, res->dir_attr);
+	return status;
+}
+
+/*
+ * Decode RENAME reply
+ */
+static int
+nfs3_xdr_renameres(struct rpc_rqst *req, u32 *p, struct nfs3_renameres *res)
+{
+	int	status;
+
+	if ((status = ntohl(*p++)) != 0)
+		status = -nfs_stat_to_errno(status);
+	p = xdr_decode_wcc_data(p, res->fromattr);
+	p = xdr_decode_wcc_data(p, res->toattr);
+	return status;
+}
+
+/*
+ * Decode LINK reply
+ */
+static int
+nfs3_xdr_linkres(struct rpc_rqst *req, u32 *p, struct nfs3_linkres *res)
+{
+	int	status;
+
+	if ((status = ntohl(*p++)) != 0)
+		status = -nfs_stat_to_errno(status);
+	p = xdr_decode_post_op_attr(p, res->fattr);
+	p = xdr_decode_wcc_data(p, res->dir_attr);
+	return status;
+}
+
+/*
+ * Decode FSSTAT reply
+ */
+static int
+nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsstat *res)
+{
+	int		status;
+
+	status = ntohl(*p++);
+
+	p = xdr_decode_post_op_attr(p, res->fattr);
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	p = xdr_decode_hyper(p, &res->tbytes);
+	p = xdr_decode_hyper(p, &res->fbytes);
+	p = xdr_decode_hyper(p, &res->abytes);
+	p = xdr_decode_hyper(p, &res->tfiles);
+	p = xdr_decode_hyper(p, &res->ffiles);
+	p = xdr_decode_hyper(p, &res->afiles);
+
+	/* ignore invarsec */
+	return 0;
+}
+
+/*
+ * Decode FSINFO reply
+ */
+static int
+nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+{
+	int		status;
+
+	status = ntohl(*p++);
+
+	p = xdr_decode_post_op_attr(p, res->fattr);
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	res->rtmax  = ntohl(*p++);
+	res->rtpref = ntohl(*p++);
+	res->rtmult = ntohl(*p++);
+	res->wtmax  = ntohl(*p++);
+	res->wtpref = ntohl(*p++);
+	res->wtmult = ntohl(*p++);
+	res->dtpref = ntohl(*p++);
+	p = xdr_decode_hyper(p, &res->maxfilesize);
+
+	/* ignore time_delta and properties */
+	res->lease_time = 0;
+	return 0;
+}
+
+/*
+ * Decode PATHCONF reply
+ */
+static int
+nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_pathconf *res)
+{
+	int		status;
+
+	status = ntohl(*p++);
+
+	p = xdr_decode_post_op_attr(p, res->fattr);
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+	res->max_link = ntohl(*p++);
+	res->max_namelen = ntohl(*p++);
+
+	/* ignore remaining fields */
+	return 0;
+}
+
+/*
+ * Decode COMMIT reply
+ */
+static int
+nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
+{
+	int		status;
+
+	status = ntohl(*p++);
+	p = xdr_decode_wcc_data(p, res->fattr);
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	res->verf->verifier[0] = *p++;
+	res->verf->verifier[1] = *p++;
+	return 0;
+}
+
+#ifndef MAX
+# define MAX(a, b)	(((a) > (b))? (a) : (b))
+#endif
+
+#define PROC(proc, argtype, restype, timer)				\
+[NFS3PROC_##proc] = {							\
+	.p_proc      = NFS3PROC_##proc,					\
+	.p_encode    = (kxdrproc_t) nfs3_xdr_##argtype,			\
+	.p_decode    = (kxdrproc_t) nfs3_xdr_##restype,			\
+	.p_bufsiz    = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2,	\
+	.p_timer     = timer						\
+	}
+
+struct rpc_procinfo	nfs3_procedures[] = {
+  PROC(GETATTR,		fhandle,	attrstat, 1),
+  PROC(SETATTR, 	sattrargs,	wccstat, 0),
+  PROC(LOOKUP,		diropargs,	lookupres, 2),
+  PROC(ACCESS,		accessargs,	accessres, 1),
+  PROC(READLINK,	readlinkargs,	readlinkres, 3),
+  PROC(READ,		readargs,	readres, 3),
+  PROC(WRITE,		writeargs,	writeres, 4),
+  PROC(CREATE,		createargs,	createres, 0),
+  PROC(MKDIR,		mkdirargs,	createres, 0),
+  PROC(SYMLINK,		symlinkargs,	createres, 0),
+  PROC(MKNOD,		mknodargs,	createres, 0),
+  PROC(REMOVE,		diropargs,	wccstat, 0),
+  PROC(RMDIR,		diropargs,	wccstat, 0),
+  PROC(RENAME,		renameargs,	renameres, 0),
+  PROC(LINK,		linkargs,	linkres, 0),
+  PROC(READDIR,		readdirargs,	readdirres, 3),
+  PROC(READDIRPLUS,	readdirargs,	readdirres, 3),
+  PROC(FSSTAT,		fhandle,	fsstatres, 0),
+  PROC(FSINFO,  	fhandle,	fsinfores, 0),
+  PROC(PATHCONF,	fhandle,	pathconfres, 0),
+  PROC(COMMIT,		commitargs,	commitres, 5),
+};
+
+struct rpc_version		nfs_version3 = {
+	.number			= 3,
+	.nrprocs		= sizeof(nfs3_procedures)/sizeof(nfs3_procedures[0]),
+	.procs			= nfs3_procedures
+};
+
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
new file mode 100644
index 00000000000..1d5cb3e80c3
--- /dev/null
+++ b/fs/nfs/nfs4proc.c
@@ -0,0 +1,2786 @@
+/*
+ *  fs/nfs/nfs4proc.c
+ *
+ *  Client-side procedure declarations for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson   <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/smp_lock.h>
+#include <linux/namei.h>
+
+#include "delegation.h"
+
+#define NFSDBG_FACILITY		NFSDBG_PROC
+
+#define NFS4_POLL_RETRY_MIN	(1*HZ)
+#define NFS4_POLL_RETRY_MAX	(15*HZ)
+
+static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
+static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *);
+static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
+static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
+extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
+extern struct rpc_procinfo nfs4_procedures[];
+
+extern nfs4_stateid zero_stateid;
+
+/* Prevent leaks of NFSv4 errors into userland */
+int nfs4_map_errors(int err)
+{
+	if (err < -1000) {
+		dprintk("%s could not handle NFSv4 error %d\n",
+				__FUNCTION__, -err);
+		return -EIO;
+	}
+	return err;
+}
+
+/*
+ * This is our standard bitmap for GETATTR requests.
+ */
+const u32 nfs4_fattr_bitmap[2] = {
+	FATTR4_WORD0_TYPE
+	| FATTR4_WORD0_CHANGE
+	| FATTR4_WORD0_SIZE
+	| FATTR4_WORD0_FSID
+	| FATTR4_WORD0_FILEID,
+	FATTR4_WORD1_MODE
+	| FATTR4_WORD1_NUMLINKS
+	| FATTR4_WORD1_OWNER
+	| FATTR4_WORD1_OWNER_GROUP
+	| FATTR4_WORD1_RAWDEV
+	| FATTR4_WORD1_SPACE_USED
+	| FATTR4_WORD1_TIME_ACCESS
+	| FATTR4_WORD1_TIME_METADATA
+	| FATTR4_WORD1_TIME_MODIFY
+};
+
+const u32 nfs4_statfs_bitmap[2] = {
+	FATTR4_WORD0_FILES_AVAIL
+	| FATTR4_WORD0_FILES_FREE
+	| FATTR4_WORD0_FILES_TOTAL,
+	FATTR4_WORD1_SPACE_AVAIL
+	| FATTR4_WORD1_SPACE_FREE
+	| FATTR4_WORD1_SPACE_TOTAL
+};
+
+u32 nfs4_pathconf_bitmap[2] = {
+	FATTR4_WORD0_MAXLINK
+	| FATTR4_WORD0_MAXNAME,
+	0
+};
+
+const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
+			| FATTR4_WORD0_MAXREAD
+			| FATTR4_WORD0_MAXWRITE
+			| FATTR4_WORD0_LEASE_TIME,
+			0
+};
+
+static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
+		struct nfs4_readdir_arg *readdir)
+{
+	u32 *start, *p;
+
+	BUG_ON(readdir->count < 80);
+	if (cookie > 2) {
+		readdir->cookie = (cookie > 2) ? cookie : 0;
+		memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier));
+		return;
+	}
+
+	readdir->cookie = 0;
+	memset(&readdir->verifier, 0, sizeof(readdir->verifier));
+	if (cookie == 2)
+		return;
+	
+	/*
+	 * NFSv4 servers do not return entries for '.' and '..'
+	 * Therefore, we fake these entries here.  We let '.'
+	 * have cookie 0 and '..' have cookie 1.  Note that
+	 * when talking to the server, we always send cookie 0
+	 * instead of 1 or 2.
+	 */
+	start = p = (u32 *)kmap_atomic(*readdir->pages, KM_USER0);
+	
+	if (cookie == 0) {
+		*p++ = xdr_one;                                  /* next */
+		*p++ = xdr_zero;                   /* cookie, first word */
+		*p++ = xdr_one;                   /* cookie, second word */
+		*p++ = xdr_one;                             /* entry len */
+		memcpy(p, ".\0\0\0", 4);                        /* entry */
+		p++;
+		*p++ = xdr_one;                         /* bitmap length */
+		*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
+		*p++ = htonl(8);              /* attribute buffer length */
+		p = xdr_encode_hyper(p, dentry->d_inode->i_ino);
+	}
+	
+	*p++ = xdr_one;                                  /* next */
+	*p++ = xdr_zero;                   /* cookie, first word */
+	*p++ = xdr_two;                   /* cookie, second word */
+	*p++ = xdr_two;                             /* entry len */
+	memcpy(p, "..\0\0", 4);                         /* entry */
+	p++;
+	*p++ = xdr_one;                         /* bitmap length */
+	*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
+	*p++ = htonl(8);              /* attribute buffer length */
+	p = xdr_encode_hyper(p, dentry->d_parent->d_inode->i_ino);
+
+	readdir->pgbase = (char *)p - (char *)start;
+	readdir->count -= readdir->pgbase;
+	kunmap_atomic(start, KM_USER0);
+}
+
+static void
+renew_lease(struct nfs_server *server, unsigned long timestamp)
+{
+	struct nfs4_client *clp = server->nfs4_state;
+	spin_lock(&clp->cl_lock);
+	if (time_before(clp->cl_last_renewal,timestamp))
+		clp->cl_last_renewal = timestamp;
+	spin_unlock(&clp->cl_lock);
+}
+
+static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinfo)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	if (cinfo->before == nfsi->change_attr && cinfo->atomic)
+		nfsi->change_attr = cinfo->after;
+}
+
+static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+{
+	struct inode *inode = state->inode;
+
+	open_flags &= (FMODE_READ|FMODE_WRITE);
+	/* Protect against nfs4_find_state() */
+	spin_lock(&inode->i_lock);
+	state->state |= open_flags;
+	/* NB! List reordering - see the reclaim code for why.  */
+	if ((open_flags & FMODE_WRITE) && 0 == state->nwriters++)
+		list_move(&state->open_states, &state->owner->so_states);
+	if (open_flags & FMODE_READ)
+		state->nreaders++;
+	memcpy(&state->stateid, stateid, sizeof(state->stateid));
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * OPEN_RECLAIM:
+ * 	reclaim state on the server after a reboot.
+ * 	Assumes caller is holding the sp->so_sem
+ */
+static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+	struct inode *inode = state->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
+	struct nfs_openargs o_arg = {
+		.fh = NFS_FH(inode),
+		.seqid = sp->so_seqid,
+		.id = sp->so_id,
+		.open_flags = state->state,
+		.clientid = server->nfs4_state->cl_clientid,
+		.claim = NFS4_OPEN_CLAIM_PREVIOUS,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs_openres o_res = {
+		.server = server,	/* Grrr */
+	};
+	struct rpc_message msg = {
+		.rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR],
+		.rpc_argp       = &o_arg,
+		.rpc_resp	= &o_res,
+		.rpc_cred	= sp->so_cred,
+	};
+	int status;
+
+	if (delegation != NULL) {
+		if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
+			memcpy(&state->stateid, &delegation->stateid,
+					sizeof(state->stateid));
+			set_bit(NFS_DELEGATED_STATE, &state->flags);
+			return 0;
+		}
+		o_arg.u.delegation_type = delegation->type;
+	}
+	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+	nfs4_increment_seqid(status, sp);
+	if (status == 0) {
+		memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid));
+		if (o_res.delegation_type != 0) {
+			nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res);
+			/* Did the server issue an immediate delegation recall? */
+			if (o_res.do_recall)
+				nfs_async_inode_return_delegation(inode, &o_res.stateid);
+		}
+	}
+	clear_bit(NFS_DELEGATED_STATE, &state->flags);
+	/* Ensure we update the inode attributes */
+	NFS_CACHEINV(inode);
+	return status;
+}
+
+static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+	struct nfs_server *server = NFS_SERVER(state->inode);
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = _nfs4_open_reclaim(sp, state);
+		switch (err) {
+			case 0:
+			case -NFS4ERR_STALE_CLIENTID:
+			case -NFS4ERR_STALE_STATEID:
+			case -NFS4ERR_EXPIRED:
+				return err;
+		}
+		err = nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
+{
+	struct nfs4_state_owner  *sp  = state->owner;
+	struct inode *inode = dentry->d_inode;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct dentry *parent = dget_parent(dentry);
+	struct nfs_openargs arg = {
+		.fh = NFS_FH(parent->d_inode),
+		.clientid = server->nfs4_state->cl_clientid,
+		.name = &dentry->d_name,
+		.id = sp->so_id,
+		.server = server,
+		.bitmask = server->attr_bitmask,
+		.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR,
+	};
+	struct nfs_openres res = {
+		.server = server,
+	};
+	struct 	rpc_message msg = {
+		.rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR],
+		.rpc_argp       = &arg,
+		.rpc_resp       = &res,
+		.rpc_cred	= sp->so_cred,
+	};
+	int status = 0;
+
+	down(&sp->so_sema);
+	if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+		goto out;
+	if (state->state == 0)
+		goto out;
+	arg.seqid = sp->so_seqid;
+	arg.open_flags = state->state;
+	memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data));
+	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+	nfs4_increment_seqid(status, sp);
+	if (status >= 0) {
+		memcpy(state->stateid.data, res.stateid.data,
+				sizeof(state->stateid.data));
+		clear_bit(NFS_DELEGATED_STATE, &state->flags);
+	}
+out:
+	up(&sp->so_sema);
+	dput(parent);
+	return status;
+}
+
+int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
+{
+	struct nfs4_exception exception = { };
+	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+	int err;
+	do {
+		err = _nfs4_open_delegation_recall(dentry, state);
+		switch (err) {
+			case 0:
+				return err;
+			case -NFS4ERR_STALE_CLIENTID:
+			case -NFS4ERR_STALE_STATEID:
+			case -NFS4ERR_EXPIRED:
+				/* Don't recall a delegation if it was lost */
+				nfs4_schedule_state_recovery(server->nfs4_state);
+				return err;
+		}
+		err = nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+	return err;
+}
+
+static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid)
+{
+	struct nfs_open_confirmargs arg = {
+		.fh             = fh,
+		.seqid          = sp->so_seqid,
+		.stateid	= *stateid,
+	};
+	struct nfs_open_confirmres res;
+	struct 	rpc_message msg = {
+		.rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
+		.rpc_argp       = &arg,
+		.rpc_resp       = &res,
+		.rpc_cred	= sp->so_cred,
+	};
+	int status;
+
+	status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR);
+	nfs4_increment_seqid(status, sp);
+	if (status >= 0)
+		memcpy(stateid, &res.stateid, sizeof(*stateid));
+	return status;
+}
+
+static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner  *sp, struct nfs_openargs *o_arg, struct nfs_openres *o_res)
+{
+	struct nfs_server *server = NFS_SERVER(dir);
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN],
+		.rpc_argp = o_arg,
+		.rpc_resp = o_res,
+		.rpc_cred = sp->so_cred,
+	};
+	int status;
+
+	/* Update sequence id. The caller must serialize! */
+	o_arg->seqid = sp->so_seqid;
+	o_arg->id = sp->so_id;
+	o_arg->clientid = sp->so_client->cl_clientid;
+
+	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+	nfs4_increment_seqid(status, sp);
+	if (status != 0)
+		goto out;
+	update_changeattr(dir, &o_res->cinfo);
+	if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
+		status = _nfs4_proc_open_confirm(server->client, &o_res->fh,
+				sp, &o_res->stateid);
+		if (status != 0)
+			goto out;
+	}
+	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
+		status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
+out:
+	return status;
+}
+
+static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags)
+{
+	struct nfs_access_entry cache;
+	int mask = 0;
+	int status;
+
+	if (openflags & FMODE_READ)
+		mask |= MAY_READ;
+	if (openflags & FMODE_WRITE)
+		mask |= MAY_WRITE;
+	status = nfs_access_get_cached(inode, cred, &cache);
+	if (status == 0)
+		goto out;
+
+	/* Be clever: ask server to check for all possible rights */
+	cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
+	cache.cred = cred;
+	cache.jiffies = jiffies;
+	status = _nfs4_proc_access(inode, &cache);
+	if (status != 0)
+		return status;
+	nfs_access_add_cache(inode, &cache);
+out:
+	if ((cache.mask & mask) == mask)
+		return 0;
+	return -EACCES;
+}
+
+/*
+ * OPEN_EXPIRED:
+ * 	reclaim state on the server after a network partition.
+ * 	Assumes caller holds the appropriate lock
+ */
+static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+{
+	struct dentry *parent = dget_parent(dentry);
+	struct inode *dir = parent->d_inode;
+	struct inode *inode = state->inode;
+	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
+	struct nfs_fattr        f_attr = {
+		.valid = 0,
+	};
+	struct nfs_openargs o_arg = {
+		.fh = NFS_FH(dir),
+		.open_flags = state->state,
+		.name = &dentry->d_name,
+		.bitmask = server->attr_bitmask,
+		.claim = NFS4_OPEN_CLAIM_NULL,
+	};
+	struct nfs_openres o_res = {
+		.f_attr = &f_attr,
+		.server = server,
+	};
+	int status = 0;
+
+	if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
+		status = _nfs4_do_access(inode, sp->so_cred, state->state);
+		if (status < 0)
+			goto out;
+		memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid));
+		set_bit(NFS_DELEGATED_STATE, &state->flags);
+		goto out;
+	}
+	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
+	if (status != 0)
+		goto out_nodeleg;
+	/* Check if files differ */
+	if ((f_attr.mode & S_IFMT) != (inode->i_mode & S_IFMT))
+		goto out_stale;
+	/* Has the file handle changed? */
+	if (nfs_compare_fh(&o_res.fh, NFS_FH(inode)) != 0) {
+		/* Verify if the change attributes are the same */
+		if (f_attr.change_attr != NFS_I(inode)->change_attr)
+			goto out_stale;
+		if (nfs_size_to_loff_t(f_attr.size) != inode->i_size)
+			goto out_stale;
+		/* Lets just pretend that this is the same file */
+		nfs_copy_fh(NFS_FH(inode), &o_res.fh);
+		NFS_I(inode)->fileid = f_attr.fileid;
+	}
+	memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid));
+	if (o_res.delegation_type != 0) {
+		if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM))
+			nfs_inode_set_delegation(inode, sp->so_cred, &o_res);
+		else
+			nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res);
+	}
+out_nodeleg:
+	clear_bit(NFS_DELEGATED_STATE, &state->flags);
+out:
+	dput(parent);
+	return status;
+out_stale:
+	status = -ESTALE;
+	/* Invalidate the state owner so we don't ever use it again */
+	nfs4_drop_state_owner(sp);
+	d_drop(dentry);
+	/* Should we be trying to close that stateid? */
+	goto out_nodeleg;
+}
+
+static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+	struct nfs_inode *nfsi = NFS_I(state->inode);
+	struct nfs_open_context *ctx;
+	int status;
+
+	spin_lock(&state->inode->i_lock);
+	list_for_each_entry(ctx, &nfsi->open_files, list) {
+		if (ctx->state != state)
+			continue;
+		get_nfs_open_context(ctx);
+		spin_unlock(&state->inode->i_lock);
+		status = _nfs4_open_expired(sp, state, ctx->dentry);
+		put_nfs_open_context(ctx);
+		return status;
+	}
+	spin_unlock(&state->inode->i_lock);
+	return -ENOENT;
+}
+
+/*
+ * Returns an nfs4_state + an extra reference to the inode
+ */
+static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res)
+{
+	struct nfs_delegation *delegation;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs4_state_owner *sp = NULL;
+	struct nfs4_state *state = NULL;
+	int open_flags = flags & (FMODE_READ|FMODE_WRITE);
+	int err;
+
+	/* Protect against reboot recovery - NOTE ORDER! */
+	down_read(&clp->cl_sem);
+	/* Protect against delegation recall */
+	down_read(&nfsi->rwsem);
+	delegation = NFS_I(inode)->delegation;
+	err = -ENOENT;
+	if (delegation == NULL || (delegation->type & open_flags) != open_flags)
+		goto out_err;
+	err = -ENOMEM;
+	if (!(sp = nfs4_get_state_owner(server, cred))) {
+		dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
+		goto out_err;
+	}
+	down(&sp->so_sema);
+	state = nfs4_get_open_state(inode, sp);
+	if (state == NULL)
+		goto out_err;
+
+	err = -ENOENT;
+	if ((state->state & open_flags) == open_flags) {
+		spin_lock(&inode->i_lock);
+		if (open_flags & FMODE_READ)
+			state->nreaders++;
+		if (open_flags & FMODE_WRITE)
+			state->nwriters++;
+		spin_unlock(&inode->i_lock);
+		goto out_ok;
+	} else if (state->state != 0)
+		goto out_err;
+
+	lock_kernel();
+	err = _nfs4_do_access(inode, cred, open_flags);
+	unlock_kernel();
+	if (err != 0)
+		goto out_err;
+	set_bit(NFS_DELEGATED_STATE, &state->flags);
+	update_open_stateid(state, &delegation->stateid, open_flags);
+out_ok:
+	up(&sp->so_sema);
+	nfs4_put_state_owner(sp);
+	up_read(&nfsi->rwsem);
+	up_read(&clp->cl_sem);
+	igrab(inode);
+	*res = state;
+	return 0; 
+out_err:
+	if (sp != NULL) {
+		if (state != NULL)
+			nfs4_put_open_state(state);
+		up(&sp->so_sema);
+		nfs4_put_state_owner(sp);
+	}
+	up_read(&nfsi->rwsem);
+	up_read(&clp->cl_sem);
+	return err;
+}
+
+static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
+{
+	struct nfs4_exception exception = { };
+	struct nfs4_state *res;
+	int err;
+
+	do {
+		err = _nfs4_open_delegated(inode, flags, cred, &res);
+		if (err == 0)
+			break;
+		res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode),
+					err, &exception));
+	} while (exception.retry);
+	return res;
+}
+
+/*
+ * Returns an nfs4_state + an referenced inode
+ */
+static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+{
+	struct nfs4_state_owner  *sp;
+	struct nfs4_state     *state = NULL;
+	struct nfs_server       *server = NFS_SERVER(dir);
+	struct nfs4_client *clp = server->nfs4_state;
+	struct inode *inode = NULL;
+	int                     status;
+	struct nfs_fattr        f_attr = {
+		.valid          = 0,
+	};
+	struct nfs_openargs o_arg = {
+		.fh             = NFS_FH(dir),
+		.open_flags	= flags,
+		.name           = &dentry->d_name,
+		.server         = server,
+		.bitmask = server->attr_bitmask,
+		.claim = NFS4_OPEN_CLAIM_NULL,
+	};
+	struct nfs_openres o_res = {
+		.f_attr         = &f_attr,
+		.server         = server,
+	};
+
+	/* Protect against reboot recovery conflicts */
+	down_read(&clp->cl_sem);
+	status = -ENOMEM;
+	if (!(sp = nfs4_get_state_owner(server, cred))) {
+		dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
+		goto out_err;
+	}
+	if (flags & O_EXCL) {
+		u32 *p = (u32 *) o_arg.u.verifier.data;
+		p[0] = jiffies;
+		p[1] = current->pid;
+	} else
+		o_arg.u.attrs = sattr;
+	/* Serialization for the sequence id */
+	down(&sp->so_sema);
+
+	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
+	if (status != 0)
+		goto out_err;
+
+	status = -ENOMEM;
+	inode = nfs_fhget(dir->i_sb, &o_res.fh, &f_attr);
+	if (!inode)
+		goto out_err;
+	state = nfs4_get_open_state(inode, sp);
+	if (!state)
+		goto out_err;
+	update_open_stateid(state, &o_res.stateid, flags);
+	if (o_res.delegation_type != 0)
+		nfs_inode_set_delegation(inode, cred, &o_res);
+	up(&sp->so_sema);
+	nfs4_put_state_owner(sp);
+	up_read(&clp->cl_sem);
+	*res = state;
+	return 0;
+out_err:
+	if (sp != NULL) {
+		if (state != NULL)
+			nfs4_put_open_state(state);
+		up(&sp->so_sema);
+		nfs4_put_state_owner(sp);
+	}
+	/* Note: clp->cl_sem must be released before nfs4_put_open_state()! */
+	up_read(&clp->cl_sem);
+	if (inode != NULL)
+		iput(inode);
+	*res = NULL;
+	return status;
+}
+
+
+static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
+{
+	struct nfs4_exception exception = { };
+	struct nfs4_state *res;
+	int status;
+
+	do {
+		status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
+		if (status == 0)
+			break;
+		/* NOTE: BAD_SEQID means the server and client disagree about the
+		 * book-keeping w.r.t. state-changing operations
+		 * (OPEN/CLOSE/LOCK/LOCKU...)
+		 * It is actually a sign of a bug on the client or on the server.
+		 *
+		 * If we receive a BAD_SEQID error in the particular case of
+		 * doing an OPEN, we assume that nfs4_increment_seqid() will
+		 * have unhashed the old state_owner for us, and that we can
+		 * therefore safely retry using a new one. We should still warn
+		 * the user though...
+		 */
+		if (status == -NFS4ERR_BAD_SEQID) {
+			printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n");
+			exception.retry = 1;
+			continue;
+		}
+		res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
+					status, &exception));
+	} while (exception.retry);
+	return res;
+}
+
+static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
+                struct nfs_fh *fhandle, struct iattr *sattr,
+                struct nfs4_state *state)
+{
+        struct nfs_setattrargs  arg = {
+                .fh             = fhandle,
+                .iap            = sattr,
+		.server		= server,
+		.bitmask = server->attr_bitmask,
+        };
+        struct nfs_setattrres  res = {
+		.fattr		= fattr,
+		.server		= server,
+        };
+        struct rpc_message msg = {
+                .rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_SETATTR],
+                .rpc_argp       = &arg,
+                .rpc_resp       = &res,
+        };
+
+        fattr->valid = 0;
+
+	if (state != NULL)
+		msg.rpc_cred = state->owner->so_cred;
+	if (sattr->ia_valid & ATTR_SIZE)
+		nfs4_copy_stateid(&arg.stateid, state, NULL);
+	else
+		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
+
+	return rpc_call_sync(server->client, &msg, 0);
+}
+
+static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
+                struct nfs_fh *fhandle, struct iattr *sattr,
+                struct nfs4_state *state)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(server,
+				_nfs4_do_setattr(server, fattr, fhandle, sattr,
+					state),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+struct nfs4_closedata {
+	struct inode *inode;
+	struct nfs4_state *state;
+	struct nfs_closeargs arg;
+	struct nfs_closeres res;
+};
+
+static void nfs4_close_done(struct rpc_task *task)
+{
+	struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
+	struct nfs4_state *state = calldata->state;
+	struct nfs4_state_owner *sp = state->owner;
+	struct nfs_server *server = NFS_SERVER(calldata->inode);
+
+        /* hmm. we are done with the inode, and in the process of freeing
+	 * the state_owner. we keep this around to process errors
+	 */
+	nfs4_increment_seqid(task->tk_status, sp);
+	switch (task->tk_status) {
+		case 0:
+			memcpy(&state->stateid, &calldata->res.stateid,
+					sizeof(state->stateid));
+			break;
+		case -NFS4ERR_STALE_STATEID:
+		case -NFS4ERR_EXPIRED:
+			state->state = calldata->arg.open_flags;
+			nfs4_schedule_state_recovery(server->nfs4_state);
+			break;
+		default:
+			if (nfs4_async_handle_error(task, server) == -EAGAIN) {
+				rpc_restart_call(task);
+				return;
+			}
+	}
+	state->state = calldata->arg.open_flags;
+	nfs4_put_open_state(state);
+	up(&sp->so_sema);
+	nfs4_put_state_owner(sp);
+	up_read(&server->nfs4_state->cl_sem);
+	kfree(calldata);
+}
+
+static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata)
+{
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
+		.rpc_argp = &calldata->arg,
+		.rpc_resp = &calldata->res,
+		.rpc_cred = calldata->state->owner->so_cred,
+	};
+	if (calldata->arg.open_flags != 0)
+		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
+	return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata);
+}
+
+/* 
+ * It is possible for data to be read/written from a mem-mapped file 
+ * after the sys_close call (which hits the vfs layer as a flush).
+ * This means that we can't safely call nfsv4 close on a file until 
+ * the inode is cleared. This in turn means that we are not good
+ * NFSv4 citizens - we do not indicate to the server to update the file's 
+ * share state even when we are done with one of the three share 
+ * stateid's in the inode.
+ *
+ * NOTE: Caller must be holding the sp->so_owner semaphore!
+ */
+int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) 
+{
+	struct nfs4_closedata *calldata;
+	int status;
+
+	/* Tell caller we're done */
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+		state->state = mode;
+		return 0;
+	}
+	calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL);
+	if (calldata == NULL)
+		return -ENOMEM;
+	calldata->inode = inode;
+	calldata->state = state;
+	calldata->arg.fh = NFS_FH(inode);
+	/* Serialization for the sequence id */
+	calldata->arg.seqid = state->owner->so_seqid;
+	calldata->arg.open_flags = mode;
+	memcpy(&calldata->arg.stateid, &state->stateid,
+			sizeof(calldata->arg.stateid));
+	status = nfs4_close_call(NFS_SERVER(inode)->client, calldata);
+	/*
+	 * Return -EINPROGRESS on success in order to indicate to the
+	 * caller that an asynchronous RPC call has been launched, and
+	 * that it will release the semaphores on completion.
+	 */
+	return (status == 0) ? -EINPROGRESS : status;
+}
+
+struct inode *
+nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+{
+	struct iattr attr;
+	struct rpc_cred *cred;
+	struct nfs4_state *state;
+
+	if (nd->flags & LOOKUP_CREATE) {
+		attr.ia_mode = nd->intent.open.create_mode;
+		attr.ia_valid = ATTR_MODE;
+		if (!IS_POSIXACL(dir))
+			attr.ia_mode &= ~current->fs->umask;
+	} else {
+		attr.ia_valid = 0;
+		BUG_ON(nd->intent.open.flags & O_CREAT);
+	}
+
+	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	if (IS_ERR(cred))
+		return (struct inode *)cred;
+	state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
+	put_rpccred(cred);
+	if (IS_ERR(state))
+		return (struct inode *)state;
+	return state->inode;
+}
+
+int
+nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
+{
+	struct rpc_cred *cred;
+	struct nfs4_state *state;
+	struct inode *inode;
+
+	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	if (IS_ERR(cred))
+		return PTR_ERR(cred);
+	state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
+	if (IS_ERR(state))
+		state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
+	put_rpccred(cred);
+	if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0)
+		return 1;
+	if (IS_ERR(state))
+		return 0;
+	inode = state->inode;
+	if (inode == dentry->d_inode) {
+		iput(inode);
+		return 1;
+	}
+	d_drop(dentry);
+	nfs4_close_state(state, openflags);
+	iput(inode);
+	return 0;
+}
+
+
+static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
+{
+	struct nfs4_server_caps_res res = {};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SERVER_CAPS],
+		.rpc_argp = fhandle,
+		.rpc_resp = &res,
+	};
+	int status;
+
+	status = rpc_call_sync(server->client, &msg, 0);
+	if (status == 0) {
+		memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
+		if (res.attr_bitmask[0] & FATTR4_WORD0_ACL)
+			server->caps |= NFS_CAP_ACLS;
+		if (res.has_links != 0)
+			server->caps |= NFS_CAP_HARDLINKS;
+		if (res.has_symlinks != 0)
+			server->caps |= NFS_CAP_SYMLINKS;
+		server->acl_bitmask = res.acl_bitmask;
+	}
+	return status;
+}
+
+static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(server,
+				_nfs4_server_capabilities(server, fhandle),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_fsinfo *info)
+{
+	struct nfs_fattr *	fattr = info->fattr;
+	struct nfs4_lookup_root_arg args = {
+		.bitmask = nfs4_fattr_bitmap,
+	};
+	struct nfs4_lookup_res res = {
+		.server = server,
+		.fattr = fattr,
+		.fh = fhandle,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP_ROOT],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	fattr->valid = 0;
+	return rpc_call_sync(server->client, &msg, 0);
+}
+
+static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_fsinfo *info)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(server,
+				_nfs4_lookup_root(server, fhandle, info),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_fsinfo *info)
+{
+	struct nfs_fattr *	fattr = info->fattr;
+	unsigned char *		p;
+	struct qstr		q;
+	struct nfs4_lookup_arg args = {
+		.dir_fh = fhandle,
+		.name = &q,
+		.bitmask = nfs4_fattr_bitmap,
+	};
+	struct nfs4_lookup_res res = {
+		.server = server,
+		.fattr = fattr,
+		.fh = fhandle,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	int status;
+
+	/*
+	 * Now we do a separate LOOKUP for each component of the mount path.
+	 * The LOOKUPs are done separately so that we can conveniently
+	 * catch an ERR_WRONGSEC if it occurs along the way...
+	 */
+	status = nfs4_lookup_root(server, fhandle, info);
+	if (status)
+		goto out;
+
+	p = server->mnt_path;
+	for (;;) {
+		struct nfs4_exception exception = { };
+
+		while (*p == '/')
+			p++;
+		if (!*p)
+			break;
+		q.name = p;
+		while (*p && (*p != '/'))
+			p++;
+		q.len = p - q.name;
+
+		do {
+			fattr->valid = 0;
+			status = nfs4_handle_exception(server,
+					rpc_call_sync(server->client, &msg, 0),
+					&exception);
+		} while (exception.retry);
+		if (status == 0)
+			continue;
+		if (status == -ENOENT) {
+			printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path);
+			printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n");
+		}
+		break;
+	}
+	if (status == 0)
+		status = nfs4_server_capabilities(server, fhandle);
+	if (status == 0)
+		status = nfs4_do_fsinfo(server, fhandle, info);
+out:
+	return status;
+}
+
+static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs4_getattr_arg args = {
+		.fh = fhandle,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs4_getattr_res res = {
+		.fattr = fattr,
+		.server = server,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	
+	fattr->valid = 0;
+	return rpc_call_sync(server->client, &msg, 0);
+}
+
+static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(server,
+				_nfs4_proc_getattr(server, fhandle, fattr),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+/* 
+ * The file is not closed if it is opened due to the a request to change
+ * the size of the file. The open call will not be needed once the
+ * VFS layer lookup-intents are implemented.
+ *
+ * Close is called when the inode is destroyed.
+ * If we haven't opened the file for O_WRONLY, we
+ * need to in the size_change case to obtain a stateid.
+ *
+ * Got race?
+ * Because OPEN is always done by name in nfsv4, it is
+ * possible that we opened a different file by the same
+ * name.  We can recognize this race condition, but we
+ * can't do anything about it besides returning an error.
+ *
+ * This will be fixed with VFS changes (lookup-intent).
+ */
+static int
+nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
+		  struct iattr *sattr)
+{
+	struct inode *		inode = dentry->d_inode;
+	int			size_change = sattr->ia_valid & ATTR_SIZE;
+	struct nfs4_state	*state = NULL;
+	int need_iput = 0;
+	int status;
+
+	fattr->valid = 0;
+	
+	if (size_change) {
+		struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
+		if (IS_ERR(cred))
+			return PTR_ERR(cred);
+		state = nfs4_find_state(inode, cred, FMODE_WRITE);
+		if (state == NULL) {
+			state = nfs4_open_delegated(dentry->d_inode,
+					FMODE_WRITE, cred);
+			if (IS_ERR(state))
+				state = nfs4_do_open(dentry->d_parent->d_inode,
+						dentry, FMODE_WRITE,
+						NULL, cred);
+			need_iput = 1;
+		}
+		put_rpccred(cred);
+		if (IS_ERR(state))
+			return PTR_ERR(state);
+
+		if (state->inode != inode) {
+			printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode);
+			status = -EIO;
+			goto out;
+		}
+	}
+	status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
+			NFS_FH(inode), sattr, state);
+out:
+	if (state) {
+		inode = state->inode;
+		nfs4_close_state(state, FMODE_WRITE);
+		if (need_iput)
+			iput(inode);
+	}
+	return status;
+}
+
+static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
+		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	int		       status;
+	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs4_lookup_arg args = {
+		.bitmask = server->attr_bitmask,
+		.dir_fh = NFS_FH(dir),
+		.name = name,
+	};
+	struct nfs4_lookup_res res = {
+		.server = server,
+		.fattr = fattr,
+		.fh = fhandle,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	
+	fattr->valid = 0;
+	
+	dprintk("NFS call  lookup %s\n", name->name);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	dprintk("NFS reply lookup: %d\n", status);
+	return status;
+}
+
+static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(dir),
+				_nfs4_proc_lookup(dir, name, fhandle, fattr),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+{
+	struct nfs4_accessargs args = {
+		.fh = NFS_FH(inode),
+	};
+	struct nfs4_accessres res = { 0 };
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+		.rpc_cred = entry->cred,
+	};
+	int mode = entry->mask;
+	int status;
+
+	/*
+	 * Determine which access bits we want to ask for...
+	 */
+	if (mode & MAY_READ)
+		args.access |= NFS4_ACCESS_READ;
+	if (S_ISDIR(inode->i_mode)) {
+		if (mode & MAY_WRITE)
+			args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE;
+		if (mode & MAY_EXEC)
+			args.access |= NFS4_ACCESS_LOOKUP;
+	} else {
+		if (mode & MAY_WRITE)
+			args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND;
+		if (mode & MAY_EXEC)
+			args.access |= NFS4_ACCESS_EXECUTE;
+	}
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	if (!status) {
+		entry->mask = 0;
+		if (res.access & NFS4_ACCESS_READ)
+			entry->mask |= MAY_READ;
+		if (res.access & (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE))
+			entry->mask |= MAY_WRITE;
+		if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
+			entry->mask |= MAY_EXEC;
+	}
+	return status;
+}
+
+static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(inode),
+				_nfs4_proc_access(inode, entry),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+/*
+ * TODO: For the time being, we don't try to get any attributes
+ * along with any of the zero-copy operations READ, READDIR,
+ * READLINK, WRITE.
+ *
+ * In the case of the first three, we want to put the GETATTR
+ * after the read-type operation -- this is because it is hard
+ * to predict the length of a GETATTR response in v4, and thus
+ * align the READ data correctly.  This means that the GETATTR
+ * may end up partially falling into the page cache, and we should
+ * shift it into the 'tail' of the xdr_buf before processing.
+ * To do this efficiently, we need to know the total length
+ * of data received, which doesn't seem to be available outside
+ * of the RPC layer.
+ *
+ * In the case of WRITE, we also want to put the GETATTR after
+ * the operation -- in this case because we want to make sure
+ * we get the post-operation mtime and size.  This means that
+ * we can't use xdr_encode_pages() as written: we need a variant
+ * of it which would leave room in the 'tail' iovec.
+ *
+ * Both of these changes to the XDR layer would in fact be quite
+ * minor, but I decided to leave them for a subsequent patch.
+ */
+static int _nfs4_proc_readlink(struct inode *inode, struct page *page,
+		unsigned int pgbase, unsigned int pglen)
+{
+	struct nfs4_readlink args = {
+		.fh       = NFS_FH(inode),
+		.pgbase	  = pgbase,
+		.pglen    = pglen,
+		.pages    = &page,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READLINK],
+		.rpc_argp = &args,
+		.rpc_resp = NULL,
+	};
+
+	return rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+}
+
+static int nfs4_proc_readlink(struct inode *inode, struct page *page,
+		unsigned int pgbase, unsigned int pglen)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(inode),
+				_nfs4_proc_readlink(inode, page, pgbase, pglen),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int _nfs4_proc_read(struct nfs_read_data *rdata)
+{
+	int flags = rdata->flags;
+	struct inode *inode = rdata->inode;
+	struct nfs_fattr *fattr = rdata->res.fattr;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_READ],
+		.rpc_argp	= &rdata->args,
+		.rpc_resp	= &rdata->res,
+		.rpc_cred	= rdata->cred,
+	};
+	unsigned long timestamp = jiffies;
+	int status;
+
+	dprintk("NFS call  read %d @ %Ld\n", rdata->args.count,
+			(long long) rdata->args.offset);
+
+	fattr->valid = 0;
+	status = rpc_call_sync(server->client, &msg, flags);
+	if (!status)
+		renew_lease(server, timestamp);
+	dprintk("NFS reply read: %d\n", status);
+	return status;
+}
+
+static int nfs4_proc_read(struct nfs_read_data *rdata)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(rdata->inode),
+				_nfs4_proc_read(rdata),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int _nfs4_proc_write(struct nfs_write_data *wdata)
+{
+	int rpcflags = wdata->flags;
+	struct inode *inode = wdata->inode;
+	struct nfs_fattr *fattr = wdata->res.fattr;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_WRITE],
+		.rpc_argp	= &wdata->args,
+		.rpc_resp	= &wdata->res,
+		.rpc_cred	= wdata->cred,
+	};
+	int status;
+
+	dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
+			(long long) wdata->args.offset);
+
+	fattr->valid = 0;
+	status = rpc_call_sync(server->client, &msg, rpcflags);
+	dprintk("NFS reply write: %d\n", status);
+	return status;
+}
+
+static int nfs4_proc_write(struct nfs_write_data *wdata)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(wdata->inode),
+				_nfs4_proc_write(wdata),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int _nfs4_proc_commit(struct nfs_write_data *cdata)
+{
+	struct inode *inode = cdata->inode;
+	struct nfs_fattr *fattr = cdata->res.fattr;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
+		.rpc_argp	= &cdata->args,
+		.rpc_resp	= &cdata->res,
+		.rpc_cred	= cdata->cred,
+	};
+	int status;
+
+	dprintk("NFS call  commit %d @ %Ld\n", cdata->args.count,
+			(long long) cdata->args.offset);
+
+	fattr->valid = 0;
+	status = rpc_call_sync(server->client, &msg, 0);
+	dprintk("NFS reply commit: %d\n", status);
+	return status;
+}
+
+static int nfs4_proc_commit(struct nfs_write_data *cdata)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(cdata->inode),
+				_nfs4_proc_commit(cdata),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+/*
+ * Got race?
+ * We will need to arrange for the VFS layer to provide an atomic open.
+ * Until then, this create/open method is prone to inefficiency and race
+ * conditions due to the lookup, create, and open VFS calls from sys_open()
+ * placed on the wire.
+ *
+ * Given the above sorry state of affairs, I'm simply sending an OPEN.
+ * The file will be opened again in the subsequent VFS open call
+ * (nfs4_proc_file_open).
+ *
+ * The open for read will just hang around to be used by any process that
+ * opens the file O_RDONLY. This will all be resolved with the VFS changes.
+ */
+
+static int
+nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+                 int flags)
+{
+	struct nfs4_state *state;
+	struct rpc_cred *cred;
+	int status = 0;
+
+	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	if (IS_ERR(cred)) {
+		status = PTR_ERR(cred);
+		goto out;
+	}
+	state = nfs4_do_open(dir, dentry, flags, sattr, cred);
+	put_rpccred(cred);
+	if (IS_ERR(state)) {
+		status = PTR_ERR(state);
+		goto out;
+	}
+	d_instantiate(dentry, state->inode);
+	if (flags & O_EXCL) {
+		struct nfs_fattr fattr;
+		status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
+		                     NFS_FH(state->inode), sattr, state);
+		if (status == 0)
+			goto out;
+	} else if (flags != 0)
+		goto out;
+	nfs4_close_state(state, flags);
+out:
+	return status;
+}
+
+static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
+{
+	struct nfs4_remove_arg args = {
+		.fh = NFS_FH(dir),
+		.name = name,
+	};
+	struct nfs4_change_info	res;
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_REMOVE],
+		.rpc_argp	= &args,
+		.rpc_resp	= &res,
+	};
+	int			status;
+
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	if (status == 0)
+		update_changeattr(dir, &res);
+	return status;
+}
+
+static int nfs4_proc_remove(struct inode *dir, struct qstr *name)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(dir),
+				_nfs4_proc_remove(dir, name),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+struct unlink_desc {
+	struct nfs4_remove_arg	args;
+	struct nfs4_change_info	res;
+};
+
+static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir,
+		struct qstr *name)
+{
+	struct unlink_desc *up;
+
+	up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL);
+	if (!up)
+		return -ENOMEM;
+	
+	up->args.fh = NFS_FH(dir->d_inode);
+	up->args.name = name;
+	
+	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
+	msg->rpc_argp = &up->args;
+	msg->rpc_resp = &up->res;
+	return 0;
+}
+
+static int nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
+{
+	struct rpc_message *msg = &task->tk_msg;
+	struct unlink_desc *up;
+	
+	if (msg->rpc_resp != NULL) {
+		up = container_of(msg->rpc_resp, struct unlink_desc, res);
+		update_changeattr(dir->d_inode, &up->res);
+		kfree(up);
+		msg->rpc_resp = NULL;
+		msg->rpc_argp = NULL;
+	}
+	return 0;
+}
+
+static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
+		struct inode *new_dir, struct qstr *new_name)
+{
+	struct nfs4_rename_arg arg = {
+		.old_dir = NFS_FH(old_dir),
+		.new_dir = NFS_FH(new_dir),
+		.old_name = old_name,
+		.new_name = new_name,
+	};
+	struct nfs4_rename_res res = { };
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
+		.rpc_argp = &arg,
+		.rpc_resp = &res,
+	};
+	int			status;
+	
+	status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
+
+	if (!status) {
+		update_changeattr(old_dir, &res.old_cinfo);
+		update_changeattr(new_dir, &res.new_cinfo);
+	}
+	return status;
+}
+
+static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
+		struct inode *new_dir, struct qstr *new_name)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(old_dir),
+				_nfs4_proc_rename(old_dir, old_name,
+					new_dir, new_name),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+{
+	struct nfs4_link_arg arg = {
+		.fh     = NFS_FH(inode),
+		.dir_fh = NFS_FH(dir),
+		.name   = name,
+	};
+	struct nfs4_change_info	cinfo = { };
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK],
+		.rpc_argp = &arg,
+		.rpc_resp = &cinfo,
+	};
+	int			status;
+
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	if (!status)
+		update_changeattr(dir, &cinfo);
+
+	return status;
+}
+
+static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(inode),
+				_nfs4_proc_link(inode, dir, name),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
+		struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
+		struct nfs_fattr *fattr)
+{
+	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs4_create_arg arg = {
+		.dir_fh = NFS_FH(dir),
+		.server = server,
+		.name = name,
+		.attrs = sattr,
+		.ftype = NF4LNK,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs4_create_res res = {
+		.server = server,
+		.fh = fhandle,
+		.fattr = fattr,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK],
+		.rpc_argp = &arg,
+		.rpc_resp = &res,
+	};
+	int			status;
+
+	if (path->len > NFS4_MAXPATHLEN)
+		return -ENAMETOOLONG;
+	arg.u.symlink = path;
+	fattr->valid = 0;
+	
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	if (!status)
+		update_changeattr(dir, &res.dir_cinfo);
+	return status;
+}
+
+static int nfs4_proc_symlink(struct inode *dir, struct qstr *name,
+		struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
+		struct nfs_fattr *fattr)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(dir),
+				_nfs4_proc_symlink(dir, name, path, sattr,
+					fhandle, fattr),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
+		struct iattr *sattr)
+{
+	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr;
+	struct nfs4_create_arg arg = {
+		.dir_fh = NFS_FH(dir),
+		.server = server,
+		.name = &dentry->d_name,
+		.attrs = sattr,
+		.ftype = NF4DIR,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs4_create_res res = {
+		.server = server,
+		.fh = &fhandle,
+		.fattr = &fattr,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
+		.rpc_argp = &arg,
+		.rpc_resp = &res,
+	};
+	int			status;
+
+	fattr.valid = 0;
+	
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	if (!status) {
+		update_changeattr(dir, &res.dir_cinfo);
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	}
+	return status;
+}
+
+static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
+		struct iattr *sattr)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(dir),
+				_nfs4_proc_mkdir(dir, dentry, sattr),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+                  u64 cookie, struct page *page, unsigned int count, int plus)
+{
+	struct inode		*dir = dentry->d_inode;
+	struct nfs4_readdir_arg args = {
+		.fh = NFS_FH(dir),
+		.pages = &page,
+		.pgbase = 0,
+		.count = count,
+		.bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask,
+	};
+	struct nfs4_readdir_res res;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READDIR],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+		.rpc_cred = cred,
+	};
+	int			status;
+
+	lock_kernel();
+	nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
+	res.pgbase = args.pgbase;
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	if (status == 0)
+		memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
+	unlock_kernel();
+	return status;
+}
+
+static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+                  u64 cookie, struct page *page, unsigned int count, int plus)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode),
+				_nfs4_proc_readdir(dentry, cred, cookie,
+					page, count, plus),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
+		struct iattr *sattr, dev_t rdev)
+{
+	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs_fh fh;
+	struct nfs_fattr fattr;
+	struct nfs4_create_arg arg = {
+		.dir_fh = NFS_FH(dir),
+		.server = server,
+		.name = &dentry->d_name,
+		.attrs = sattr,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs4_create_res res = {
+		.server = server,
+		.fh = &fh,
+		.fattr = &fattr,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
+		.rpc_argp = &arg,
+		.rpc_resp = &res,
+	};
+	int			status;
+	int                     mode = sattr->ia_mode;
+
+	fattr.valid = 0;
+
+	BUG_ON(!(sattr->ia_valid & ATTR_MODE));
+	BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode));
+	if (S_ISFIFO(mode))
+		arg.ftype = NF4FIFO;
+	else if (S_ISBLK(mode)) {
+		arg.ftype = NF4BLK;
+		arg.u.device.specdata1 = MAJOR(rdev);
+		arg.u.device.specdata2 = MINOR(rdev);
+	}
+	else if (S_ISCHR(mode)) {
+		arg.ftype = NF4CHR;
+		arg.u.device.specdata1 = MAJOR(rdev);
+		arg.u.device.specdata2 = MINOR(rdev);
+	}
+	else
+		arg.ftype = NF4SOCK;
+	
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	if (status == 0) {
+		update_changeattr(dir, &res.dir_cinfo);
+		status = nfs_instantiate(dentry, &fh, &fattr);
+	}
+	return status;
+}
+
+static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
+		struct iattr *sattr, dev_t rdev)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(dir),
+				_nfs4_proc_mknod(dir, dentry, sattr, rdev),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsstat *fsstat)
+{
+	struct nfs4_statfs_arg args = {
+		.fh = fhandle,
+		.bitmask = server->attr_bitmask,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_STATFS],
+		.rpc_argp = &args,
+		.rpc_resp = fsstat,
+	};
+
+	fsstat->fattr->valid = 0;
+	return rpc_call_sync(server->client, &msg, 0);
+}
+
+static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(server,
+				_nfs4_proc_statfs(server, fhandle, fsstat),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_fsinfo *fsinfo)
+{
+	struct nfs4_fsinfo_arg args = {
+		.fh = fhandle,
+		.bitmask = server->attr_bitmask,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSINFO],
+		.rpc_argp = &args,
+		.rpc_resp = fsinfo,
+	};
+
+	return rpc_call_sync(server->client, &msg, 0);
+}
+
+static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
+{
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = nfs4_handle_exception(server,
+				_nfs4_do_fsinfo(server, fhandle, fsinfo),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
+{
+	fsinfo->fattr->valid = 0;
+	return nfs4_do_fsinfo(server, fhandle, fsinfo);
+}
+
+static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_pathconf *pathconf)
+{
+	struct nfs4_pathconf_arg args = {
+		.fh = fhandle,
+		.bitmask = server->attr_bitmask,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PATHCONF],
+		.rpc_argp = &args,
+		.rpc_resp = pathconf,
+	};
+
+	/* None of the pathconf attributes are mandatory to implement */
+	if ((args.bitmask[0] & nfs4_pathconf_bitmap[0]) == 0) {
+		memset(pathconf, 0, sizeof(*pathconf));
+		return 0;
+	}
+
+	pathconf->fattr->valid = 0;
+	return rpc_call_sync(server->client, &msg, 0);
+}
+
+static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_pathconf *pathconf)
+{
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = nfs4_handle_exception(server,
+				_nfs4_proc_pathconf(server, fhandle, pathconf),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static void
+nfs4_read_done(struct rpc_task *task)
+{
+	struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
+	struct inode *inode = data->inode;
+
+	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
+		rpc_restart_call(task);
+		return;
+	}
+	if (task->tk_status > 0)
+		renew_lease(NFS_SERVER(inode), data->timestamp);
+	/* Call back common NFS readpage processing */
+	nfs_readpage_result(task);
+}
+
+static void
+nfs4_proc_read_setup(struct nfs_read_data *data)
+{
+	struct rpc_task	*task = &data->task;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = data->cred,
+	};
+	struct inode *inode = data->inode;
+	int flags;
+
+	data->timestamp   = jiffies;
+
+	/* N.B. Do we need to test? Never called for swapfile inode */
+	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs4_read_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+static void
+nfs4_write_done(struct rpc_task *task)
+{
+	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+	struct inode *inode = data->inode;
+	
+	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
+		rpc_restart_call(task);
+		return;
+	}
+	if (task->tk_status >= 0)
+		renew_lease(NFS_SERVER(inode), data->timestamp);
+	/* Call back common NFS writeback processing */
+	nfs_writeback_done(task);
+}
+
+static void
+nfs4_proc_write_setup(struct nfs_write_data *data, int how)
+{
+	struct rpc_task	*task = &data->task;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = data->cred,
+	};
+	struct inode *inode = data->inode;
+	int stable;
+	int flags;
+	
+	if (how & FLUSH_STABLE) {
+		if (!NFS_I(inode)->ncommit)
+			stable = NFS_FILE_SYNC;
+		else
+			stable = NFS_DATA_SYNC;
+	} else
+		stable = NFS_UNSTABLE;
+	data->args.stable = stable;
+
+	data->timestamp   = jiffies;
+
+	/* Set the initial flags for the task.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs4_write_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+static void
+nfs4_commit_done(struct rpc_task *task)
+{
+	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+	struct inode *inode = data->inode;
+	
+	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
+		rpc_restart_call(task);
+		return;
+	}
+	/* Call back common NFS writeback processing */
+	nfs_commit_done(task);
+}
+
+static void
+nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
+{
+	struct rpc_task	*task = &data->task;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = data->cred,
+	};	
+	struct inode *inode = data->inode;
+	int flags;
+	
+	/* Set the initial flags for the task.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs4_commit_done, flags);
+	rpc_call_setup(task, &msg, 0);	
+}
+
+/*
+ * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special
+ * standalone procedure for queueing an asynchronous RENEW.
+ */
+static void
+renew_done(struct rpc_task *task)
+{
+	struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
+	unsigned long timestamp = (unsigned long)task->tk_calldata;
+
+	if (task->tk_status < 0) {
+		switch (task->tk_status) {
+			case -NFS4ERR_STALE_CLIENTID:
+			case -NFS4ERR_EXPIRED:
+			case -NFS4ERR_CB_PATH_DOWN:
+				nfs4_schedule_state_recovery(clp);
+		}
+		return;
+	}
+	spin_lock(&clp->cl_lock);
+	if (time_before(clp->cl_last_renewal,timestamp))
+		clp->cl_last_renewal = timestamp;
+	spin_unlock(&clp->cl_lock);
+}
+
+int
+nfs4_proc_async_renew(struct nfs4_client *clp)
+{
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_RENEW],
+		.rpc_argp	= clp,
+		.rpc_cred	= clp->cl_cred,
+	};
+
+	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
+			renew_done, (void *)jiffies);
+}
+
+int
+nfs4_proc_renew(struct nfs4_client *clp)
+{
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_RENEW],
+		.rpc_argp	= clp,
+		.rpc_cred	= clp->cl_cred,
+	};
+	unsigned long now = jiffies;
+	int status;
+
+	status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
+	if (status < 0)
+		return status;
+	spin_lock(&clp->cl_lock);
+	if (time_before(clp->cl_last_renewal,now))
+		clp->cl_last_renewal = now;
+	spin_unlock(&clp->cl_lock);
+	return 0;
+}
+
+/*
+ * We will need to arrange for the VFS layer to provide an atomic open.
+ * Until then, this open method is prone to inefficiency and race conditions
+ * due to the lookup, potential create, and open VFS calls from sys_open()
+ * placed on the wire.
+ */
+static int
+nfs4_proc_file_open(struct inode *inode, struct file *filp)
+{
+	struct dentry *dentry = filp->f_dentry;
+	struct nfs_open_context *ctx;
+	struct nfs4_state *state = NULL;
+	struct rpc_cred *cred;
+	int status = -ENOMEM;
+
+	dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n",
+	                       (int)dentry->d_parent->d_name.len,
+	                       dentry->d_parent->d_name.name,
+	                       (int)dentry->d_name.len, dentry->d_name.name);
+
+
+	/* Find our open stateid */
+	cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
+	if (IS_ERR(cred))
+		return PTR_ERR(cred);
+	ctx = alloc_nfs_open_context(dentry, cred);
+	put_rpccred(cred);
+	if (unlikely(ctx == NULL))
+		return -ENOMEM;
+	status = -EIO; /* ERACE actually */
+	state = nfs4_find_state(inode, cred, filp->f_mode);
+	if (unlikely(state == NULL))
+		goto no_state;
+	ctx->state = state;
+	nfs4_close_state(state, filp->f_mode);
+	ctx->mode = filp->f_mode;
+	nfs_file_set_open_context(filp, ctx);
+	put_nfs_open_context(ctx);
+	if (filp->f_mode & FMODE_WRITE)
+		nfs_begin_data_update(inode);
+	return 0;
+no_state:
+	printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__);
+	put_nfs_open_context(ctx);
+	return status;
+}
+
+/*
+ * Release our state
+ */
+static int
+nfs4_proc_file_release(struct inode *inode, struct file *filp)
+{
+	if (filp->f_mode & FMODE_WRITE)
+		nfs_end_data_update(inode);
+	nfs_file_clear_open_context(filp);
+	return 0;
+}
+
+static int
+nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
+{
+	struct nfs4_client *clp = server->nfs4_state;
+
+	if (!clp || task->tk_status >= 0)
+		return 0;
+	switch(task->tk_status) {
+		case -NFS4ERR_STALE_CLIENTID:
+		case -NFS4ERR_STALE_STATEID:
+		case -NFS4ERR_EXPIRED:
+			rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL);
+			nfs4_schedule_state_recovery(clp);
+			if (test_bit(NFS4CLNT_OK, &clp->cl_state))
+				rpc_wake_up_task(task);
+			task->tk_status = 0;
+			return -EAGAIN;
+		case -NFS4ERR_GRACE:
+		case -NFS4ERR_DELAY:
+			rpc_delay(task, NFS4_POLL_RETRY_MAX);
+			task->tk_status = 0;
+			return -EAGAIN;
+		case -NFS4ERR_OLD_STATEID:
+			task->tk_status = 0;
+			return -EAGAIN;
+	}
+	task->tk_status = nfs4_map_errors(task->tk_status);
+	return 0;
+}
+
+static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp)
+{
+	DEFINE_WAIT(wait);
+	sigset_t oldset;
+	int interruptible, res = 0;
+
+	might_sleep();
+
+	rpc_clnt_sigmask(clnt, &oldset);
+	interruptible = TASK_UNINTERRUPTIBLE;
+	if (clnt->cl_intr)
+		interruptible = TASK_INTERRUPTIBLE;
+	prepare_to_wait(&clp->cl_waitq, &wait, interruptible);
+	nfs4_schedule_state_recovery(clp);
+	if (clnt->cl_intr && signalled())
+		res = -ERESTARTSYS;
+	else if (!test_bit(NFS4CLNT_OK, &clp->cl_state))
+		schedule();
+	finish_wait(&clp->cl_waitq, &wait);
+	rpc_clnt_sigunmask(clnt, &oldset);
+	return res;
+}
+
+static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
+{
+	sigset_t oldset;
+	int res = 0;
+
+	might_sleep();
+
+	if (*timeout <= 0)
+		*timeout = NFS4_POLL_RETRY_MIN;
+	if (*timeout > NFS4_POLL_RETRY_MAX)
+		*timeout = NFS4_POLL_RETRY_MAX;
+	rpc_clnt_sigmask(clnt, &oldset);
+	if (clnt->cl_intr) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(*timeout);
+		if (signalled())
+			res = -ERESTARTSYS;
+	} else {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(*timeout);
+	}
+	rpc_clnt_sigunmask(clnt, &oldset);
+	*timeout <<= 1;
+	return res;
+}
+
+/* This is the error handling routine for processes that are allowed
+ * to sleep.
+ */
+int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
+{
+	struct nfs4_client *clp = server->nfs4_state;
+	int ret = errorcode;
+
+	exception->retry = 0;
+	switch(errorcode) {
+		case 0:
+			return 0;
+		case -NFS4ERR_STALE_CLIENTID:
+		case -NFS4ERR_STALE_STATEID:
+		case -NFS4ERR_EXPIRED:
+			ret = nfs4_wait_clnt_recover(server->client, clp);
+			if (ret == 0)
+				exception->retry = 1;
+			break;
+		case -NFS4ERR_GRACE:
+		case -NFS4ERR_DELAY:
+			ret = nfs4_delay(server->client, &exception->timeout);
+			if (ret == 0)
+				exception->retry = 1;
+			break;
+		case -NFS4ERR_OLD_STATEID:
+			if (ret == 0)
+				exception->retry = 1;
+	}
+	/* We failed to handle the error */
+	return nfs4_map_errors(ret);
+}
+
+int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port)
+{
+	nfs4_verifier sc_verifier;
+	struct nfs4_setclientid setclientid = {
+		.sc_verifier = &sc_verifier,
+		.sc_prog = program,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
+		.rpc_argp = &setclientid,
+		.rpc_resp = clp,
+		.rpc_cred = clp->cl_cred,
+	};
+	u32 *p;
+	int loop = 0;
+	int status;
+
+	p = (u32*)sc_verifier.data;
+	*p++ = htonl((u32)clp->cl_boot_time.tv_sec);
+	*p = htonl((u32)clp->cl_boot_time.tv_nsec);
+
+	for(;;) {
+		setclientid.sc_name_len = scnprintf(setclientid.sc_name,
+				sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u",
+				clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr),
+				clp->cl_cred->cr_ops->cr_name,
+				clp->cl_id_uniquifier);
+		setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
+				sizeof(setclientid.sc_netid), "tcp");
+		setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
+				sizeof(setclientid.sc_uaddr), "%s.%d.%d",
+				clp->cl_ipaddr, port >> 8, port & 255);
+
+		status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
+		if (status != -NFS4ERR_CLID_INUSE)
+			break;
+		if (signalled())
+			break;
+		if (loop++ & 1)
+			ssleep(clp->cl_lease_time + 1);
+		else
+			if (++clp->cl_id_uniquifier == 0)
+				break;
+	}
+	return status;
+}
+
+int
+nfs4_proc_setclientid_confirm(struct nfs4_client *clp)
+{
+	struct nfs_fsinfo fsinfo;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM],
+		.rpc_argp = clp,
+		.rpc_resp = &fsinfo,
+		.rpc_cred = clp->cl_cred,
+	};
+	unsigned long now;
+	int status;
+
+	now = jiffies;
+	status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
+	if (status == 0) {
+		spin_lock(&clp->cl_lock);
+		clp->cl_lease_time = fsinfo.lease_time * HZ;
+		clp->cl_last_renewal = now;
+		spin_unlock(&clp->cl_lock);
+	}
+	return status;
+}
+
+static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
+{
+	struct nfs4_delegreturnargs args = {
+		.fhandle = NFS_FH(inode),
+		.stateid = stateid,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN],
+		.rpc_argp = &args,
+		.rpc_cred = cred,
+	};
+
+	return rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+}
+
+int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = _nfs4_proc_delegreturn(inode, cred, stateid);
+		switch (err) {
+			case -NFS4ERR_STALE_STATEID:
+			case -NFS4ERR_EXPIRED:
+				nfs4_schedule_state_recovery(server->nfs4_state);
+			case 0:
+				return 0;
+		}
+		err = nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+	return err;
+}
+
+#define NFS4_LOCK_MINTIMEOUT (1 * HZ)
+#define NFS4_LOCK_MAXTIMEOUT (30 * HZ)
+
+/* 
+ * sleep, with exponential backoff, and retry the LOCK operation. 
+ */
+static unsigned long
+nfs4_set_lock_task_retry(unsigned long timeout)
+{
+	current->state = TASK_INTERRUPTIBLE;
+	schedule_timeout(timeout);
+	timeout <<= 1;
+	if (timeout > NFS4_LOCK_MAXTIMEOUT)
+		return NFS4_LOCK_MAXTIMEOUT;
+	return timeout;
+}
+
+static inline int
+nfs4_lck_type(int cmd, struct file_lock *request)
+{
+	/* set lock type */
+	switch (request->fl_type) {
+		case F_RDLCK:
+			return IS_SETLKW(cmd) ? NFS4_READW_LT : NFS4_READ_LT;
+		case F_WRLCK:
+			return IS_SETLKW(cmd) ? NFS4_WRITEW_LT : NFS4_WRITE_LT;
+		case F_UNLCK:
+			return NFS4_WRITE_LT; 
+	}
+	BUG();
+	return 0;
+}
+
+static inline uint64_t
+nfs4_lck_length(struct file_lock *request)
+{
+	if (request->fl_end == OFFSET_MAX)
+		return ~(uint64_t)0;
+	return request->fl_end - request->fl_start + 1;
+}
+
+static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request)
+{
+	struct inode *inode = state->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_lockargs arg = {
+		.fh = NFS_FH(inode),
+		.type = nfs4_lck_type(cmd, request),
+		.offset = request->fl_start,
+		.length = nfs4_lck_length(request),
+	};
+	struct nfs_lockres res = {
+		.server = server,
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_LOCKT],
+		.rpc_argp       = &arg,
+		.rpc_resp       = &res,
+		.rpc_cred	= state->owner->so_cred,
+	};
+	struct nfs_lowner nlo;
+	struct nfs4_lock_state *lsp;
+	int status;
+
+	down_read(&clp->cl_sem);
+	nlo.clientid = clp->cl_clientid;
+	down(&state->lock_sema);
+	lsp = nfs4_find_lock_state(state, request->fl_owner);
+	if (lsp)
+		nlo.id = lsp->ls_id; 
+	else {
+		spin_lock(&clp->cl_lock);
+		nlo.id = nfs4_alloc_lockowner_id(clp);
+		spin_unlock(&clp->cl_lock);
+	}
+	arg.u.lockt = &nlo;
+	status = rpc_call_sync(server->client, &msg, 0);
+	if (!status) {
+		request->fl_type = F_UNLCK;
+	} else if (status == -NFS4ERR_DENIED) {
+		int64_t len, start, end;
+		start = res.u.denied.offset;
+		len = res.u.denied.length;
+		end = start + len - 1;
+		if (end < 0 || len == 0)
+			request->fl_end = OFFSET_MAX;
+		else
+			request->fl_end = (loff_t)end;
+		request->fl_start = (loff_t)start;
+		request->fl_type = F_WRLCK;
+		if (res.u.denied.type & 1)
+			request->fl_type = F_RDLCK;
+		request->fl_pid = 0;
+		status = 0;
+	}
+	if (lsp)
+		nfs4_put_lock_state(lsp);
+	up(&state->lock_sema);
+	up_read(&clp->cl_sem);
+	return status;
+}
+
+static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request)
+{
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(state->inode),
+				_nfs4_proc_getlk(state, cmd, request),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int do_vfs_lock(struct file *file, struct file_lock *fl)
+{
+	int res = 0;
+	switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
+		case FL_POSIX:
+			res = posix_lock_file_wait(file, fl);
+			break;
+		case FL_FLOCK:
+			res = flock_lock_file_wait(file, fl);
+			break;
+		default:
+			BUG();
+	}
+	if (res < 0)
+		printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n",
+				__FUNCTION__);
+	return res;
+}
+
+static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
+{
+	struct inode *inode = state->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_lockargs arg = {
+		.fh = NFS_FH(inode),
+		.type = nfs4_lck_type(cmd, request),
+		.offset = request->fl_start,
+		.length = nfs4_lck_length(request),
+	};
+	struct nfs_lockres res = {
+		.server = server,
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
+		.rpc_argp       = &arg,
+		.rpc_resp       = &res,
+		.rpc_cred	= state->owner->so_cred,
+	};
+	struct nfs4_lock_state *lsp;
+	struct nfs_locku_opargs luargs;
+	int status = 0;
+			
+	down_read(&clp->cl_sem);
+	down(&state->lock_sema);
+	lsp = nfs4_find_lock_state(state, request->fl_owner);
+	if (!lsp)
+		goto out;
+	/* We might have lost the locks! */
+	if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) {
+		luargs.seqid = lsp->ls_seqid;
+		memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid));
+		arg.u.locku = &luargs;
+		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+		nfs4_increment_lock_seqid(status, lsp);
+	}
+
+	if (status == 0) {
+		memcpy(&lsp->ls_stateid,  &res.u.stateid, 
+				sizeof(lsp->ls_stateid));
+		nfs4_notify_unlck(state, request, lsp);
+	}
+	nfs4_put_lock_state(lsp);
+out:
+	up(&state->lock_sema);
+	if (status == 0)
+		do_vfs_lock(request->fl_file, request);
+	up_read(&clp->cl_sem);
+	return status;
+}
+
+static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
+{
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(state->inode),
+				_nfs4_proc_unlck(state, cmd, request),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim)
+{
+	struct inode *inode = state->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs4_lock_state *lsp;
+	struct nfs_lockargs arg = {
+		.fh = NFS_FH(inode),
+		.type = nfs4_lck_type(cmd, request),
+		.offset = request->fl_start,
+		.length = nfs4_lck_length(request),
+	};
+	struct nfs_lockres res = {
+		.server = server,
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_LOCK],
+		.rpc_argp       = &arg,
+		.rpc_resp       = &res,
+		.rpc_cred	= state->owner->so_cred,
+	};
+	struct nfs_lock_opargs largs = {
+		.reclaim = reclaim,
+		.new_lock_owner = 0,
+	};
+	int status;
+
+	lsp = nfs4_get_lock_state(state, request->fl_owner);
+	if (lsp == NULL)
+		return -ENOMEM;
+	if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) {
+		struct nfs4_state_owner *owner = state->owner;
+		struct nfs_open_to_lock otl = {
+			.lock_owner = {
+				.clientid = server->nfs4_state->cl_clientid,
+			},
+		};
+
+		otl.lock_seqid = lsp->ls_seqid;
+		otl.lock_owner.id = lsp->ls_id;
+		memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid));
+		largs.u.open_lock = &otl;
+		largs.new_lock_owner = 1;
+		arg.u.lock = &largs;
+		down(&owner->so_sema);
+		otl.open_seqid = owner->so_seqid;
+		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+		/* increment open_owner seqid on success, and 
+		* seqid mutating errors */
+		nfs4_increment_seqid(status, owner);
+		up(&owner->so_sema);
+	} else {
+		struct nfs_exist_lock el = {
+			.seqid = lsp->ls_seqid,
+		};
+		memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid));
+		largs.u.exist_lock = &el;
+		largs.new_lock_owner = 0;
+		arg.u.lock = &largs;
+		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+	}
+	/* increment seqid on success, and * seqid mutating errors*/
+	nfs4_increment_lock_seqid(status, lsp);
+	/* save the returned stateid. */
+	if (status == 0) {
+		memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid));
+		lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+		if (!reclaim)
+			nfs4_notify_setlk(state, request, lsp);
+	} else if (status == -NFS4ERR_DENIED)
+		status = -EAGAIN;
+	nfs4_put_lock_state(lsp);
+	return status;
+}
+
+static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request)
+{
+	return _nfs4_do_setlk(state, F_SETLK, request, 1);
+}
+
+static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request)
+{
+	return _nfs4_do_setlk(state, F_SETLK, request, 0);
+}
+
+static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
+{
+	struct nfs4_client *clp = state->owner->so_client;
+	int status;
+
+	down_read(&clp->cl_sem);
+	down(&state->lock_sema);
+	status = _nfs4_do_setlk(state, cmd, request, 0);
+	up(&state->lock_sema);
+	if (status == 0) {
+		/* Note: we always want to sleep here! */
+		request->fl_flags |= FL_SLEEP;
+		if (do_vfs_lock(request->fl_file, request) < 0)
+			printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__);
+	}
+	up_read(&clp->cl_sem);
+	return status;
+}
+
+static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
+{
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(state->inode),
+				_nfs4_proc_setlk(state, cmd, request),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int
+nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
+{
+	struct nfs_open_context *ctx;
+	struct nfs4_state *state;
+	unsigned long timeout = NFS4_LOCK_MINTIMEOUT;
+	int status;
+
+	/* verify open state */
+	ctx = (struct nfs_open_context *)filp->private_data;
+	state = ctx->state;
+
+	if (request->fl_start < 0 || request->fl_end < 0)
+		return -EINVAL;
+
+	if (IS_GETLK(cmd))
+		return nfs4_proc_getlk(state, F_GETLK, request);
+
+	if (!(IS_SETLK(cmd) || IS_SETLKW(cmd)))
+		return -EINVAL;
+
+	if (request->fl_type == F_UNLCK)
+		return nfs4_proc_unlck(state, cmd, request);
+
+	do {
+		status = nfs4_proc_setlk(state, cmd, request);
+		if ((status != -EAGAIN) || IS_SETLK(cmd))
+			break;
+		timeout = nfs4_set_lock_task_retry(timeout);
+		status = -ERESTARTSYS;
+		if (signalled())
+			break;
+	} while(status < 0);
+
+	return status;
+}
+
+struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = {
+	.recover_open	= nfs4_open_reclaim,
+	.recover_lock	= nfs4_lock_reclaim,
+};
+
+struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = {
+	.recover_open	= nfs4_open_expired,
+	.recover_lock	= nfs4_lock_expired,
+};
+
+struct nfs_rpc_ops	nfs_v4_clientops = {
+	.version	= 4,			/* protocol version */
+	.dentry_ops	= &nfs4_dentry_operations,
+	.dir_inode_ops	= &nfs4_dir_inode_operations,
+	.getroot	= nfs4_proc_get_root,
+	.getattr	= nfs4_proc_getattr,
+	.setattr	= nfs4_proc_setattr,
+	.lookup		= nfs4_proc_lookup,
+	.access		= nfs4_proc_access,
+	.readlink	= nfs4_proc_readlink,
+	.read		= nfs4_proc_read,
+	.write		= nfs4_proc_write,
+	.commit		= nfs4_proc_commit,
+	.create		= nfs4_proc_create,
+	.remove		= nfs4_proc_remove,
+	.unlink_setup	= nfs4_proc_unlink_setup,
+	.unlink_done	= nfs4_proc_unlink_done,
+	.rename		= nfs4_proc_rename,
+	.link		= nfs4_proc_link,
+	.symlink	= nfs4_proc_symlink,
+	.mkdir		= nfs4_proc_mkdir,
+	.rmdir		= nfs4_proc_remove,
+	.readdir	= nfs4_proc_readdir,
+	.mknod		= nfs4_proc_mknod,
+	.statfs		= nfs4_proc_statfs,
+	.fsinfo		= nfs4_proc_fsinfo,
+	.pathconf	= nfs4_proc_pathconf,
+	.decode_dirent	= nfs4_decode_dirent,
+	.read_setup	= nfs4_proc_read_setup,
+	.write_setup	= nfs4_proc_write_setup,
+	.commit_setup	= nfs4_proc_commit_setup,
+	.file_open      = nfs4_proc_file_open,
+	.file_release   = nfs4_proc_file_release,
+	.lock		= nfs4_proc_lock,
+};
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
new file mode 100644
index 00000000000..667e06f1c64
--- /dev/null
+++ b/fs/nfs/nfs4renewd.c
@@ -0,0 +1,148 @@
+/*
+ *  fs/nfs/nfs4renewd.c
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Implementation of the NFSv4 "renew daemon", which wakes up periodically to
+ * send a RENEW, to keep state alive on the server.  The daemon is implemented
+ * as an rpc_task, not a real kernel thread, so it always runs in rpciod's
+ * context.  There is one renewd per nfs_server.
+ *
+ * TODO: If the send queue gets backlogged (e.g., if the server goes down),
+ * we will keep filling the queue with periodic RENEW requests.  We need a
+ * mechanism for ensuring that if renewd successfully sends off a request,
+ * then it only wakes up when the request is finished.  Maybe use the
+ * child task framework of the RPC layer?
+ */
+
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/clnt.h>
+
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+
+#define NFSDBG_FACILITY	NFSDBG_PROC
+
+void
+nfs4_renew_state(void *data)
+{
+	struct nfs4_client *clp = (struct nfs4_client *)data;
+	long lease, timeout;
+	unsigned long last, now;
+
+	down_read(&clp->cl_sem);
+	dprintk("%s: start\n", __FUNCTION__);
+	/* Are there any active superblocks? */
+	if (list_empty(&clp->cl_superblocks))
+		goto out; 
+	spin_lock(&clp->cl_lock);
+	lease = clp->cl_lease_time;
+	last = clp->cl_last_renewal;
+	now = jiffies;
+	timeout = (2 * lease) / 3 + (long)last - (long)now;
+	/* Are we close to a lease timeout? */
+	if (time_after(now, last + lease/3)) {
+		spin_unlock(&clp->cl_lock);
+		/* Queue an asynchronous RENEW. */
+		nfs4_proc_async_renew(clp);
+		timeout = (2 * lease) / 3;
+		spin_lock(&clp->cl_lock);
+	} else
+		dprintk("%s: failed to call renewd. Reason: lease not expired \n",
+				__FUNCTION__);
+	if (timeout < 5 * HZ)    /* safeguard */
+		timeout = 5 * HZ;
+	dprintk("%s: requeueing work. Lease period = %ld\n",
+			__FUNCTION__, (timeout + HZ - 1) / HZ);
+	cancel_delayed_work(&clp->cl_renewd);
+	schedule_delayed_work(&clp->cl_renewd, timeout);
+	spin_unlock(&clp->cl_lock);
+out:
+	up_read(&clp->cl_sem);
+	dprintk("%s: done\n", __FUNCTION__);
+}
+
+/* Must be called with clp->cl_sem locked for writes */
+void
+nfs4_schedule_state_renewal(struct nfs4_client *clp)
+{
+	long timeout;
+
+	spin_lock(&clp->cl_lock);
+	timeout = (2 * clp->cl_lease_time) / 3 + (long)clp->cl_last_renewal
+		- (long)jiffies;
+	if (timeout < 5 * HZ)
+		timeout = 5 * HZ;
+	dprintk("%s: requeueing work. Lease period = %ld\n",
+			__FUNCTION__, (timeout + HZ - 1) / HZ);
+	cancel_delayed_work(&clp->cl_renewd);
+	schedule_delayed_work(&clp->cl_renewd, timeout);
+	spin_unlock(&clp->cl_lock);
+}
+
+void
+nfs4_renewd_prepare_shutdown(struct nfs_server *server)
+{
+	struct nfs4_client *clp = server->nfs4_state;
+
+	if (!clp)
+		return;
+	flush_scheduled_work();
+	down_write(&clp->cl_sem);
+	if (!list_empty(&server->nfs4_siblings))
+		list_del_init(&server->nfs4_siblings);
+	up_write(&clp->cl_sem);
+}
+
+/* Must be called with clp->cl_sem locked for writes */
+void
+nfs4_kill_renewd(struct nfs4_client *clp)
+{
+	down_read(&clp->cl_sem);
+	if (!list_empty(&clp->cl_superblocks)) {
+		up_read(&clp->cl_sem);
+		return;
+	}
+	cancel_delayed_work(&clp->cl_renewd);
+	up_read(&clp->cl_sem);
+	flush_scheduled_work();
+}
+
+/*
+ * Local variables:
+ *   c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
new file mode 100644
index 00000000000..231cebce3c8
--- /dev/null
+++ b/fs/nfs/nfs4state.c
@@ -0,0 +1,932 @@
+/*
+ *  fs/nfs/nfs4state.c
+ *
+ *  Client-side XDR for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Implementation of the NFSv4 state model.  For the time being,
+ * this is minimal, but will be made much more complex in a
+ * subsequent patch.
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_idmap.h>
+#include <linux/workqueue.h>
+#include <linux/bitops.h>
+
+#include "callback.h"
+#include "delegation.h"
+
+#define OPENOWNER_POOL_SIZE	8
+
+static DEFINE_SPINLOCK(state_spinlock);
+
+nfs4_stateid zero_stateid;
+
+#if 0
+nfs4_stateid one_stateid =
+	{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+#endif
+
+static LIST_HEAD(nfs4_clientid_list);
+
+static void nfs4_recover_state(void *);
+extern void nfs4_renew_state(void *);
+
+void
+init_nfsv4_state(struct nfs_server *server)
+{
+	server->nfs4_state = NULL;
+	INIT_LIST_HEAD(&server->nfs4_siblings);
+}
+
+void
+destroy_nfsv4_state(struct nfs_server *server)
+{
+	if (server->mnt_path) {
+		kfree(server->mnt_path);
+		server->mnt_path = NULL;
+	}
+	if (server->nfs4_state) {
+		nfs4_put_client(server->nfs4_state);
+		server->nfs4_state = NULL;
+	}
+}
+
+/*
+ * nfs4_get_client(): returns an empty client structure
+ * nfs4_put_client(): drops reference to client structure
+ *
+ * Since these are allocated/deallocated very rarely, we don't
+ * bother putting them in a slab cache...
+ */
+static struct nfs4_client *
+nfs4_alloc_client(struct in_addr *addr)
+{
+	struct nfs4_client *clp;
+
+	if (nfs_callback_up() < 0)
+		return NULL;
+	if ((clp = kmalloc(sizeof(*clp), GFP_KERNEL)) == NULL) {
+		nfs_callback_down();
+		return NULL;
+	}
+	memset(clp, 0, sizeof(*clp));
+	memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
+	init_rwsem(&clp->cl_sem);
+	INIT_LIST_HEAD(&clp->cl_delegations);
+	INIT_LIST_HEAD(&clp->cl_state_owners);
+	INIT_LIST_HEAD(&clp->cl_unused);
+	spin_lock_init(&clp->cl_lock);
+	atomic_set(&clp->cl_count, 1);
+	INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp);
+	INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
+	INIT_LIST_HEAD(&clp->cl_superblocks);
+	init_waitqueue_head(&clp->cl_waitq);
+	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
+	clp->cl_boot_time = CURRENT_TIME;
+	clp->cl_state = 1 << NFS4CLNT_OK;
+	return clp;
+}
+
+static void
+nfs4_free_client(struct nfs4_client *clp)
+{
+	struct nfs4_state_owner *sp;
+
+	while (!list_empty(&clp->cl_unused)) {
+		sp = list_entry(clp->cl_unused.next,
+				struct nfs4_state_owner,
+				so_list);
+		list_del(&sp->so_list);
+		kfree(sp);
+	}
+	BUG_ON(!list_empty(&clp->cl_state_owners));
+	if (clp->cl_cred)
+		put_rpccred(clp->cl_cred);
+	nfs_idmap_delete(clp);
+	if (clp->cl_rpcclient)
+		rpc_shutdown_client(clp->cl_rpcclient);
+	kfree(clp);
+	nfs_callback_down();
+}
+
+static struct nfs4_client *__nfs4_find_client(struct in_addr *addr)
+{
+	struct nfs4_client *clp;
+	list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) {
+		if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) {
+			atomic_inc(&clp->cl_count);
+			return clp;
+		}
+	}
+	return NULL;
+}
+
+struct nfs4_client *nfs4_find_client(struct in_addr *addr)
+{
+	struct nfs4_client *clp;
+	spin_lock(&state_spinlock);
+	clp = __nfs4_find_client(addr);
+	spin_unlock(&state_spinlock);
+	return clp;
+}
+
+struct nfs4_client *
+nfs4_get_client(struct in_addr *addr)
+{
+	struct nfs4_client *clp, *new = NULL;
+
+	spin_lock(&state_spinlock);
+	for (;;) {
+		clp = __nfs4_find_client(addr);
+		if (clp != NULL)
+			break;
+		clp = new;
+		if (clp != NULL) {
+			list_add(&clp->cl_servers, &nfs4_clientid_list);
+			new = NULL;
+			break;
+		}
+		spin_unlock(&state_spinlock);
+		new = nfs4_alloc_client(addr);
+		spin_lock(&state_spinlock);
+		if (new == NULL)
+			break;
+	}
+	spin_unlock(&state_spinlock);
+	if (new)
+		nfs4_free_client(new);
+	return clp;
+}
+
+void
+nfs4_put_client(struct nfs4_client *clp)
+{
+	if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock))
+		return;
+	list_del(&clp->cl_servers);
+	spin_unlock(&state_spinlock);
+	BUG_ON(!list_empty(&clp->cl_superblocks));
+	wake_up_all(&clp->cl_waitq);
+	rpc_wake_up(&clp->cl_rpcwaitq);
+	nfs4_kill_renewd(clp);
+	nfs4_free_client(clp);
+}
+
+static int __nfs4_init_client(struct nfs4_client *clp)
+{
+	int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport);
+	if (status == 0)
+		status = nfs4_proc_setclientid_confirm(clp);
+	if (status == 0)
+		nfs4_schedule_state_renewal(clp);
+	return status;
+}
+
+int nfs4_init_client(struct nfs4_client *clp)
+{
+	return nfs4_map_errors(__nfs4_init_client(clp));
+}
+
+u32
+nfs4_alloc_lockowner_id(struct nfs4_client *clp)
+{
+	return clp->cl_lockowner_id ++;
+}
+
+static struct nfs4_state_owner *
+nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
+{
+	struct nfs4_state_owner *sp = NULL;
+
+	if (!list_empty(&clp->cl_unused)) {
+		sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list);
+		atomic_inc(&sp->so_count);
+		sp->so_cred = cred;
+		list_move(&sp->so_list, &clp->cl_state_owners);
+		clp->cl_nunused--;
+	}
+	return sp;
+}
+
+static struct nfs4_state_owner *
+nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred)
+{
+	struct nfs4_state_owner *sp, *res = NULL;
+
+	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+		if (sp->so_cred != cred)
+			continue;
+		atomic_inc(&sp->so_count);
+		/* Move to the head of the list */
+		list_move(&sp->so_list, &clp->cl_state_owners);
+		res = sp;
+		break;
+	}
+	return res;
+}
+
+/*
+ * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
+ * create a new state_owner.
+ *
+ */
+static struct nfs4_state_owner *
+nfs4_alloc_state_owner(void)
+{
+	struct nfs4_state_owner *sp;
+
+	sp = kmalloc(sizeof(*sp),GFP_KERNEL);
+	if (!sp)
+		return NULL;
+	init_MUTEX(&sp->so_sema);
+	sp->so_seqid = 0;                 /* arbitrary */
+	INIT_LIST_HEAD(&sp->so_states);
+	INIT_LIST_HEAD(&sp->so_delegations);
+	atomic_set(&sp->so_count, 1);
+	return sp;
+}
+
+void
+nfs4_drop_state_owner(struct nfs4_state_owner *sp)
+{
+	struct nfs4_client *clp = sp->so_client;
+	spin_lock(&clp->cl_lock);
+	list_del_init(&sp->so_list);
+	spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * Note: must be called with clp->cl_sem held in order to prevent races
+ *       with reboot recovery!
+ */
+struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
+{
+	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs4_state_owner *sp, *new;
+
+	get_rpccred(cred);
+	new = nfs4_alloc_state_owner();
+	spin_lock(&clp->cl_lock);
+	sp = nfs4_find_state_owner(clp, cred);
+	if (sp == NULL)
+		sp = nfs4_client_grab_unused(clp, cred);
+	if (sp == NULL && new != NULL) {
+		list_add(&new->so_list, &clp->cl_state_owners);
+		new->so_client = clp;
+		new->so_id = nfs4_alloc_lockowner_id(clp);
+		new->so_cred = cred;
+		sp = new;
+		new = NULL;
+	}
+	spin_unlock(&clp->cl_lock);
+	if (new)
+		kfree(new);
+	if (sp != NULL)
+		return sp;
+	put_rpccred(cred);
+	return NULL;
+}
+
+/*
+ * Must be called with clp->cl_sem held in order to avoid races
+ * with state recovery...
+ */
+void nfs4_put_state_owner(struct nfs4_state_owner *sp)
+{
+	struct nfs4_client *clp = sp->so_client;
+	struct rpc_cred *cred = sp->so_cred;
+
+	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
+		return;
+	if (clp->cl_nunused >= OPENOWNER_POOL_SIZE)
+		goto out_free;
+	if (list_empty(&sp->so_list))
+		goto out_free;
+	list_move(&sp->so_list, &clp->cl_unused);
+	clp->cl_nunused++;
+	spin_unlock(&clp->cl_lock);
+	put_rpccred(cred);
+	cred = NULL;
+	return;
+out_free:
+	list_del(&sp->so_list);
+	spin_unlock(&clp->cl_lock);
+	put_rpccred(cred);
+	kfree(sp);
+}
+
+static struct nfs4_state *
+nfs4_alloc_open_state(void)
+{
+	struct nfs4_state *state;
+
+	state = kmalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return NULL;
+	state->state = 0;
+	state->nreaders = 0;
+	state->nwriters = 0;
+	state->flags = 0;
+	memset(state->stateid.data, 0, sizeof(state->stateid.data));
+	atomic_set(&state->count, 1);
+	INIT_LIST_HEAD(&state->lock_states);
+	init_MUTEX(&state->lock_sema);
+	rwlock_init(&state->state_lock);
+	return state;
+}
+
+static struct nfs4_state *
+__nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs4_state *state;
+
+	mode &= (FMODE_READ|FMODE_WRITE);
+	list_for_each_entry(state, &nfsi->open_states, inode_states) {
+		if (state->owner->so_cred != cred)
+			continue;
+		if ((mode & FMODE_READ) != 0 && state->nreaders == 0)
+			continue;
+		if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0)
+			continue;
+		if ((state->state & mode) != mode)
+			continue;
+		atomic_inc(&state->count);
+		if (mode & FMODE_READ)
+			state->nreaders++;
+		if (mode & FMODE_WRITE)
+			state->nwriters++;
+		return state;
+	}
+	return NULL;
+}
+
+static struct nfs4_state *
+__nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs4_state *state;
+
+	list_for_each_entry(state, &nfsi->open_states, inode_states) {
+		/* Is this in the process of being freed? */
+		if (state->nreaders == 0 && state->nwriters == 0)
+			continue;
+		if (state->owner == owner) {
+			atomic_inc(&state->count);
+			return state;
+		}
+	}
+	return NULL;
+}
+
+struct nfs4_state *
+nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
+{
+	struct nfs4_state *state;
+
+	spin_lock(&inode->i_lock);
+	state = __nfs4_find_state(inode, cred, mode);
+	spin_unlock(&inode->i_lock);
+	return state;
+}
+
+static void
+nfs4_free_open_state(struct nfs4_state *state)
+{
+	kfree(state);
+}
+
+struct nfs4_state *
+nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
+{
+	struct nfs4_state *state, *new;
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	spin_lock(&inode->i_lock);
+	state = __nfs4_find_state_byowner(inode, owner);
+	spin_unlock(&inode->i_lock);
+	if (state)
+		goto out;
+	new = nfs4_alloc_open_state();
+	spin_lock(&inode->i_lock);
+	state = __nfs4_find_state_byowner(inode, owner);
+	if (state == NULL && new != NULL) {
+		state = new;
+		/* Caller *must* be holding owner->so_sem */
+		/* Note: The reclaim code dictates that we add stateless
+		 * and read-only stateids to the end of the list */
+		list_add_tail(&state->open_states, &owner->so_states);
+		state->owner = owner;
+		atomic_inc(&owner->so_count);
+		list_add(&state->inode_states, &nfsi->open_states);
+		state->inode = igrab(inode);
+		spin_unlock(&inode->i_lock);
+	} else {
+		spin_unlock(&inode->i_lock);
+		if (new)
+			nfs4_free_open_state(new);
+	}
+out:
+	return state;
+}
+
+/*
+ * Beware! Caller must be holding exactly one
+ * reference to clp->cl_sem and owner->so_sema!
+ */
+void nfs4_put_open_state(struct nfs4_state *state)
+{
+	struct inode *inode = state->inode;
+	struct nfs4_state_owner *owner = state->owner;
+
+	if (!atomic_dec_and_lock(&state->count, &inode->i_lock))
+		return;
+	if (!list_empty(&state->inode_states))
+		list_del(&state->inode_states);
+	spin_unlock(&inode->i_lock);
+	list_del(&state->open_states);
+	iput(inode);
+	BUG_ON (state->state != 0);
+	nfs4_free_open_state(state);
+	nfs4_put_state_owner(owner);
+}
+
+/*
+ * Beware! Caller must be holding no references to clp->cl_sem!
+ * of owner->so_sema!
+ */
+void nfs4_close_state(struct nfs4_state *state, mode_t mode)
+{
+	struct inode *inode = state->inode;
+	struct nfs4_state_owner *owner = state->owner;
+	struct nfs4_client *clp = owner->so_client;
+	int newstate;
+
+	atomic_inc(&owner->so_count);
+	down_read(&clp->cl_sem);
+	down(&owner->so_sema);
+	/* Protect against nfs4_find_state() */
+	spin_lock(&inode->i_lock);
+	if (mode & FMODE_READ)
+		state->nreaders--;
+	if (mode & FMODE_WRITE)
+		state->nwriters--;
+	if (state->nwriters == 0) {
+		if (state->nreaders == 0)
+			list_del_init(&state->inode_states);
+		/* See reclaim code */
+		list_move_tail(&state->open_states, &owner->so_states);
+	}
+	spin_unlock(&inode->i_lock);
+	newstate = 0;
+	if (state->state != 0) {
+		if (state->nreaders)
+			newstate |= FMODE_READ;
+		if (state->nwriters)
+			newstate |= FMODE_WRITE;
+		if (state->state == newstate)
+			goto out;
+		if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS)
+			return;
+	}
+out:
+	nfs4_put_open_state(state);
+	up(&owner->so_sema);
+	nfs4_put_state_owner(owner);
+	up_read(&clp->cl_sem);
+}
+
+/*
+ * Search the state->lock_states for an existing lock_owner
+ * that is compatible with current->files
+ */
+static struct nfs4_lock_state *
+__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
+{
+	struct nfs4_lock_state *pos;
+	list_for_each_entry(pos, &state->lock_states, ls_locks) {
+		if (pos->ls_owner != fl_owner)
+			continue;
+		atomic_inc(&pos->ls_count);
+		return pos;
+	}
+	return NULL;
+}
+
+struct nfs4_lock_state *
+nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
+{
+	struct nfs4_lock_state *lsp;
+	read_lock(&state->state_lock);
+	lsp = __nfs4_find_lock_state(state, fl_owner);
+	read_unlock(&state->state_lock);
+	return lsp;
+}
+
+/*
+ * Return a compatible lock_state. If no initialized lock_state structure
+ * exists, return an uninitialized one.
+ *
+ * The caller must be holding state->lock_sema
+ */
+static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
+{
+	struct nfs4_lock_state *lsp;
+	struct nfs4_client *clp = state->owner->so_client;
+
+	lsp = kmalloc(sizeof(*lsp), GFP_KERNEL);
+	if (lsp == NULL)
+		return NULL;
+	lsp->ls_flags = 0;
+	lsp->ls_seqid = 0;	/* arbitrary */
+	lsp->ls_id = -1; 
+	memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data));
+	atomic_set(&lsp->ls_count, 1);
+	lsp->ls_owner = fl_owner;
+	INIT_LIST_HEAD(&lsp->ls_locks);
+	spin_lock(&clp->cl_lock);
+	lsp->ls_id = nfs4_alloc_lockowner_id(clp);
+	spin_unlock(&clp->cl_lock);
+	return lsp;
+}
+
+/*
+ * Return a compatible lock_state. If no initialized lock_state structure
+ * exists, return an uninitialized one.
+ *
+ * The caller must be holding state->lock_sema and clp->cl_sem
+ */
+struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
+{
+	struct nfs4_lock_state * lsp;
+	
+	lsp = nfs4_find_lock_state(state, owner);
+	if (lsp == NULL)
+		lsp = nfs4_alloc_lock_state(state, owner);
+	return lsp;
+}
+
+/*
+ * Byte-range lock aware utility to initialize the stateid of read/write
+ * requests.
+ */
+void
+nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
+{
+	if (test_bit(LK_STATE_IN_USE, &state->flags)) {
+		struct nfs4_lock_state *lsp;
+
+		lsp = nfs4_find_lock_state(state, fl_owner);
+		if (lsp) {
+			memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
+			nfs4_put_lock_state(lsp);
+			return;
+		}
+	}
+	memcpy(dst, &state->stateid, sizeof(*dst));
+}
+
+/*
+* Called with state->lock_sema and clp->cl_sem held.
+*/
+void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
+{
+	if (status == NFS_OK || seqid_mutating_err(-status))
+		lsp->ls_seqid++;
+}
+
+/* 
+* Check to see if the request lock (type FL_UNLK) effects the fl lock.
+*
+* fl and request must have the same posix owner
+*
+* return: 
+* 0 -> fl not effected by request
+* 1 -> fl consumed by request
+*/
+
+static int
+nfs4_check_unlock(struct file_lock *fl, struct file_lock *request)
+{
+	if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end)
+		return 1;
+	return 0;
+}
+
+/*
+ * Post an initialized lock_state on the state->lock_states list.
+ */
+void nfs4_notify_setlk(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
+{
+	if (!list_empty(&lsp->ls_locks))
+		return;
+	atomic_inc(&lsp->ls_count);
+	write_lock(&state->state_lock);
+	list_add(&lsp->ls_locks, &state->lock_states);
+	set_bit(LK_STATE_IN_USE, &state->flags);
+	write_unlock(&state->state_lock);
+}
+
+/* 
+ * to decide to 'reap' lock state:
+ * 1) search i_flock for file_locks with fl.lock_state = to ls.
+ * 2) determine if unlock will consume found lock. 
+ * 	if so, reap
+ *
+ * 	else, don't reap.
+ *
+ */
+void
+nfs4_notify_unlck(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
+{
+	struct inode *inode = state->inode;
+	struct file_lock *fl;
+
+	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
+		if (!(fl->fl_flags & FL_POSIX))
+			continue;
+		if (fl->fl_owner != lsp->ls_owner)
+			continue;
+		/* Exit if we find at least one lock which is not consumed */
+		if (nfs4_check_unlock(fl,request) == 0)
+			return;
+	}
+
+	write_lock(&state->state_lock);
+	list_del_init(&lsp->ls_locks);
+	if (list_empty(&state->lock_states))
+		clear_bit(LK_STATE_IN_USE, &state->flags);
+	write_unlock(&state->state_lock);
+	nfs4_put_lock_state(lsp);
+}
+
+/*
+ * Release reference to lock_state, and free it if we see that
+ * it is no longer in use
+ */
+void
+nfs4_put_lock_state(struct nfs4_lock_state *lsp)
+{
+	if (!atomic_dec_and_test(&lsp->ls_count))
+		return;
+	BUG_ON (!list_empty(&lsp->ls_locks));
+	kfree(lsp);
+}
+
+/*
+* Called with sp->so_sema and clp->cl_sem held.
+*
+* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
+* failed with a seqid incrementing error -
+* see comments nfs_fs.h:seqid_mutating_error()
+*/
+void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp)
+{
+	if (status == NFS_OK || seqid_mutating_err(-status))
+		sp->so_seqid++;
+	/* If the server returns BAD_SEQID, unhash state_owner here */
+	if (status == -NFS4ERR_BAD_SEQID)
+		nfs4_drop_state_owner(sp);
+}
+
+static int reclaimer(void *);
+struct reclaimer_args {
+	struct nfs4_client *clp;
+	struct completion complete;
+};
+
+/*
+ * State recovery routine
+ */
+void
+nfs4_recover_state(void *data)
+{
+	struct nfs4_client *clp = (struct nfs4_client *)data;
+	struct reclaimer_args args = {
+		.clp = clp,
+	};
+	might_sleep();
+
+	init_completion(&args.complete);
+
+	if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0)
+		goto out_failed_clear;
+	wait_for_completion(&args.complete);
+	return;
+out_failed_clear:
+	set_bit(NFS4CLNT_OK, &clp->cl_state);
+	wake_up_all(&clp->cl_waitq);
+	rpc_wake_up(&clp->cl_rpcwaitq);
+}
+
+/*
+ * Schedule a state recovery attempt
+ */
+void
+nfs4_schedule_state_recovery(struct nfs4_client *clp)
+{
+	if (!clp)
+		return;
+	if (test_and_clear_bit(NFS4CLNT_OK, &clp->cl_state))
+		schedule_work(&clp->cl_recoverd);
+}
+
+static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_state *state)
+{
+	struct inode *inode = state->inode;
+	struct file_lock *fl;
+	int status = 0;
+
+	for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
+		if (!(fl->fl_flags & FL_POSIX))
+			continue;
+		if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state)
+			continue;
+		status = ops->recover_lock(state, fl);
+		if (status >= 0)
+			continue;
+		switch (status) {
+			default:
+				printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
+						__FUNCTION__, status);
+			case -NFS4ERR_EXPIRED:
+			case -NFS4ERR_NO_GRACE:
+			case -NFS4ERR_RECLAIM_BAD:
+			case -NFS4ERR_RECLAIM_CONFLICT:
+				/* kill_proc(fl->fl_owner, SIGLOST, 1); */
+				break;
+			case -NFS4ERR_STALE_CLIENTID:
+				goto out_err;
+		}
+	}
+	return 0;
+out_err:
+	return status;
+}
+
+static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct nfs4_state_owner *sp)
+{
+	struct nfs4_state *state;
+	struct nfs4_lock_state *lock;
+	int status = 0;
+
+	/* Note: we rely on the sp->so_states list being ordered 
+	 * so that we always reclaim open(O_RDWR) and/or open(O_WRITE)
+	 * states first.
+	 * This is needed to ensure that the server won't give us any
+	 * read delegations that we have to return if, say, we are
+	 * recovering after a network partition or a reboot from a
+	 * server that doesn't support a grace period.
+	 */
+	list_for_each_entry(state, &sp->so_states, open_states) {
+		if (state->state == 0)
+			continue;
+		status = ops->recover_open(sp, state);
+		list_for_each_entry(lock, &state->lock_states, ls_locks)
+			lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
+		if (status >= 0) {
+			status = nfs4_reclaim_locks(ops, state);
+			if (status < 0)
+				goto out_err;
+			list_for_each_entry(lock, &state->lock_states, ls_locks) {
+				if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
+					printk("%s: Lock reclaim failed!\n",
+							__FUNCTION__);
+			}
+			continue;
+		}
+		switch (status) {
+			default:
+				printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
+						__FUNCTION__, status);
+			case -ENOENT:
+			case -NFS4ERR_RECLAIM_BAD:
+			case -NFS4ERR_RECLAIM_CONFLICT:
+				/*
+				 * Open state on this file cannot be recovered
+				 * All we can do is revert to using the zero stateid.
+				 */
+				memset(state->stateid.data, 0,
+					sizeof(state->stateid.data));
+				/* Mark the file as being 'closed' */
+				state->state = 0;
+				break;
+			case -NFS4ERR_EXPIRED:
+			case -NFS4ERR_NO_GRACE:
+			case -NFS4ERR_STALE_CLIENTID:
+				goto out_err;
+		}
+	}
+	return 0;
+out_err:
+	return status;
+}
+
+static int reclaimer(void *ptr)
+{
+	struct reclaimer_args *args = (struct reclaimer_args *)ptr;
+	struct nfs4_client *clp = args->clp;
+	struct nfs4_state_owner *sp;
+	struct nfs4_state_recovery_ops *ops;
+	int status = 0;
+
+	daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr));
+	allow_signal(SIGKILL);
+
+	atomic_inc(&clp->cl_count);
+	complete(&args->complete);
+
+	/* Ensure exclusive access to NFSv4 state */
+	lock_kernel();
+	down_write(&clp->cl_sem);
+	/* Are there any NFS mounts out there? */
+	if (list_empty(&clp->cl_superblocks))
+		goto out;
+restart_loop:
+	status = nfs4_proc_renew(clp);
+	switch (status) {
+		case 0:
+		case -NFS4ERR_CB_PATH_DOWN:
+			goto out;
+		case -NFS4ERR_STALE_CLIENTID:
+		case -NFS4ERR_LEASE_MOVED:
+			ops = &nfs4_reboot_recovery_ops;
+			break;
+		default:
+			ops = &nfs4_network_partition_recovery_ops;
+	};
+	status = __nfs4_init_client(clp);
+	if (status)
+		goto out_error;
+	/* Mark all delegations for reclaim */
+	nfs_delegation_mark_reclaim(clp);
+	/* Note: list is protected by exclusive lock on cl->cl_sem */
+	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+		status = nfs4_reclaim_open_state(ops, sp);
+		if (status < 0) {
+			if (status == -NFS4ERR_NO_GRACE) {
+				ops = &nfs4_network_partition_recovery_ops;
+				status = nfs4_reclaim_open_state(ops, sp);
+			}
+			if (status == -NFS4ERR_STALE_CLIENTID)
+				goto restart_loop;
+			if (status == -NFS4ERR_EXPIRED)
+				goto restart_loop;
+		}
+	}
+	nfs_delegation_reap_unclaimed(clp);
+out:
+	set_bit(NFS4CLNT_OK, &clp->cl_state);
+	up_write(&clp->cl_sem);
+	unlock_kernel();
+	wake_up_all(&clp->cl_waitq);
+	rpc_wake_up(&clp->cl_rpcwaitq);
+	if (status == -NFS4ERR_CB_PATH_DOWN)
+		nfs_handle_cb_pathdown(clp);
+	nfs4_put_client(clp);
+	return 0;
+out_error:
+	printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
+				NIPQUAD(clp->cl_addr.s_addr), -status);
+	goto out;
+}
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
new file mode 100644
index 00000000000..5f4de05763c
--- /dev/null
+++ b/fs/nfs/nfs4xdr.c
@@ -0,0 +1,4034 @@
+/*
+ *  fs/nfs/nfs4xdr.c
+ *
+ *  Client-side XDR for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson   <andros@umich.edu>
+ * 
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/param.h>
+#include <linux/time.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
+#include <linux/kdev_t.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_idmap.h>
+
+#define NFSDBG_FACILITY		NFSDBG_XDR
+
+/* Mapping from NFS error code to "errno" error code. */
+#define errno_NFSERR_IO		EIO
+
+static int nfs_stat_to_errno(int);
+
+/* NFSv4 COMPOUND tags are only wanted for debugging purposes */
+#ifdef DEBUG
+#define NFS4_MAXTAGLEN		20
+#else
+#define NFS4_MAXTAGLEN		0
+#endif
+
+/* lock,open owner id: 
+ * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT  >> 2)
+ */
+#define owner_id_maxsz          (1 + 1)
+#define compound_encode_hdr_maxsz	(3 + (NFS4_MAXTAGLEN >> 2))
+#define compound_decode_hdr_maxsz	(3 + (NFS4_MAXTAGLEN >> 2))
+#define op_encode_hdr_maxsz	(1)
+#define op_decode_hdr_maxsz	(2)
+#define encode_putfh_maxsz	(op_encode_hdr_maxsz + 1 + \
+				(NFS4_FHSIZE >> 2))
+#define decode_putfh_maxsz	(op_decode_hdr_maxsz)
+#define encode_putrootfh_maxsz	(op_encode_hdr_maxsz)
+#define decode_putrootfh_maxsz	(op_decode_hdr_maxsz)
+#define encode_getfh_maxsz      (op_encode_hdr_maxsz)
+#define decode_getfh_maxsz      (op_decode_hdr_maxsz + 1 + \
+				((3+NFS4_FHSIZE) >> 2))
+#define encode_getattr_maxsz    (op_encode_hdr_maxsz + 3)
+#define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
+#define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
+#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz)
+#define decode_getattr_maxsz    (op_decode_hdr_maxsz + 3 + \
+                                nfs4_fattr_bitmap_maxsz)
+#define encode_savefh_maxsz     (op_encode_hdr_maxsz)
+#define decode_savefh_maxsz     (op_decode_hdr_maxsz)
+#define encode_fsinfo_maxsz	(op_encode_hdr_maxsz + 2)
+#define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + 11)
+#define encode_renew_maxsz	(op_encode_hdr_maxsz + 3)
+#define decode_renew_maxsz	(op_decode_hdr_maxsz)
+#define encode_setclientid_maxsz \
+				(op_encode_hdr_maxsz + \
+				4 /*server->ip_addr*/ + \
+				1 /*Netid*/ + \
+				6 /*uaddr*/ + \
+				6 + (NFS4_VERIFIER_SIZE >> 2))
+#define decode_setclientid_maxsz \
+				(op_decode_hdr_maxsz + \
+				2 + \
+				1024) /* large value for CLID_INUSE */
+#define encode_setclientid_confirm_maxsz \
+				(op_encode_hdr_maxsz + \
+				3 + (NFS4_VERIFIER_SIZE >> 2))
+#define decode_setclientid_confirm_maxsz \
+				(op_decode_hdr_maxsz)
+#define encode_lookup_maxsz	(op_encode_hdr_maxsz + \
+				1 + ((3 + NFS4_FHSIZE) >> 2))
+#define encode_remove_maxsz	(op_encode_hdr_maxsz + \
+				nfs4_name_maxsz)
+#define encode_rename_maxsz	(op_encode_hdr_maxsz + \
+				2 * nfs4_name_maxsz)
+#define decode_rename_maxsz	(op_decode_hdr_maxsz + 5 + 5)
+#define encode_link_maxsz	(op_encode_hdr_maxsz + \
+				nfs4_name_maxsz)
+#define decode_link_maxsz	(op_decode_hdr_maxsz + 5)
+#define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
+				1 + nfs4_name_maxsz + \
+				nfs4_path_maxsz + \
+				nfs4_fattr_bitmap_maxsz)
+#define decode_symlink_maxsz	(op_decode_hdr_maxsz + 8)
+#define encode_create_maxsz	(op_encode_hdr_maxsz + \
+				2 + nfs4_name_maxsz + \
+				nfs4_fattr_bitmap_maxsz)
+#define decode_create_maxsz	(op_decode_hdr_maxsz + 8)
+#define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
+#define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
+#define NFS4_enc_compound_sz	(1024)  /* XXX: large enough? */
+#define NFS4_dec_compound_sz	(1024)  /* XXX: large enough? */
+#define NFS4_enc_read_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				op_encode_hdr_maxsz + 7)
+#define NFS4_dec_read_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + 2)
+#define NFS4_enc_readlink_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				op_encode_hdr_maxsz)
+#define NFS4_dec_readlink_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz)
+#define NFS4_enc_readdir_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				op_encode_hdr_maxsz + 9)
+#define NFS4_dec_readdir_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + 2)
+#define NFS4_enc_write_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				op_encode_hdr_maxsz + 8)
+#define NFS4_dec_write_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + 4)
+#define NFS4_enc_commit_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				op_encode_hdr_maxsz + 3)
+#define NFS4_dec_commit_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + 2)
+#define NFS4_enc_open_sz        (compound_encode_hdr_maxsz + \
+                                encode_putfh_maxsz + \
+                                op_encode_hdr_maxsz + \
+                                13 + 3 + 2 + 64 + \
+                                encode_getattr_maxsz + \
+                                encode_getfh_maxsz)
+#define NFS4_dec_open_sz        (compound_decode_hdr_maxsz + \
+                                decode_putfh_maxsz + \
+                                op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \
+                                decode_getattr_maxsz + \
+                                decode_getfh_maxsz)
+#define NFS4_enc_open_confirm_sz      \
+                                (compound_encode_hdr_maxsz + \
+                                encode_putfh_maxsz + \
+                                op_encode_hdr_maxsz + 5)
+#define NFS4_dec_open_confirm_sz        (compound_decode_hdr_maxsz + \
+                                        decode_putfh_maxsz + \
+                                        op_decode_hdr_maxsz + 4)
+#define NFS4_enc_open_noattr_sz	(compound_encode_hdr_maxsz + \
+					encode_putfh_maxsz + \
+					op_encode_hdr_maxsz + \
+					11)
+#define NFS4_dec_open_noattr_sz	(compound_decode_hdr_maxsz + \
+					decode_putfh_maxsz + \
+					op_decode_hdr_maxsz + \
+					4 + 5 + 2 + 3)
+#define NFS4_enc_open_downgrade_sz \
+				(compound_encode_hdr_maxsz + \
+                                encode_putfh_maxsz + \
+                                op_encode_hdr_maxsz + 7)
+#define NFS4_dec_open_downgrade_sz \
+				(compound_decode_hdr_maxsz + \
+                                decode_putfh_maxsz + \
+                                op_decode_hdr_maxsz + 4)
+#define NFS4_enc_close_sz       (compound_encode_hdr_maxsz + \
+                                encode_putfh_maxsz + \
+                                op_encode_hdr_maxsz + 5)
+#define NFS4_dec_close_sz       (compound_decode_hdr_maxsz + \
+                                decode_putfh_maxsz + \
+                                op_decode_hdr_maxsz + 4)
+#define NFS4_enc_setattr_sz     (compound_encode_hdr_maxsz + \
+                                encode_putfh_maxsz + \
+                                op_encode_hdr_maxsz + 4 + \
+                                nfs4_fattr_bitmap_maxsz + \
+                                encode_getattr_maxsz)
+#define NFS4_dec_setattr_sz     (compound_decode_hdr_maxsz + \
+                                decode_putfh_maxsz + \
+                                op_decode_hdr_maxsz + 3)
+#define NFS4_enc_fsinfo_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_fsinfo_maxsz)
+#define NFS4_dec_fsinfo_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				decode_fsinfo_maxsz)
+#define NFS4_enc_renew_sz	(compound_encode_hdr_maxsz + \
+				encode_renew_maxsz)
+#define NFS4_dec_renew_sz	(compound_decode_hdr_maxsz + \
+				decode_renew_maxsz)
+#define NFS4_enc_setclientid_sz	(compound_encode_hdr_maxsz + \
+				encode_setclientid_maxsz)
+#define NFS4_dec_setclientid_sz	(compound_decode_hdr_maxsz + \
+				decode_setclientid_maxsz)
+#define NFS4_enc_setclientid_confirm_sz \
+				(compound_encode_hdr_maxsz + \
+				encode_setclientid_confirm_maxsz + \
+				encode_putrootfh_maxsz + \
+				encode_fsinfo_maxsz)
+#define NFS4_dec_setclientid_confirm_sz \
+				(compound_decode_hdr_maxsz + \
+				decode_setclientid_confirm_maxsz + \
+				decode_putrootfh_maxsz + \
+				decode_fsinfo_maxsz)
+#define NFS4_enc_lock_sz        (compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_getattr_maxsz + \
+				op_encode_hdr_maxsz + \
+				1 + 1 + 2 + 2 + \
+				1 + 4 + 1 + 2 + \
+				owner_id_maxsz)
+#define NFS4_dec_lock_sz        (compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				decode_getattr_maxsz + \
+				op_decode_hdr_maxsz + \
+				2 + 2 + 1 + 2 + \
+				owner_id_maxsz)
+#define NFS4_enc_lockt_sz       (compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_getattr_maxsz + \
+				op_encode_hdr_maxsz + \
+				1 + 2 + 2 + 2 + \
+				owner_id_maxsz)
+#define NFS4_dec_lockt_sz       (NFS4_dec_lock_sz)
+#define NFS4_enc_locku_sz       (compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_getattr_maxsz + \
+				op_encode_hdr_maxsz + \
+				1 + 1 + 4 + 2 + 2)
+#define NFS4_dec_locku_sz       (compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				decode_getattr_maxsz + \
+				op_decode_hdr_maxsz + 4)
+#define NFS4_enc_access_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				op_encode_hdr_maxsz + 1)
+#define NFS4_dec_access_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + 2)
+#define NFS4_enc_getattr_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_getattr_maxsz)
+#define NFS4_dec_getattr_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				decode_getattr_maxsz)
+#define NFS4_enc_lookup_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_lookup_maxsz + \
+				encode_getattr_maxsz + \
+				encode_getfh_maxsz)
+#define NFS4_dec_lookup_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + \
+				decode_getattr_maxsz + \
+				decode_getfh_maxsz)
+#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
+				encode_putrootfh_maxsz + \
+				encode_getattr_maxsz + \
+				encode_getfh_maxsz)
+#define NFS4_dec_lookup_root_sz (compound_decode_hdr_maxsz + \
+				decode_putrootfh_maxsz + \
+				decode_getattr_maxsz + \
+				decode_getfh_maxsz)
+#define NFS4_enc_remove_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_remove_maxsz)
+#define NFS4_dec_remove_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + 5)
+#define NFS4_enc_rename_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_savefh_maxsz + \
+				encode_putfh_maxsz + \
+				encode_rename_maxsz)
+#define NFS4_dec_rename_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				decode_savefh_maxsz + \
+				decode_putfh_maxsz + \
+				decode_rename_maxsz)
+#define NFS4_enc_link_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_savefh_maxsz + \
+				encode_putfh_maxsz + \
+				encode_link_maxsz)
+#define NFS4_dec_link_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				decode_savefh_maxsz + \
+				decode_putfh_maxsz + \
+				decode_link_maxsz)
+#define NFS4_enc_symlink_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_symlink_maxsz + \
+				encode_getattr_maxsz + \
+				encode_getfh_maxsz)
+#define NFS4_dec_symlink_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				decode_symlink_maxsz + \
+				decode_getattr_maxsz + \
+				decode_getfh_maxsz)
+#define NFS4_enc_create_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_create_maxsz + \
+				encode_getattr_maxsz + \
+				encode_getfh_maxsz)
+#define NFS4_dec_create_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				decode_create_maxsz + \
+				decode_getattr_maxsz + \
+				decode_getfh_maxsz)
+#define NFS4_enc_pathconf_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_getattr_maxsz)
+#define NFS4_dec_pathconf_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				decode_getattr_maxsz)
+#define NFS4_enc_statfs_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_getattr_maxsz)
+#define NFS4_dec_statfs_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + 12)
+#define NFS4_enc_server_caps_sz (compound_encode_hdr_maxsz + \
+				encode_getattr_maxsz)
+#define NFS4_dec_server_caps_sz (compound_decode_hdr_maxsz + \
+				decode_getattr_maxsz)
+#define NFS4_enc_delegreturn_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_delegreturn_maxsz)
+#define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
+				decode_delegreturn_maxsz)
+
+static struct {
+	unsigned int	mode;
+	unsigned int	nfs2type;
+} nfs_type2fmt[] = {
+	{ 0,		NFNON	     },
+	{ S_IFREG,	NFREG	     },
+	{ S_IFDIR,	NFDIR	     },
+	{ S_IFBLK,	NFBLK	     },
+	{ S_IFCHR,	NFCHR	     },
+	{ S_IFLNK,	NFLNK	     },
+	{ S_IFSOCK,	NFSOCK	     },
+	{ S_IFIFO,	NFFIFO	     },
+	{ 0,		NFNON	     },
+	{ 0,		NFNON	     },
+};
+
+struct compound_hdr {
+	int32_t		status;
+	uint32_t	nops;
+	uint32_t	taglen;
+	char *		tag;
+};
+
+/*
+ * START OF "GENERIC" ENCODE ROUTINES.
+ *   These may look a little ugly since they are imported from a "generic"
+ * set of XDR encode/decode routines which are intended to be shared by
+ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
+ *
+ * If the pain of reading these is too great, it should be a straightforward
+ * task to translate them into Linux-specific versions which are more
+ * consistent with the style used in NFSv2/v3...
+ */
+#define WRITE32(n)               *p++ = htonl(n)
+#define WRITE64(n)               do {				\
+	*p++ = htonl((uint32_t)((n) >> 32));				\
+	*p++ = htonl((uint32_t)(n));					\
+} while (0)
+#define WRITEMEM(ptr,nbytes)     do {				\
+	p = xdr_encode_opaque_fixed(p, ptr, nbytes);		\
+} while (0)
+
+#define RESERVE_SPACE(nbytes)	do {				\
+	p = xdr_reserve_space(xdr, nbytes);			\
+	if (!p) printk("RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \
+	BUG_ON(!p);						\
+} while (0)
+
+static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
+{
+	uint32_t *p;
+
+	p = xdr_reserve_space(xdr, 4 + len);
+	BUG_ON(p == NULL);
+	xdr_encode_opaque(p, str, len);
+}
+
+static int encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
+{
+	uint32_t *p;
+
+	dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
+	BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
+	RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2));
+	WRITE32(hdr->taglen);
+	WRITEMEM(hdr->tag, hdr->taglen);
+	WRITE32(NFS4_MINOR_VERSION);
+	WRITE32(hdr->nops);
+	return 0;
+}
+
+static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
+{
+	uint32_t *p;
+
+	p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
+	BUG_ON(p == NULL);
+	xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE);
+}
+
+static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server)
+{
+	char owner_name[IDMAP_NAMESZ];
+	char owner_group[IDMAP_NAMESZ];
+	int owner_namelen = 0;
+	int owner_grouplen = 0;
+	uint32_t *p;
+	uint32_t *q;
+	int len;
+	uint32_t bmval0 = 0;
+	uint32_t bmval1 = 0;
+	int status;
+
+	/*
+	 * We reserve enough space to write the entire attribute buffer at once.
+	 * In the worst-case, this would be
+	 *   12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime)
+	 *          = 36 bytes, plus any contribution from variable-length fields
+	 *            such as owner/group/acl's.
+	 */
+	len = 16;
+
+	/* Sigh */
+	if (iap->ia_valid & ATTR_SIZE)
+		len += 8;
+	if (iap->ia_valid & ATTR_MODE)
+		len += 4;
+	if (iap->ia_valid & ATTR_UID) {
+		owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name);
+		if (owner_namelen < 0) {
+			printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
+			       iap->ia_uid);
+			/* XXX */
+			strcpy(owner_name, "nobody");
+			owner_namelen = sizeof("nobody") - 1;
+			/* goto out; */
+		}
+		len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
+	}
+	if (iap->ia_valid & ATTR_GID) {
+		owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group);
+		if (owner_grouplen < 0) {
+			printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
+			       iap->ia_gid);
+			strcpy(owner_group, "nobody");
+			owner_grouplen = sizeof("nobody") - 1;
+			/* goto out; */
+		}
+		len += 4 + (XDR_QUADLEN(owner_grouplen) << 2);
+	}
+	if (iap->ia_valid & ATTR_ATIME_SET)
+		len += 16;
+	else if (iap->ia_valid & ATTR_ATIME)
+		len += 4;
+	if (iap->ia_valid & ATTR_MTIME_SET)
+		len += 16;
+	else if (iap->ia_valid & ATTR_MTIME)
+		len += 4;
+	RESERVE_SPACE(len);
+
+	/*
+	 * We write the bitmap length now, but leave the bitmap and the attribute
+	 * buffer length to be backfilled at the end of this routine.
+	 */
+	WRITE32(2);
+	q = p;
+	p += 3;
+
+	if (iap->ia_valid & ATTR_SIZE) {
+		bmval0 |= FATTR4_WORD0_SIZE;
+		WRITE64(iap->ia_size);
+	}
+	if (iap->ia_valid & ATTR_MODE) {
+		bmval1 |= FATTR4_WORD1_MODE;
+		WRITE32(iap->ia_mode);
+	}
+	if (iap->ia_valid & ATTR_UID) {
+		bmval1 |= FATTR4_WORD1_OWNER;
+		WRITE32(owner_namelen);
+		WRITEMEM(owner_name, owner_namelen);
+	}
+	if (iap->ia_valid & ATTR_GID) {
+		bmval1 |= FATTR4_WORD1_OWNER_GROUP;
+		WRITE32(owner_grouplen);
+		WRITEMEM(owner_group, owner_grouplen);
+	}
+	if (iap->ia_valid & ATTR_ATIME_SET) {
+		bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
+		WRITE32(NFS4_SET_TO_CLIENT_TIME);
+		WRITE32(0);
+		WRITE32(iap->ia_mtime.tv_sec);
+		WRITE32(iap->ia_mtime.tv_nsec);
+	}
+	else if (iap->ia_valid & ATTR_ATIME) {
+		bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
+		WRITE32(NFS4_SET_TO_SERVER_TIME);
+	}
+	if (iap->ia_valid & ATTR_MTIME_SET) {
+		bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
+		WRITE32(NFS4_SET_TO_CLIENT_TIME);
+		WRITE32(0);
+		WRITE32(iap->ia_mtime.tv_sec);
+		WRITE32(iap->ia_mtime.tv_nsec);
+	}
+	else if (iap->ia_valid & ATTR_MTIME) {
+		bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
+		WRITE32(NFS4_SET_TO_SERVER_TIME);
+	}
+	
+	/*
+	 * Now we backfill the bitmap and the attribute buffer length.
+	 */
+	if (len != ((char *)p - (char *)q) + 4) {
+		printk ("encode_attr: Attr length calculation error! %u != %Zu\n",
+				len, ((char *)p - (char *)q) + 4);
+		BUG();
+	}
+	len = (char *)p - (char *)q - 12;
+	*q++ = htonl(bmval0);
+	*q++ = htonl(bmval1);
+	*q++ = htonl(len);
+
+	status = 0;
+/* out: */
+	return status;
+}
+
+static int encode_access(struct xdr_stream *xdr, u32 access)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(8);
+	WRITE32(OP_ACCESS);
+	WRITE32(access);
+	
+	return 0;
+}
+
+static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(8+sizeof(arg->stateid.data));
+	WRITE32(OP_CLOSE);
+	WRITE32(arg->seqid);
+	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
+	
+	return 0;
+}
+
+static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args)
+{
+	uint32_t *p;
+        
+        RESERVE_SPACE(16);
+        WRITE32(OP_COMMIT);
+        WRITE64(args->offset);
+        WRITE32(args->count);
+
+        return 0;
+}
+
+static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create)
+{
+	uint32_t *p;
+	
+	RESERVE_SPACE(8);
+	WRITE32(OP_CREATE);
+	WRITE32(create->ftype);
+
+	switch (create->ftype) {
+	case NF4LNK:
+		RESERVE_SPACE(4 + create->u.symlink->len);
+		WRITE32(create->u.symlink->len);
+		WRITEMEM(create->u.symlink->name, create->u.symlink->len);
+		break;
+
+	case NF4BLK: case NF4CHR:
+		RESERVE_SPACE(8);
+		WRITE32(create->u.device.specdata1);
+		WRITE32(create->u.device.specdata2);
+		break;
+
+	default:
+		break;
+	}
+
+	RESERVE_SPACE(4 + create->name->len);
+	WRITE32(create->name->len);
+	WRITEMEM(create->name->name, create->name->len);
+
+	return encode_attrs(xdr, create->attrs, create->server);
+}
+
+static int encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap)
+{
+        uint32_t *p;
+
+        RESERVE_SPACE(12);
+        WRITE32(OP_GETATTR);
+        WRITE32(1);
+        WRITE32(bitmap);
+        return 0;
+}
+
+static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1)
+{
+        uint32_t *p;
+
+        RESERVE_SPACE(16);
+        WRITE32(OP_GETATTR);
+        WRITE32(2);
+        WRITE32(bm0);
+        WRITE32(bm1);
+        return 0;
+}
+
+static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask)
+{
+	extern u32 nfs4_fattr_bitmap[];
+
+	return encode_getattr_two(xdr,
+			bitmask[0] & nfs4_fattr_bitmap[0],
+			bitmask[1] & nfs4_fattr_bitmap[1]);
+}
+
+static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask)
+{
+	extern u32 nfs4_fsinfo_bitmap[];
+
+	return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0],
+			bitmask[1] & nfs4_fsinfo_bitmap[1]);
+}
+
+static int encode_getfh(struct xdr_stream *xdr)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_GETFH);
+
+	return 0;
+}
+
+static int encode_link(struct xdr_stream *xdr, const struct qstr *name)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(8 + name->len);
+	WRITE32(OP_LINK);
+	WRITE32(name->len);
+	WRITEMEM(name->name, name->len);
+	
+	return 0;
+}
+
+/*
+ * opcode,type,reclaim,offset,length,new_lock_owner = 32
+ * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40
+ */
+static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
+{
+	uint32_t *p;
+	struct nfs_lock_opargs *opargs = arg->u.lock;
+
+	RESERVE_SPACE(32);
+	WRITE32(OP_LOCK);
+	WRITE32(arg->type); 
+	WRITE32(opargs->reclaim);
+	WRITE64(arg->offset);
+	WRITE64(arg->length);
+	WRITE32(opargs->new_lock_owner);
+	if (opargs->new_lock_owner){
+		struct nfs_open_to_lock *ol = opargs->u.open_lock;
+
+		RESERVE_SPACE(40);
+		WRITE32(ol->open_seqid);
+		WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid));
+		WRITE32(ol->lock_seqid);
+		WRITE64(ol->lock_owner.clientid);
+		WRITE32(4);
+		WRITE32(ol->lock_owner.id);
+	}
+	else {
+		struct nfs_exist_lock *el = opargs->u.exist_lock;
+
+		RESERVE_SPACE(20);
+		WRITEMEM(&el->stateid, sizeof(el->stateid));
+		WRITE32(el->seqid);
+	}
+
+	return 0;
+}
+
+static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
+{
+	uint32_t *p;
+	struct nfs_lowner *opargs = arg->u.lockt;
+
+	RESERVE_SPACE(40);
+	WRITE32(OP_LOCKT);
+	WRITE32(arg->type);
+	WRITE64(arg->offset);
+	WRITE64(arg->length);
+	WRITE64(opargs->clientid);
+	WRITE32(4);
+	WRITE32(opargs->id);
+
+	return 0;
+}
+
+static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
+{
+	uint32_t *p;
+	struct nfs_locku_opargs *opargs = arg->u.locku;
+
+	RESERVE_SPACE(44);
+	WRITE32(OP_LOCKU);
+	WRITE32(arg->type);
+	WRITE32(opargs->seqid);
+	WRITEMEM(&opargs->stateid, sizeof(opargs->stateid));
+	WRITE64(arg->offset);
+	WRITE64(arg->length);
+
+	return 0;
+}
+
+static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name)
+{
+	int len = name->len;
+	uint32_t *p;
+
+	RESERVE_SPACE(8 + len);
+	WRITE32(OP_LOOKUP);
+	WRITE32(len);
+	WRITEMEM(name->name, len);
+
+	return 0;
+}
+
+static void encode_share_access(struct xdr_stream *xdr, int open_flags)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(8);
+	switch (open_flags & (FMODE_READ|FMODE_WRITE)) {
+		case FMODE_READ:
+			WRITE32(NFS4_SHARE_ACCESS_READ);
+			break;
+		case FMODE_WRITE:
+			WRITE32(NFS4_SHARE_ACCESS_WRITE);
+			break;
+		case FMODE_READ|FMODE_WRITE:
+			WRITE32(NFS4_SHARE_ACCESS_BOTH);
+			break;
+		default:
+			BUG();
+	}
+	WRITE32(0);		/* for linux, share_deny = 0 always */
+}
+
+static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg)
+{
+	uint32_t *p;
+ /*
+ * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
+ * owner 4 = 32
+ */
+	RESERVE_SPACE(8);
+	WRITE32(OP_OPEN);
+	WRITE32(arg->seqid);
+	encode_share_access(xdr, arg->open_flags);
+	RESERVE_SPACE(16);
+	WRITE64(arg->clientid);
+	WRITE32(4);
+	WRITE32(arg->id);
+}
+
+static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	switch(arg->open_flags & O_EXCL) {
+		case 0:
+			WRITE32(NFS4_CREATE_UNCHECKED);
+			encode_attrs(xdr, arg->u.attrs, arg->server);
+			break;
+		default:
+			WRITE32(NFS4_CREATE_EXCLUSIVE);
+			encode_nfs4_verifier(xdr, &arg->u.verifier);
+	}
+}
+
+static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *arg)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	switch (arg->open_flags & O_CREAT) {
+		case 0:
+			WRITE32(NFS4_OPEN_NOCREATE);
+			break;
+		default:
+			BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL);
+			WRITE32(NFS4_OPEN_CREATE);
+			encode_createmode(xdr, arg);
+	}
+}
+
+static inline void encode_delegation_type(struct xdr_stream *xdr, int delegation_type)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	switch (delegation_type) {
+		case 0:
+			WRITE32(NFS4_OPEN_DELEGATE_NONE);
+			break;
+		case FMODE_READ:
+			WRITE32(NFS4_OPEN_DELEGATE_READ);
+			break;
+		case FMODE_WRITE|FMODE_READ:
+			WRITE32(NFS4_OPEN_DELEGATE_WRITE);
+			break;
+		default:
+			BUG();
+	}
+}
+
+static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *name)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(NFS4_OPEN_CLAIM_NULL);
+	encode_string(xdr, name->len, name->name);
+}
+
+static inline void encode_claim_previous(struct xdr_stream *xdr, int type)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(NFS4_OPEN_CLAIM_PREVIOUS);
+	encode_delegation_type(xdr, type);
+}
+
+static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struct qstr *name, const nfs4_stateid *stateid)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4+sizeof(stateid->data));
+	WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
+	WRITEMEM(stateid->data, sizeof(stateid->data));
+	encode_string(xdr, name->len, name->name);
+}
+
+static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg)
+{
+	encode_openhdr(xdr, arg);
+	encode_opentype(xdr, arg);
+	switch (arg->claim) {
+		case NFS4_OPEN_CLAIM_NULL:
+			encode_claim_null(xdr, arg->name);
+			break;
+		case NFS4_OPEN_CLAIM_PREVIOUS:
+			encode_claim_previous(xdr, arg->u.delegation_type);
+			break;
+		case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+			encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation);
+			break;
+		default:
+			BUG();
+	}
+	return 0;
+}
+
+static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(8+sizeof(arg->stateid.data));
+	WRITE32(OP_OPEN_CONFIRM);
+	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
+	WRITE32(arg->seqid);
+
+	return 0;
+}
+
+static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(8+sizeof(arg->stateid.data));
+	WRITE32(OP_OPEN_DOWNGRADE);
+	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
+	WRITE32(arg->seqid);
+	encode_share_access(xdr, arg->open_flags);
+	return 0;
+}
+
+static int
+encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh)
+{
+	int len = fh->size;
+	uint32_t *p;
+
+	RESERVE_SPACE(8 + len);
+	WRITE32(OP_PUTFH);
+	WRITE32(len);
+	WRITEMEM(fh->data, len);
+
+	return 0;
+}
+
+static int encode_putrootfh(struct xdr_stream *xdr)
+{
+        uint32_t *p;
+        
+        RESERVE_SPACE(4);
+        WRITE32(OP_PUTROOTFH);
+
+        return 0;
+}
+
+static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
+{
+	extern nfs4_stateid zero_stateid;
+	nfs4_stateid stateid;
+	uint32_t *p;
+
+	RESERVE_SPACE(16);
+	if (ctx->state != NULL) {
+		nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
+		WRITEMEM(stateid.data, sizeof(stateid.data));
+	} else
+		WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data));
+}
+
+static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_READ);
+
+	encode_stateid(xdr, args->context);
+
+	RESERVE_SPACE(12);
+	WRITE64(args->offset);
+	WRITE32(args->count);
+
+	return 0;
+}
+
+static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	int replen;
+	uint32_t *p;
+
+	RESERVE_SPACE(32+sizeof(nfs4_verifier));
+	WRITE32(OP_READDIR);
+	WRITE64(readdir->cookie);
+	WRITEMEM(readdir->verifier.data, sizeof(readdir->verifier.data));
+	WRITE32(readdir->count >> 1);  /* We're not doing readdirplus */
+	WRITE32(readdir->count);
+	WRITE32(2);
+	if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) {
+		WRITE32(0);
+		WRITE32(FATTR4_WORD1_MOUNTED_ON_FILEID);
+	} else {
+		WRITE32(FATTR4_WORD0_FILEID);
+		WRITE32(0);
+	}
+
+	/* set up reply kvec
+	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
+	 *      + OP_READDIR + status + verifer(2)  = 9
+	 */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + 9) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->pages,
+			 readdir->pgbase, readdir->count);
+
+	return 0;
+}
+
+static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	unsigned int replen;
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_READLINK);
+
+	/* set up reply kvec
+	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
+	 *      + OP_READLINK + status + string length = 8
+	 */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + 8) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen, readlink->pages,
+			readlink->pgbase, readlink->pglen);
+	
+	return 0;
+}
+
+static int encode_remove(struct xdr_stream *xdr, const struct qstr *name)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(8 + name->len);
+	WRITE32(OP_REMOVE);
+	WRITE32(name->len);
+	WRITEMEM(name->name, name->len);
+
+	return 0;
+}
+
+static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(8 + oldname->len);
+	WRITE32(OP_RENAME);
+	WRITE32(oldname->len);
+	WRITEMEM(oldname->name, oldname->len);
+	
+	RESERVE_SPACE(4 + newname->len);
+	WRITE32(newname->len);
+	WRITEMEM(newname->name, newname->len);
+
+	return 0;
+}
+
+static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client_stateid)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(12);
+	WRITE32(OP_RENEW);
+	WRITE64(client_stateid->cl_clientid);
+
+	return 0;
+}
+
+static int
+encode_savefh(struct xdr_stream *xdr)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_SAVEFH);
+
+	return 0;
+}
+
+static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server)
+{
+	int status;
+	uint32_t *p;
+	
+        RESERVE_SPACE(4+sizeof(arg->stateid.data));
+        WRITE32(OP_SETATTR);
+	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
+
+        if ((status = encode_attrs(xdr, arg->iap, server)))
+		return status;
+
+        return 0;
+}
+
+static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4 + sizeof(setclientid->sc_verifier->data));
+	WRITE32(OP_SETCLIENTID);
+	WRITEMEM(setclientid->sc_verifier->data, sizeof(setclientid->sc_verifier->data));
+
+	encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
+	RESERVE_SPACE(4);
+	WRITE32(setclientid->sc_prog);
+	encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid);
+	encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
+	RESERVE_SPACE(4);
+	WRITE32(setclientid->sc_cb_ident);
+
+	return 0;
+}
+
+static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_client *client_state)
+{
+        uint32_t *p;
+
+        RESERVE_SPACE(12 + sizeof(client_state->cl_confirm.data));
+        WRITE32(OP_SETCLIENTID_CONFIRM);
+        WRITE64(client_state->cl_clientid);
+        WRITEMEM(client_state->cl_confirm.data, sizeof(client_state->cl_confirm.data));
+
+        return 0;
+}
+
+static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_WRITE);
+
+	encode_stateid(xdr, args->context);
+
+	RESERVE_SPACE(16);
+	WRITE64(args->offset);
+	WRITE32(args->stable);
+	WRITE32(args->count);
+
+	xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
+
+	return 0;
+}
+
+static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(20);
+
+	WRITE32(OP_DELEGRETURN);
+	WRITEMEM(stateid->data, sizeof(stateid->data));
+	return 0;
+
+}
+/*
+ * END OF "GENERIC" ENCODE ROUTINES.
+ */
+
+/*
+ * Encode an ACCESS request
+ */
+static int nfs4_xdr_enc_access(struct rpc_rqst *req, uint32_t *p, const struct nfs4_accessargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	if ((status = encode_putfh(&xdr, args->fh)) == 0)
+		status = encode_access(&xdr, args->access);
+	return status;
+}
+
+/*
+ * Encode LOOKUP request
+ */
+static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, uint32_t *p, const struct nfs4_lookup_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 4,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	if ((status = encode_putfh(&xdr, args->dir_fh)) != 0)
+		goto out;
+	if ((status = encode_lookup(&xdr, args->name)) != 0)
+		goto out;
+	if ((status = encode_getfh(&xdr)) != 0)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
+out:
+	return status;
+}
+
+/*
+ * Encode LOOKUP_ROOT request
+ */
+static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, uint32_t *p, const struct nfs4_lookup_root_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 3,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	if ((status = encode_putrootfh(&xdr)) != 0)
+		goto out;
+	if ((status = encode_getfh(&xdr)) == 0)
+		status = encode_getfattr(&xdr, args->bitmask);
+out:
+	return status;
+}
+
+/*
+ * Encode REMOVE request
+ */
+static int nfs4_xdr_enc_remove(struct rpc_rqst *req, uint32_t *p, const struct nfs4_remove_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	if ((status = encode_putfh(&xdr, args->fh)) == 0)
+		status = encode_remove(&xdr, args->name);
+	return status;
+}
+
+/*
+ * Encode RENAME request
+ */
+static int nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, const struct nfs4_rename_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 4,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	if ((status = encode_putfh(&xdr, args->old_dir)) != 0)
+		goto out;
+	if ((status = encode_savefh(&xdr)) != 0)
+		goto out;
+	if ((status = encode_putfh(&xdr, args->new_dir)) != 0)
+		goto out;
+	status = encode_rename(&xdr, args->old_name, args->new_name);
+out:
+	return status;
+}
+
+/*
+ * Encode LINK request
+ */
+static int nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, const struct nfs4_link_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 4,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	if ((status = encode_putfh(&xdr, args->fh)) != 0)
+		goto out;
+	if ((status = encode_savefh(&xdr)) != 0)
+		goto out;
+	if ((status = encode_putfh(&xdr, args->dir_fh)) != 0)
+		goto out;
+	status = encode_link(&xdr, args->name);
+out:
+	return status;
+}
+
+/*
+ * Encode CREATE request
+ */
+static int nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, const struct nfs4_create_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 4,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	if ((status = encode_putfh(&xdr, args->dir_fh)) != 0)
+		goto out;
+	if ((status = encode_create(&xdr, args)) != 0)
+		goto out;
+	if ((status = encode_getfh(&xdr)) != 0)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
+out:
+	return status;
+}
+
+/*
+ * Encode SYMLINK request
+ */
+static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, uint32_t *p, const struct nfs4_create_arg *args)
+{
+	return nfs4_xdr_enc_create(req, p, args);
+}
+
+/*
+ * Encode GETATTR request
+ */
+static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, uint32_t *p, const struct nfs4_getattr_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	if ((status = encode_putfh(&xdr, args->fh)) == 0)
+		status = encode_getfattr(&xdr, args->bitmask);
+	return status;
+}
+
+/*
+ * Encode a CLOSE request
+ */
+static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args)
+{
+        struct xdr_stream xdr;
+        struct compound_hdr hdr = {
+                .nops   = 2,
+        };
+        int status;
+
+        xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+        encode_compound_hdr(&xdr, &hdr);
+        status = encode_putfh(&xdr, args->fh);
+        if(status)
+                goto out;
+        status = encode_close(&xdr, args);
+out:
+        return status;
+}
+
+/*
+ * Encode an OPEN request
+ */
+static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 4,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if (status)
+		goto out;
+	status = encode_open(&xdr, args);
+	if (status)
+		goto out;
+	status = encode_getfh(&xdr);
+	if (status)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
+out:
+	return status;
+}
+
+/*
+ * Encode an OPEN_CONFIRM request
+ */
+static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_open_confirmargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops   = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if(status)
+		goto out;
+	status = encode_open_confirm(&xdr, args);
+out:
+	return status;
+}
+
+/*
+ * Encode an OPEN request with no attributes.
+ */
+static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops   = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if (status)
+		goto out;
+	status = encode_open(&xdr, args);
+out:
+	return status;
+}
+
+/*
+ * Encode an OPEN_DOWNGRADE request
+ */
+static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops	= 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if (status)
+		goto out;
+	status = encode_open_downgrade(&xdr, args);
+out:
+	return status;
+}
+
+/*
+ * Encode a LOCK request
+ */
+static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops   = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if(status)
+		goto out;
+	status = encode_lock(&xdr, args);
+out:
+	return status;
+}
+
+/*
+ * Encode a LOCKT request
+ */
+static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops   = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if(status)
+		goto out;
+	status = encode_lockt(&xdr, args);
+out:
+	return status;
+}
+
+/*
+ * Encode a LOCKU request
+ */
+static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops   = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if(status)
+		goto out;
+	status = encode_locku(&xdr, args);
+out:
+	return status;
+}
+
+/*
+ * Encode a READLINK request
+ */
+static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, uint32_t *p, const struct nfs4_readlink *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if(status)
+		goto out;
+	status = encode_readlink(&xdr, args, req);
+out:
+	return status;
+}
+
+/*
+ * Encode a READDIR request
+ */
+static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, uint32_t *p, const struct nfs4_readdir_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if(status)
+		goto out;
+	status = encode_readdir(&xdr, args, req);
+out:
+	return status;
+}
+
+/*
+ * Encode a READ request
+ */
+static int nfs4_xdr_enc_read(struct rpc_rqst *req, uint32_t *p, struct nfs_readargs *args)
+{
+	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int replen, status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if (status)
+		goto out;
+	status = encode_read(&xdr, args);
+	if (status)
+		goto out;
+
+	/* set up reply kvec
+	 *    toplevel status + taglen=0 + rescount + OP_PUTFH + status
+	 *       + OP_READ + status + eof + datalen = 9
+	 */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen,
+			 args->pages, args->pgbase, args->count);
+out:
+	return status;
+}
+
+/*
+ * Encode an SETATTR request
+ */
+static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, uint32_t *p, struct nfs_setattrargs *args)
+
+{
+        struct xdr_stream xdr;
+        struct compound_hdr hdr = {
+                .nops   = 3,
+        };
+        int status;
+
+        xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+        encode_compound_hdr(&xdr, &hdr);
+        status = encode_putfh(&xdr, args->fh);
+        if(status)
+                goto out;
+        status = encode_setattr(&xdr, args, args->server);
+        if(status)
+                goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
+out:
+        return status;
+}
+
+/*
+ * Encode a WRITE request
+ */
+static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if (status)
+		goto out;
+	status = encode_write(&xdr, args);
+out:
+	return status;
+}
+
+/*
+ *  a COMMIT request
+ */
+static int nfs4_xdr_enc_commit(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if (status)
+		goto out;
+	status = encode_commit(&xdr, args);
+out:
+	return status;
+}
+
+/*
+ * FSINFO request
+ */
+static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs4_fsinfo_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops	= 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if (!status)
+		status = encode_fsinfo(&xdr, args->bitmask);
+	return status;
+}
+
+/*
+ * a PATHCONF request
+ */
+static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct nfs4_pathconf_arg *args)
+{
+	extern u32 nfs4_pathconf_bitmap[2];
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if (!status)
+		status = encode_getattr_one(&xdr,
+				args->bitmask[0] & nfs4_pathconf_bitmap[0]);
+	return status;
+}
+
+/*
+ * a STATFS request
+ */
+static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct nfs4_statfs_arg *args)
+{
+	extern u32 nfs4_statfs_bitmap[];
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_getattr_two(&xdr,
+				args->bitmask[0] & nfs4_statfs_bitmap[0],
+				args->bitmask[1] & nfs4_statfs_bitmap[1]);
+	return status;
+}
+
+/*
+ * GETATTR_BITMAP request
+ */
+static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, uint32_t *p, const struct nfs_fh *fhandle)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, fhandle);
+	if (status == 0)
+		status = encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
+				FATTR4_WORD0_LINK_SUPPORT|
+				FATTR4_WORD0_SYMLINK_SUPPORT|
+				FATTR4_WORD0_ACLSUPPORT);
+	return status;
+}
+
+/*
+ * a RENEW request
+ */
+static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops	= 1,
+	};
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	return encode_renew(&xdr, clp);
+}
+
+/*
+ * a SETCLIENTID request
+ */
+static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, struct nfs4_setclientid *sc)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops	= 1,
+	};
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	return encode_setclientid(&xdr, sc);
+}
+
+/*
+ * a SETCLIENTID_CONFIRM request
+ */
+static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops	= 3,
+	};
+	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_setclientid_confirm(&xdr, clp);
+	if (!status)
+		status = encode_putrootfh(&xdr);
+	if (!status)
+		status = encode_fsinfo(&xdr, lease_bitmap);
+	return status;
+}
+
+/*
+ * DELEGRETURN request
+ */
+static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, uint32_t *p, const struct nfs4_delegreturnargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	if ((status = encode_putfh(&xdr, args->fhandle)) == 0)
+		status = encode_delegreturn(&xdr, args->stateid);
+	return status;
+}
+
+/*
+ * START OF "GENERIC" DECODE ROUTINES.
+ *   These may look a little ugly since they are imported from a "generic"
+ * set of XDR encode/decode routines which are intended to be shared by
+ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
+ *
+ * If the pain of reading these is too great, it should be a straightforward
+ * task to translate them into Linux-specific versions which are more
+ * consistent with the style used in NFSv2/v3...
+ */
+#define READ32(x)         (x) = ntohl(*p++)
+#define READ64(x)         do {			\
+	(x) = (u64)ntohl(*p++) << 32;		\
+	(x) |= ntohl(*p++);			\
+} while (0)
+#define READTIME(x)       do {			\
+	p++;					\
+	(x.tv_sec) = ntohl(*p++);		\
+	(x.tv_nsec) = ntohl(*p++);		\
+} while (0)
+#define COPYMEM(x,nbytes) do {			\
+	memcpy((x), p, nbytes);			\
+	p += XDR_QUADLEN(nbytes);		\
+} while (0)
+
+#define READ_BUF(nbytes)  do { \
+	p = xdr_inline_decode(xdr, nbytes); \
+	if (!p) { \
+		printk(KERN_WARNING "%s: reply buffer overflowed in line %d.", \
+			       	__FUNCTION__, __LINE__); \
+		return -EIO; \
+	} \
+} while (0)
+
+static int decode_opaque_inline(struct xdr_stream *xdr, uint32_t *len, char **string)
+{
+	uint32_t *p;
+
+	READ_BUF(4);
+	READ32(*len);
+	READ_BUF(*len);
+	*string = (char *)p;
+	return 0;
+}
+
+static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
+{
+	uint32_t *p;
+
+	READ_BUF(8);
+	READ32(hdr->status);
+	READ32(hdr->taglen);
+	
+	READ_BUF(hdr->taglen + 4);
+	hdr->tag = (char *)p;
+	p += XDR_QUADLEN(hdr->taglen);
+	READ32(hdr->nops);
+	return 0;
+}
+
+static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+{
+	uint32_t *p;
+	uint32_t opnum;
+	int32_t nfserr;
+
+	READ_BUF(8);
+	READ32(opnum);
+	if (opnum != expected) {
+		printk(KERN_NOTICE
+				"nfs4_decode_op_hdr: Server returned operation"
+			       	" %d but we issued a request for %d\n",
+				opnum, expected);
+		return -EIO;
+	}
+	READ32(nfserr);
+	if (nfserr != NFS_OK)
+		return -nfs_stat_to_errno(nfserr);
+	return 0;
+}
+
+/* Dummy routine */
+static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp)
+{
+	uint32_t *p;
+	uint32_t strlen;
+	char *str;
+
+	READ_BUF(12);
+	return decode_opaque_inline(xdr, &strlen, &str);
+}
+
+static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
+{
+	uint32_t bmlen, *p;
+
+	READ_BUF(4);
+	READ32(bmlen);
+
+	bitmap[0] = bitmap[1] = 0;
+	READ_BUF((bmlen << 2));
+	if (bmlen > 0) {
+		READ32(bitmap[0]);
+		if (bmlen > 1)
+			READ32(bitmap[1]);
+	}
+	return 0;
+}
+
+static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, uint32_t **savep)
+{
+	uint32_t *p;
+
+	READ_BUF(4);
+	READ32(*attrlen);
+	*savep = xdr->p;
+	return 0;
+}
+
+static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
+{
+	if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) {
+		decode_attr_bitmap(xdr, bitmask);
+		bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
+	} else
+		bitmask[0] = bitmask[1] = 0;
+	dprintk("%s: bitmask=0x%x%x\n", __FUNCTION__, bitmask[0], bitmask[1]);
+	return 0;
+}
+
+static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *type)
+{
+	uint32_t *p;
+
+	*type = 0;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) {
+		READ_BUF(4);
+		READ32(*type);
+		if (*type < NF4REG || *type > NF4NAMEDATTR) {
+			dprintk("%s: bad type %d\n", __FUNCTION__, *type);
+			return -EIO;
+		}
+		bitmap[0] &= ~FATTR4_WORD0_TYPE;
+	}
+	dprintk("%s: type=0%o\n", __FUNCTION__, nfs_type2fmt[*type].nfs2type);
+	return 0;
+}
+
+static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
+{
+	uint32_t *p;
+
+	*change = 0;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) {
+		READ_BUF(8);
+		READ64(*change);
+		bitmap[0] &= ~FATTR4_WORD0_CHANGE;
+	}
+	dprintk("%s: change attribute=%Lu\n", __FUNCTION__,
+			(unsigned long long)*change);
+	return 0;
+}
+
+static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
+{
+	uint32_t *p;
+
+	*size = 0;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) {
+		READ_BUF(8);
+		READ64(*size);
+		bitmap[0] &= ~FATTR4_WORD0_SIZE;
+	}
+	dprintk("%s: file size=%Lu\n", __FUNCTION__, (unsigned long long)*size);
+	return 0;
+}
+
+static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	uint32_t *p;
+
+	*res = 0;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) {
+		READ_BUF(4);
+		READ32(*res);
+		bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT;
+	}
+	dprintk("%s: link support=%s\n", __FUNCTION__, *res == 0 ? "false" : "true");
+	return 0;
+}
+
+static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	uint32_t *p;
+
+	*res = 0;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) {
+		READ_BUF(4);
+		READ32(*res);
+		bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT;
+	}
+	dprintk("%s: symlink support=%s\n", __FUNCTION__, *res == 0 ? "false" : "true");
+	return 0;
+}
+
+static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fsid *fsid)
+{
+	uint32_t *p;
+
+	fsid->major = 0;
+	fsid->minor = 0;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_FSID)) {
+		READ_BUF(16);
+		READ64(fsid->major);
+		READ64(fsid->minor);
+		bitmap[0] &= ~FATTR4_WORD0_FSID;
+	}
+	dprintk("%s: fsid=(0x%Lx/0x%Lx)\n", __FUNCTION__,
+			(unsigned long long)fsid->major,
+			(unsigned long long)fsid->minor);
+	return 0;
+}
+
+static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	uint32_t *p;
+
+	*res = 60;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) {
+		READ_BUF(4);
+		READ32(*res);
+		bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME;
+	}
+	dprintk("%s: file size=%u\n", __FUNCTION__, (unsigned int)*res);
+	return 0;
+}
+
+static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	uint32_t *p;
+
+	*res = ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) {
+		READ_BUF(4);
+		READ32(*res);
+		bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT;
+	}
+	dprintk("%s: ACLs supported=%u\n", __FUNCTION__, (unsigned int)*res);
+	return 0;
+}
+
+static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
+{
+	uint32_t *p;
+
+	*fileid = 0;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) {
+		READ_BUF(8);
+		READ64(*fileid);
+		bitmap[0] &= ~FATTR4_WORD0_FILEID;
+	}
+	dprintk("%s: fileid=%Lu\n", __FUNCTION__, (unsigned long long)*fileid);
+	return 0;
+}
+
+static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
+{
+	uint32_t *p;
+	int status = 0;
+
+	*res = 0;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) {
+		READ_BUF(8);
+		READ64(*res);
+		bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL;
+	}
+	dprintk("%s: files avail=%Lu\n", __FUNCTION__, (unsigned long long)*res);
+	return status;
+}
+
+static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
+{
+	uint32_t *p;
+	int status = 0;
+
+	*res = 0;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) {
+		READ_BUF(8);
+		READ64(*res);
+		bitmap[0] &= ~FATTR4_WORD0_FILES_FREE;
+	}
+	dprintk("%s: files free=%Lu\n", __FUNCTION__, (unsigned long long)*res);
+	return status;
+}
+
+static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
+{
+	uint32_t *p;
+	int status = 0;
+
+	*res = 0;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) {
+		READ_BUF(8);
+		READ64(*res);
+		bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL;
+	}
+	dprintk("%s: files total=%Lu\n", __FUNCTION__, (unsigned long long)*res);
+	return status;
+}
+
+static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
+{
+	uint32_t *p;
+	int status = 0;
+
+	*res = 0;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) {
+		READ_BUF(8);
+		READ64(*res);
+		bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE;
+	}
+	dprintk("%s: maxfilesize=%Lu\n", __FUNCTION__, (unsigned long long)*res);
+	return status;
+}
+
+static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink)
+{
+	uint32_t *p;
+	int status = 0;
+
+	*maxlink = 1;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) {
+		READ_BUF(4);
+		READ32(*maxlink);
+		bitmap[0] &= ~FATTR4_WORD0_MAXLINK;
+	}
+	dprintk("%s: maxlink=%u\n", __FUNCTION__, *maxlink);
+	return status;
+}
+
+static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname)
+{
+	uint32_t *p;
+	int status = 0;
+
+	*maxname = 1024;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) {
+		READ_BUF(4);
+		READ32(*maxname);
+		bitmap[0] &= ~FATTR4_WORD0_MAXNAME;
+	}
+	dprintk("%s: maxname=%u\n", __FUNCTION__, *maxname);
+	return status;
+}
+
+static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	uint32_t *p;
+	int status = 0;
+
+	*res = 1024;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXREAD - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) {
+		uint64_t maxread;
+		READ_BUF(8);
+		READ64(maxread);
+		if (maxread > 0x7FFFFFFF)
+			maxread = 0x7FFFFFFF;
+		*res = (uint32_t)maxread;
+		bitmap[0] &= ~FATTR4_WORD0_MAXREAD;
+	}
+	dprintk("%s: maxread=%lu\n", __FUNCTION__, (unsigned long)*res);
+	return status;
+}
+
+static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	uint32_t *p;
+	int status = 0;
+
+	*res = 1024;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXWRITE - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) {
+		uint64_t maxwrite;
+		READ_BUF(8);
+		READ64(maxwrite);
+		if (maxwrite > 0x7FFFFFFF)
+			maxwrite = 0x7FFFFFFF;
+		*res = (uint32_t)maxwrite;
+		bitmap[0] &= ~FATTR4_WORD0_MAXWRITE;
+	}
+	dprintk("%s: maxwrite=%lu\n", __FUNCTION__, (unsigned long)*res);
+	return status;
+}
+
+static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *mode)
+{
+	uint32_t *p;
+
+	*mode = 0;
+	if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U)))
+		return -EIO;
+	if (likely(bitmap[1] & FATTR4_WORD1_MODE)) {
+		READ_BUF(4);
+		READ32(*mode);
+		*mode &= ~S_IFMT;
+		bitmap[1] &= ~FATTR4_WORD1_MODE;
+	}
+	dprintk("%s: file mode=0%o\n", __FUNCTION__, (unsigned int)*mode);
+	return 0;
+}
+
+static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
+{
+	uint32_t *p;
+
+	*nlink = 1;
+	if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U)))
+		return -EIO;
+	if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) {
+		READ_BUF(4);
+		READ32(*nlink);
+		bitmap[1] &= ~FATTR4_WORD1_NUMLINKS;
+	}
+	dprintk("%s: nlink=%u\n", __FUNCTION__, (unsigned int)*nlink);
+	return 0;
+}
+
+static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *uid)
+{
+	uint32_t len, *p;
+
+	*uid = -2;
+	if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
+		return -EIO;
+	if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) {
+		READ_BUF(4);
+		READ32(len);
+		READ_BUF(len);
+		if (len < XDR_MAX_NETOBJ) {
+			if (nfs_map_name_to_uid(clp, (char *)p, len, uid) != 0)
+				dprintk("%s: nfs_map_name_to_uid failed!\n",
+						__FUNCTION__);
+		} else
+			printk(KERN_WARNING "%s: name too long (%u)!\n",
+					__FUNCTION__, len);
+		bitmap[1] &= ~FATTR4_WORD1_OWNER;
+	}
+	dprintk("%s: uid=%d\n", __FUNCTION__, (int)*uid);
+	return 0;
+}
+
+static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *gid)
+{
+	uint32_t len, *p;
+
+	*gid = -2;
+	if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
+		return -EIO;
+	if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) {
+		READ_BUF(4);
+		READ32(len);
+		READ_BUF(len);
+		if (len < XDR_MAX_NETOBJ) {
+			if (nfs_map_group_to_gid(clp, (char *)p, len, gid) != 0)
+				dprintk("%s: nfs_map_group_to_gid failed!\n",
+						__FUNCTION__);
+		} else
+			printk(KERN_WARNING "%s: name too long (%u)!\n",
+					__FUNCTION__, len);
+		bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
+	}
+	dprintk("%s: gid=%d\n", __FUNCTION__, (int)*gid);
+	return 0;
+}
+
+static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
+{
+	uint32_t major = 0, minor = 0, *p;
+
+	*rdev = MKDEV(0,0);
+	if (unlikely(bitmap[1] & (FATTR4_WORD1_RAWDEV - 1U)))
+		return -EIO;
+	if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) {
+		dev_t tmp;
+
+		READ_BUF(8);
+		READ32(major);
+		READ32(minor);
+		tmp = MKDEV(major, minor);
+		if (MAJOR(tmp) == major && MINOR(tmp) == minor)
+			*rdev = tmp;
+		bitmap[1] &= ~ FATTR4_WORD1_RAWDEV;
+	}
+	dprintk("%s: rdev=(0x%x:0x%x)\n", __FUNCTION__, major, minor);
+	return 0;
+}
+
+static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
+{
+	uint32_t *p;
+	int status = 0;
+
+	*res = 0;
+	if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U)))
+		return -EIO;
+	if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) {
+		READ_BUF(8);
+		READ64(*res);
+		bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL;
+	}
+	dprintk("%s: space avail=%Lu\n", __FUNCTION__, (unsigned long long)*res);
+	return status;
+}
+
+static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
+{
+	uint32_t *p;
+	int status = 0;
+
+	*res = 0;
+	if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U)))
+		return -EIO;
+	if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) {
+		READ_BUF(8);
+		READ64(*res);
+		bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE;
+	}
+	dprintk("%s: space free=%Lu\n", __FUNCTION__, (unsigned long long)*res);
+	return status;
+}
+
+static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
+{
+	uint32_t *p;
+	int status = 0;
+
+	*res = 0;
+	if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U)))
+		return -EIO;
+	if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) {
+		READ_BUF(8);
+		READ64(*res);
+		bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL;
+	}
+	dprintk("%s: space total=%Lu\n", __FUNCTION__, (unsigned long long)*res);
+	return status;
+}
+
+static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
+{
+	uint32_t *p;
+
+	*used = 0;
+	if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U)))
+		return -EIO;
+	if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) {
+		READ_BUF(8);
+		READ64(*used);
+		bitmap[1] &= ~FATTR4_WORD1_SPACE_USED;
+	}
+	dprintk("%s: space used=%Lu\n", __FUNCTION__,
+			(unsigned long long)*used);
+	return 0;
+}
+
+static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
+{
+	uint32_t *p;
+	uint64_t sec;
+	uint32_t nsec;
+
+	READ_BUF(12);
+	READ64(sec);
+	READ32(nsec);
+	time->tv_sec = (time_t)sec;
+	time->tv_nsec = (long)nsec;
+	return 0;
+}
+
+static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
+{
+	int status = 0;
+
+	time->tv_sec = 0;
+	time->tv_nsec = 0;
+	if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_ACCESS - 1U)))
+		return -EIO;
+	if (likely(bitmap[1] & FATTR4_WORD1_TIME_ACCESS)) {
+		status = decode_attr_time(xdr, time);
+		bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS;
+	}
+	dprintk("%s: atime=%ld\n", __FUNCTION__, (long)time->tv_sec);
+	return status;
+}
+
+static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
+{
+	int status = 0;
+
+	time->tv_sec = 0;
+	time->tv_nsec = 0;
+	if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_METADATA - 1U)))
+		return -EIO;
+	if (likely(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) {
+		status = decode_attr_time(xdr, time);
+		bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA;
+	}
+	dprintk("%s: ctime=%ld\n", __FUNCTION__, (long)time->tv_sec);
+	return status;
+}
+
+static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
+{
+	int status = 0;
+
+	time->tv_sec = 0;
+	time->tv_nsec = 0;
+	if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_MODIFY - 1U)))
+		return -EIO;
+	if (likely(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) {
+		status = decode_attr_time(xdr, time);
+		bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY;
+	}
+	dprintk("%s: mtime=%ld\n", __FUNCTION__, (long)time->tv_sec);
+	return status;
+}
+
+static int verify_attr_len(struct xdr_stream *xdr, uint32_t *savep, uint32_t attrlen)
+{
+	unsigned int attrwords = XDR_QUADLEN(attrlen);
+	unsigned int nwords = xdr->p - savep;
+
+	if (unlikely(attrwords != nwords)) {
+		printk(KERN_WARNING "%s: server returned incorrect attribute length: %u %c %u\n",
+				__FUNCTION__,
+				attrwords << 2,
+				(attrwords < nwords) ? '<' : '>',
+				nwords << 2);
+		return -EIO;
+	}
+	return 0;
+}
+
+static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
+{
+	uint32_t *p;
+
+	READ_BUF(20);
+	READ32(cinfo->atomic);
+	READ64(cinfo->before);
+	READ64(cinfo->after);
+	return 0;
+}
+
+static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
+{
+	uint32_t *p;
+	uint32_t supp, acc;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_ACCESS);
+	if (status)
+		return status;
+	READ_BUF(8);
+	READ32(supp);
+	READ32(acc);
+	access->supported = supp;
+	access->access = acc;
+	return 0;
+}
+
+static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
+{
+	uint32_t *p;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_CLOSE);
+	if (status)
+		return status;
+	READ_BUF(sizeof(res->stateid.data));
+	COPYMEM(res->stateid.data, sizeof(res->stateid.data));
+	return 0;
+}
+
+static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
+{
+	uint32_t *p;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_COMMIT);
+	if (status)
+		return status;
+	READ_BUF(8);
+	COPYMEM(res->verf->verifier, 8);
+	return 0;
+}
+
+static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
+{
+	uint32_t *p;
+	uint32_t bmlen;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_CREATE);
+	if (status)
+		return status;
+	if ((status = decode_change_info(xdr, cinfo)))
+		return status;
+	READ_BUF(4);
+	READ32(bmlen);
+	READ_BUF(bmlen << 2);
+	return 0;
+}
+
+static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
+{
+	uint32_t *savep;
+	uint32_t attrlen, 
+		 bitmap[2] = {0};
+	int status;
+
+	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_supported(xdr, bitmap, res->attr_bitmask)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_link_support(xdr, bitmap, &res->has_links)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_aclsupport(xdr, bitmap, &res->acl_bitmask)) != 0)
+		goto xdr_error;
+	status = verify_attr_len(xdr, savep, attrlen);
+xdr_error:
+	if (status != 0)
+		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	return status;
+}
+	
+static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
+{
+	uint32_t *savep;
+	uint32_t attrlen, 
+		 bitmap[2] = {0};
+	int status;
+	
+	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+		goto xdr_error;
+
+	if ((status = decode_attr_files_avail(xdr, bitmap, &fsstat->afiles)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_files_free(xdr, bitmap, &fsstat->ffiles)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_files_total(xdr, bitmap, &fsstat->tfiles)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_space_avail(xdr, bitmap, &fsstat->abytes)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_space_free(xdr, bitmap, &fsstat->fbytes)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_space_total(xdr, bitmap, &fsstat->tbytes)) != 0)
+		goto xdr_error;
+
+	status = verify_attr_len(xdr, savep, attrlen);
+xdr_error:
+	if (status != 0)
+		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	return status;
+}
+
+static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf)
+{
+	uint32_t *savep;
+	uint32_t attrlen, 
+		 bitmap[2] = {0};
+	int status;
+	
+	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+		goto xdr_error;
+
+	if ((status = decode_attr_maxlink(xdr, bitmap, &pathconf->max_link)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_maxname(xdr, bitmap, &pathconf->max_namelen)) != 0)
+		goto xdr_error;
+
+	status = verify_attr_len(xdr, savep, attrlen);
+xdr_error:
+	if (status != 0)
+		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	return status;
+}
+
+static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server)
+{
+	uint32_t *savep;
+	uint32_t attrlen,
+		 bitmap[2] = {0},
+		 type;
+	int status, fmode = 0;
+
+	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+		goto xdr_error;
+
+	fattr->bitmap[0] = bitmap[0];
+	fattr->bitmap[1] = bitmap[1];
+
+	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+		goto xdr_error;
+
+
+	if ((status = decode_attr_type(xdr, bitmap, &type)) != 0)
+		goto xdr_error;
+	fattr->type = nfs_type2fmt[type].nfs2type;
+	fmode = nfs_type2fmt[type].mode;
+
+	if ((status = decode_attr_change(xdr, bitmap, &fattr->change_attr)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_size(xdr, bitmap, &fattr->size)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid_u.nfs4)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0)
+		goto xdr_error;
+	fattr->mode |= fmode;
+	if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_owner(xdr, bitmap, server->nfs4_state, &fattr->uid)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_group(xdr, bitmap, server->nfs4_state, &fattr->gid)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_space_used(xdr, bitmap, &fattr->du.nfs3.used)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_time_access(xdr, bitmap, &fattr->atime)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0)
+		goto xdr_error;
+	if ((status = verify_attr_len(xdr, savep, attrlen)) == 0) {
+		fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
+		fattr->timestamp = jiffies;
+	}
+xdr_error:
+	if (status != 0)
+		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	return status;
+}
+
+
+static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
+{
+	uint32_t *savep;
+	uint32_t attrlen, bitmap[2];
+	int status;
+
+	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+		goto xdr_error;
+
+	fsinfo->rtmult = fsinfo->wtmult = 512;	/* ??? */
+
+	if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_maxfilesize(xdr, bitmap, &fsinfo->maxfilesize)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_maxread(xdr, bitmap, &fsinfo->rtmax)) != 0)
+		goto xdr_error;
+	fsinfo->rtpref = fsinfo->dtpref = fsinfo->rtmax;
+	if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0)
+		goto xdr_error;
+	fsinfo->wtpref = fsinfo->wtmax;
+
+	status = verify_attr_len(xdr, savep, attrlen);
+xdr_error:
+	if (status != 0)
+		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	return status;
+}
+
+static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+	uint32_t *p;
+	uint32_t len;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_GETFH);
+	if (status)
+		return status;
+	/* Zero handle first to allow comparisons */
+	memset(fh, 0, sizeof(*fh));
+
+	READ_BUF(4);
+	READ32(len);
+	if (len > NFS4_FHSIZE)
+		return -EIO;
+	fh->size = len;
+	READ_BUF(len);
+	COPYMEM(fh->data, len);
+	return 0;
+}
+
+static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
+{
+	int status;
+	
+	status = decode_op_hdr(xdr, OP_LINK);
+	if (status)
+		return status;
+	return decode_change_info(xdr, cinfo);
+}
+
+/*
+ * We create the owner, so we know a proper owner.id length is 4.
+ */
+static int decode_lock_denied (struct xdr_stream *xdr, struct nfs_lock_denied *denied)
+{
+	uint32_t *p;
+	uint32_t namelen;
+
+	READ_BUF(32);
+	READ64(denied->offset);
+	READ64(denied->length);
+	READ32(denied->type);
+	READ64(denied->owner.clientid);
+	READ32(namelen);
+	READ_BUF(namelen);
+	if (namelen == 4)
+		READ32(denied->owner.id);
+	return -NFS4ERR_DENIED;
+}
+
+static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res)
+{
+	uint32_t *p;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_LOCK);
+	if (status == 0) {
+		READ_BUF(sizeof(nfs4_stateid));
+		COPYMEM(&res->u.stateid, sizeof(res->u.stateid));
+	} else if (status == -NFS4ERR_DENIED)
+		return decode_lock_denied(xdr, &res->u.denied);
+	return status;
+}
+
+static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockres *res)
+{
+	int status;
+	status = decode_op_hdr(xdr, OP_LOCKT);
+	if (status == -NFS4ERR_DENIED)
+		return decode_lock_denied(xdr, &res->u.denied);
+	return status;
+}
+
+static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res)
+{
+	uint32_t *p;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_LOCKU);
+	if (status == 0) {
+		READ_BUF(sizeof(nfs4_stateid));
+		COPYMEM(&res->u.stateid, sizeof(res->u.stateid));
+	}
+	return status;
+}
+
+static int decode_lookup(struct xdr_stream *xdr)
+{
+	return decode_op_hdr(xdr, OP_LOOKUP);
+}
+
+/* This is too sick! */
+static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize)
+{
+        uint32_t *p;
+	uint32_t limit_type, nblocks, blocksize;
+
+	READ_BUF(12);
+	READ32(limit_type);
+	switch (limit_type) {
+		case 1:
+			READ64(*maxsize);
+			break;
+		case 2:
+			READ32(nblocks);
+			READ32(blocksize);
+			*maxsize = (uint64_t)nblocks * (uint64_t)blocksize;
+	}
+	return 0;
+}
+
+static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
+{
+        uint32_t *p;
+        uint32_t delegation_type;
+
+	READ_BUF(4);
+	READ32(delegation_type);
+	if (delegation_type == NFS4_OPEN_DELEGATE_NONE) {
+		res->delegation_type = 0;
+		return 0;
+	}
+	READ_BUF(20);
+	COPYMEM(res->delegation.data, sizeof(res->delegation.data));
+	READ32(res->do_recall);
+	switch (delegation_type) {
+		case NFS4_OPEN_DELEGATE_READ:
+			res->delegation_type = FMODE_READ;
+			break;
+		case NFS4_OPEN_DELEGATE_WRITE:
+			res->delegation_type = FMODE_WRITE|FMODE_READ;
+			if (decode_space_limit(xdr, &res->maxsize) < 0)
+				return -EIO;
+	}
+	return decode_ace(xdr, NULL, res->server->nfs4_state);
+}
+
+static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
+{
+        uint32_t *p;
+        uint32_t bmlen;
+        int status;
+
+        status = decode_op_hdr(xdr, OP_OPEN);
+        if (status)
+                return status;
+        READ_BUF(sizeof(res->stateid.data));
+        COPYMEM(res->stateid.data, sizeof(res->stateid.data));
+
+        decode_change_info(xdr, &res->cinfo);
+
+        READ_BUF(8);
+        READ32(res->rflags);
+        READ32(bmlen);
+        if (bmlen > 10)
+                goto xdr_error;
+
+        READ_BUF(bmlen << 2);
+        p += bmlen;
+	return decode_delegation(xdr, res);
+xdr_error:
+	printk(KERN_NOTICE "%s: xdr error!\n", __FUNCTION__);
+	return -EIO;
+}
+
+static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
+{
+        uint32_t *p;
+	int status;
+
+        status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
+        if (status)
+                return status;
+        READ_BUF(sizeof(res->stateid.data));
+        COPYMEM(res->stateid.data, sizeof(res->stateid.data));
+        return 0;
+}
+
+static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res)
+{
+	uint32_t *p;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
+	if (status)
+		return status;
+	READ_BUF(sizeof(res->stateid.data));
+	COPYMEM(res->stateid.data, sizeof(res->stateid.data));
+	return 0;
+}
+
+static int decode_putfh(struct xdr_stream *xdr)
+{
+	return decode_op_hdr(xdr, OP_PUTFH);
+}
+
+static int decode_putrootfh(struct xdr_stream *xdr)
+{
+	return decode_op_hdr(xdr, OP_PUTROOTFH);
+}
+
+static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res)
+{
+	struct kvec *iov = req->rq_rcv_buf.head;
+	uint32_t *p;
+	uint32_t count, eof, recvd, hdrlen;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_READ);
+	if (status)
+		return status;
+	READ_BUF(8);
+	READ32(eof);
+	READ32(count);
+	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+	recvd = req->rq_rcv_buf.len - hdrlen;
+	if (count > recvd) {
+		printk(KERN_WARNING "NFS: server cheating in read reply: "
+				"count %u > recvd %u\n", count, recvd);
+		count = recvd;
+		eof = 0;
+	}
+	xdr_read_pages(xdr, count);
+	res->eof = eof;
+	res->count = count;
+	return 0;
+}
+
+static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
+{
+	struct xdr_buf	*rcvbuf = &req->rq_rcv_buf;
+	struct page	*page = *rcvbuf->pages;
+	struct kvec	*iov = rcvbuf->head;
+	unsigned int	nr, pglen = rcvbuf->page_len;
+	uint32_t	*end, *entry, *p, *kaddr;
+	uint32_t	len, attrlen;
+	int 		hdrlen, recvd, status;
+
+	status = decode_op_hdr(xdr, OP_READDIR);
+	if (status)
+		return status;
+	READ_BUF(8);
+	COPYMEM(readdir->verifier.data, 8);
+
+	hdrlen = (char *) p - (char *) iov->iov_base;
+	recvd = rcvbuf->len - hdrlen;
+	if (pglen > recvd)
+		pglen = recvd;
+	xdr_read_pages(xdr, pglen);
+
+	BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE);
+	kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0);
+	end = (uint32_t *) ((char *)p + pglen + readdir->pgbase);
+	entry = p;
+	for (nr = 0; *p++; nr++) {
+		if (p + 3 > end)
+			goto short_pkt;
+		p += 2;			/* cookie */
+		len = ntohl(*p++);	/* filename length */
+		if (len > NFS4_MAXNAMLEN) {
+			printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
+			goto err_unmap;
+		}
+		p += XDR_QUADLEN(len);
+		if (p + 1 > end)
+			goto short_pkt;
+		len = ntohl(*p++);	/* bitmap length */
+		p += len;
+		if (p + 1 > end)
+			goto short_pkt;
+		attrlen = XDR_QUADLEN(ntohl(*p++));
+		p += attrlen;		/* attributes */
+		if (p + 2 > end)
+			goto short_pkt;
+		entry = p;
+	}
+	if (!nr && (entry[0] != 0 || entry[1] == 0))
+		goto short_pkt;
+out:	
+	kunmap_atomic(kaddr, KM_USER0);
+	return 0;
+short_pkt:
+	entry[0] = entry[1] = 0;
+	/* truncate listing ? */
+	if (!nr) {
+		printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
+		entry[1] = 1;
+	}
+	goto out;
+err_unmap:
+	kunmap_atomic(kaddr, KM_USER0);
+	return -errno_NFSERR_IO;
+}
+
+static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
+{
+	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+	struct kvec *iov = rcvbuf->head;
+	int hdrlen, len, recvd;
+	uint32_t *p;
+	char *kaddr;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_READLINK);
+	if (status)
+		return status;
+
+	/* Convert length of symlink */
+	READ_BUF(4);
+	READ32(len);
+	if (len >= rcvbuf->page_len || len <= 0) {
+		dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
+		return -ENAMETOOLONG;
+	}
+	hdrlen = (char *) xdr->p - (char *) iov->iov_base;
+	recvd = req->rq_rcv_buf.len - hdrlen;
+	if (recvd < len) {
+		printk(KERN_WARNING "NFS: server cheating in readlink reply: "
+				"count %u > recvd %u\n", len, recvd);
+		return -EIO;
+	}
+	xdr_read_pages(xdr, len);
+	/*
+	 * The XDR encode routine has set things up so that
+	 * the link text will be copied directly into the
+	 * buffer.  We just have to do overflow-checking,
+	 * and and null-terminate the text (the VFS expects
+	 * null-termination).
+	 */
+	kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0);
+	kaddr[len+rcvbuf->page_base] = '\0';
+	kunmap_atomic(kaddr, KM_USER0);
+	return 0;
+}
+
+static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
+{
+	int status;
+
+	status = decode_op_hdr(xdr, OP_REMOVE);
+	if (status)
+		goto out;
+	status = decode_change_info(xdr, cinfo);
+out:
+	return status;
+}
+
+static int decode_rename(struct xdr_stream *xdr, struct nfs4_change_info *old_cinfo,
+	      struct nfs4_change_info *new_cinfo)
+{
+	int status;
+
+	status = decode_op_hdr(xdr, OP_RENAME);
+	if (status)
+		goto out;
+	if ((status = decode_change_info(xdr, old_cinfo)))
+		goto out;
+	status = decode_change_info(xdr, new_cinfo);
+out:
+	return status;
+}
+
+static int decode_renew(struct xdr_stream *xdr)
+{
+	return decode_op_hdr(xdr, OP_RENEW);
+}
+
+static int
+decode_savefh(struct xdr_stream *xdr)
+{
+	return decode_op_hdr(xdr, OP_SAVEFH);
+}
+
+static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res)
+{
+	uint32_t *p;
+	uint32_t bmlen;
+	int status;
+
+        
+	status = decode_op_hdr(xdr, OP_SETATTR);
+	if (status)
+		return status;
+	READ_BUF(4);
+	READ32(bmlen);
+	READ_BUF(bmlen << 2);
+	return 0;
+}
+
+static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp)
+{
+	uint32_t *p;
+	uint32_t opnum;
+	int32_t nfserr;
+
+	READ_BUF(8);
+	READ32(opnum);
+	if (opnum != OP_SETCLIENTID) {
+		printk(KERN_NOTICE
+				"nfs4_decode_setclientid: Server returned operation"
+			       	" %d\n", opnum);
+		return -EIO;
+	}
+	READ32(nfserr);
+	if (nfserr == NFS_OK) {
+		READ_BUF(8 + sizeof(clp->cl_confirm.data));
+		READ64(clp->cl_clientid);
+		COPYMEM(clp->cl_confirm.data, sizeof(clp->cl_confirm.data));
+	} else if (nfserr == NFSERR_CLID_INUSE) {
+		uint32_t len;
+
+		/* skip netid string */
+		READ_BUF(4);
+		READ32(len);
+		READ_BUF(len);
+
+		/* skip uaddr string */
+		READ_BUF(4);
+		READ32(len);
+		READ_BUF(len);
+		return -NFSERR_CLID_INUSE;
+	} else
+		return -nfs_stat_to_errno(nfserr);
+
+	return 0;
+}
+
+static int decode_setclientid_confirm(struct xdr_stream *xdr)
+{
+	return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM);
+}
+
+static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
+{
+	uint32_t *p;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_WRITE);
+	if (status)
+		return status;
+
+	READ_BUF(16);
+	READ32(res->count);
+	READ32(res->verf->committed);
+	COPYMEM(res->verf->verifier, 8);
+	return 0;
+}
+
+static int decode_delegreturn(struct xdr_stream *xdr)
+{
+	return decode_op_hdr(xdr, OP_DELEGRETURN);
+}
+
+/*
+ * Decode OPEN_DOWNGRADE response
+ */
+static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res)
+{
+        struct xdr_stream xdr;
+        struct compound_hdr hdr;
+        int status;
+
+        xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+        status = decode_compound_hdr(&xdr, &hdr);
+        if (status)
+                goto out;
+        status = decode_putfh(&xdr);
+        if (status)
+                goto out;
+        status = decode_open_downgrade(&xdr, res);
+out:
+        return status;
+}
+
+/*
+ * END OF "GENERIC" DECODE ROUTINES.
+ */
+
+/*
+ * Decode ACCESS response
+ */
+static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_accessres *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+	
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+		goto out;
+	if ((status = decode_putfh(&xdr)) == 0)
+		status = decode_access(&xdr, res);
+out:
+	return status;
+}
+
+/*
+ * Decode LOOKUP response
+ */
+static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_lookup_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+	
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+		goto out;
+	if ((status = decode_putfh(&xdr)) != 0)
+		goto out;
+	if ((status = decode_lookup(&xdr)) != 0)
+		goto out;
+	if ((status = decode_getfh(&xdr, res->fh)) != 0)
+		goto out;
+	status = decode_getfattr(&xdr, res->fattr, res->server);
+out:
+	return status;
+}
+
+/*
+ * Decode LOOKUP_ROOT response
+ */
+static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_lookup_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+	
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+		goto out;
+	if ((status = decode_putrootfh(&xdr)) != 0)
+		goto out;
+	if ((status = decode_getfh(&xdr, res->fh)) == 0)
+		status = decode_getfattr(&xdr, res->fattr, res->server);
+out:
+	return status;
+}
+
+/*
+ * Decode REMOVE response
+ */
+static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+	
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+		goto out;
+	if ((status = decode_putfh(&xdr)) == 0)
+		status = decode_remove(&xdr, cinfo);
+out:
+	return status;
+}
+
+/*
+ * Decode RENAME response
+ */
+static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_rename_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+	
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+		goto out;
+	if ((status = decode_putfh(&xdr)) != 0)
+		goto out;
+	if ((status = decode_savefh(&xdr)) != 0)
+		goto out;
+	if ((status = decode_putfh(&xdr)) != 0)
+		goto out;
+	status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo);
+out:
+	return status;
+}
+
+/*
+ * Decode LINK response
+ */
+static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+	
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+		goto out;
+	if ((status = decode_putfh(&xdr)) != 0)
+		goto out;
+	if ((status = decode_savefh(&xdr)) != 0)
+		goto out;
+	if ((status = decode_putfh(&xdr)) != 0)
+		goto out;
+	status = decode_link(&xdr, cinfo);
+out:
+	return status;
+}
+
+/*
+ * Decode CREATE response
+ */
+static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_create_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+	
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+		goto out;
+	if ((status = decode_putfh(&xdr)) != 0)
+		goto out;
+	if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0)
+		goto out;
+	if ((status = decode_getfh(&xdr, res->fh)) != 0)
+		goto out;
+	status = decode_getfattr(&xdr, res->fattr, res->server);
+	if (status == NFS4ERR_DELAY)
+		status = 0;
+out:
+	return status;
+}
+
+/*
+ * Decode SYMLINK response
+ */
+static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_create_res *res)
+{
+	return nfs4_xdr_dec_create(rqstp, p, res);
+}
+
+/*
+ * Decode GETATTR response
+ */
+static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_getattr_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+	
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_getfattr(&xdr, res->fattr, res->server);
+out:
+	return status;
+
+}
+
+
+/*
+ * Decode CLOSE response
+ */
+static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res)
+{
+        struct xdr_stream xdr;
+        struct compound_hdr hdr;
+        int status;
+
+        xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+        status = decode_compound_hdr(&xdr, &hdr);
+        if (status)
+                goto out;
+        status = decode_putfh(&xdr);
+        if (status)
+                goto out;
+        status = decode_close(&xdr, res);
+out:
+        return status;
+}
+
+/*
+ * Decode OPEN response
+ */
+static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res)
+{
+        struct xdr_stream xdr;
+        struct compound_hdr hdr;
+        int status;
+
+        xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+        status = decode_compound_hdr(&xdr, &hdr);
+        if (status)
+                goto out;
+        status = decode_putfh(&xdr);
+        if (status)
+                goto out;
+        status = decode_open(&xdr, res);
+        if (status)
+                goto out;
+	status = decode_getfh(&xdr, &res->fh);
+        if (status)
+		goto out;
+	status = decode_getfattr(&xdr, res->f_attr, res->server);
+	if (status == NFS4ERR_DELAY)
+		status = 0;
+out:
+        return status;
+}
+
+/*
+ * Decode OPEN_CONFIRM response
+ */
+static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_open_confirmres *res)
+{
+        struct xdr_stream xdr;
+        struct compound_hdr hdr;
+        int status;
+
+        xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+        status = decode_compound_hdr(&xdr, &hdr);
+        if (status)
+                goto out;
+        status = decode_putfh(&xdr);
+        if (status)
+                goto out;
+        status = decode_open_confirm(&xdr, res);
+out:
+        return status;
+}
+
+/*
+ * Decode OPEN response
+ */
+static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res)
+{
+        struct xdr_stream xdr;
+        struct compound_hdr hdr;
+        int status;
+
+        xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+        status = decode_compound_hdr(&xdr, &hdr);
+        if (status)
+                goto out;
+        status = decode_putfh(&xdr);
+        if (status)
+                goto out;
+        status = decode_open(&xdr, res);
+out:
+        return status;
+}
+
+/*
+ * Decode SETATTR response
+ */
+static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_setattrres *res)
+{
+        struct xdr_stream xdr;
+        struct compound_hdr hdr;
+        int status;
+
+        xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+        status = decode_compound_hdr(&xdr, &hdr);
+        if (status)
+                goto out;
+        status = decode_putfh(&xdr);
+        if (status)
+                goto out;
+        status = decode_setattr(&xdr, res);
+        if (status)
+                goto out;
+	status = decode_getfattr(&xdr, res->fattr, res->server);
+	if (status == NFS4ERR_DELAY)
+		status = 0;
+out:
+        return status;
+}
+
+/*
+ * Decode LOCK response
+ */
+static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_lock(&xdr, res);
+out:
+	return status;
+}
+
+/*
+ * Decode LOCKT response
+ */
+static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_lockt(&xdr, res);
+out:
+	return status;
+}
+
+/*
+ * Decode LOCKU response
+ */
+static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_locku(&xdr, res);
+out:
+	return status;
+}
+
+/*
+ * Decode READLINK response
+ */
+static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, uint32_t *p, void *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_readlink(&xdr, rqstp);
+out:
+	return status;
+}
+
+/*
+ * Decode READDIR response
+ */
+static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_readdir_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_readdir(&xdr, rqstp, res);
+out:
+	return status;
+}
+
+/*
+ * Decode Read response
+ */
+static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_readres *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_read(&xdr, rqstp, res);
+	if (!status)
+		status = res->count;
+out:
+	return status;
+}
+
+/*
+ * Decode WRITE response
+ */
+static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_writeres *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_write(&xdr, res);
+	if (!status)
+		status = res->count;
+out:
+	return status;
+}
+
+/*
+ * Decode COMMIT response
+ */
+static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_writeres *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_commit(&xdr, res);
+out:
+	return status;
+}
+
+/*
+ * FSINFO request
+ */
+static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsinfo *fsinfo)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_putfh(&xdr);
+	if (!status)
+		status = decode_fsinfo(&xdr, fsinfo);
+	if (!status)
+		status = -nfs_stat_to_errno(hdr.status);
+	return status;
+}
+
+/*
+ * PATHCONF request
+ */
+static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, uint32_t *p, struct nfs_pathconf *pathconf)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_putfh(&xdr);
+	if (!status)
+		status = decode_pathconf(&xdr, pathconf);
+	return status;
+}
+
+/*
+ * STATFS request
+ */
+static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, uint32_t *p, struct nfs_fsstat *fsstat)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_putfh(&xdr);
+	if (!status)
+		status = decode_statfs(&xdr, fsstat);
+	return status;
+}
+
+/*
+ * GETATTR_BITMAP request
+ */
+static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, uint32_t *p, struct nfs4_server_caps_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+		goto out;
+	if ((status = decode_putfh(&xdr)) != 0)
+		goto out;
+	status = decode_server_caps(&xdr, res);
+out:
+	return status;
+}
+
+/*
+ * Decode RENEW response
+ */
+static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_renew(&xdr);
+	return status;
+}
+
+/*
+ * a SETCLIENTID request
+ */
+static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
+		struct nfs4_client *clp)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_setclientid(&xdr, clp);
+	if (!status)
+		status = -nfs_stat_to_errno(hdr.status);
+	return status;
+}
+
+/*
+ * a SETCLIENTID_CONFIRM request
+ */
+static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_fsinfo *fsinfo)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_setclientid_confirm(&xdr);
+	if (!status)
+		status = decode_putrootfh(&xdr);
+	if (!status)
+		status = decode_fsinfo(&xdr, fsinfo);
+	if (!status)
+		status = -nfs_stat_to_errno(hdr.status);
+	return status;
+}
+
+/*
+ * DELEGRETURN request
+ */
+static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0) {
+		status = decode_putfh(&xdr);
+		if (status == 0)
+			status = decode_delegreturn(&xdr);
+	}
+	return status;
+}
+
+uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus)
+{
+	uint32_t bitmap[2] = {0};
+	uint32_t len;
+
+	if (!*p++) {
+		if (!*p)
+			return ERR_PTR(-EAGAIN);
+		entry->eof = 1;
+		return ERR_PTR(-EBADCOOKIE);
+	}
+
+	entry->prev_cookie = entry->cookie;
+	p = xdr_decode_hyper(p, &entry->cookie);
+	entry->len = ntohl(*p++);
+	entry->name = (const char *) p;
+	p += XDR_QUADLEN(entry->len);
+
+	/*
+	 * In case the server doesn't return an inode number,
+	 * we fake one here.  (We don't use inode number 0,
+	 * since glibc seems to choke on it...)
+	 */
+	entry->ino = 1;
+
+	len = ntohl(*p++);		/* bitmap length */
+	if (len-- > 0) {
+		bitmap[0] = ntohl(*p++);
+		if (len-- > 0) {
+			bitmap[1] = ntohl(*p++);
+			p += len;
+		}
+	}
+	len = XDR_QUADLEN(ntohl(*p++));	/* attribute buffer length */
+	if (len > 0) {
+		if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID)
+			xdr_decode_hyper(p, &entry->ino);
+		else if (bitmap[0] == FATTR4_WORD0_FILEID)
+			xdr_decode_hyper(p, &entry->ino);
+		p += len;
+	}
+
+	entry->eof = !p[0] && p[1];
+	return p;
+}
+
+/*
+ * We need to translate between nfs status return values and
+ * the local errno values which may not be the same.
+ */
+static struct {
+	int stat;
+	int errno;
+} nfs_errtbl[] = {
+	{ NFS4_OK,		0		},
+	{ NFS4ERR_PERM,		EPERM		},
+	{ NFS4ERR_NOENT,	ENOENT		},
+	{ NFS4ERR_IO,		errno_NFSERR_IO	},
+	{ NFS4ERR_NXIO,		ENXIO		},
+	{ NFS4ERR_ACCESS,	EACCES		},
+	{ NFS4ERR_EXIST,	EEXIST		},
+	{ NFS4ERR_XDEV,		EXDEV		},
+	{ NFS4ERR_NOTDIR,	ENOTDIR		},
+	{ NFS4ERR_ISDIR,	EISDIR		},
+	{ NFS4ERR_INVAL,	EINVAL		},
+	{ NFS4ERR_FBIG,		EFBIG		},
+	{ NFS4ERR_NOSPC,	ENOSPC		},
+	{ NFS4ERR_ROFS,		EROFS		},
+	{ NFS4ERR_MLINK,	EMLINK		},
+	{ NFS4ERR_NAMETOOLONG,	ENAMETOOLONG	},
+	{ NFS4ERR_NOTEMPTY,	ENOTEMPTY	},
+	{ NFS4ERR_DQUOT,	EDQUOT		},
+	{ NFS4ERR_STALE,	ESTALE		},
+	{ NFS4ERR_BADHANDLE,	EBADHANDLE	},
+	{ NFS4ERR_BAD_COOKIE,	EBADCOOKIE	},
+	{ NFS4ERR_NOTSUPP,	ENOTSUPP	},
+	{ NFS4ERR_TOOSMALL,	ETOOSMALL	},
+	{ NFS4ERR_SERVERFAULT,	ESERVERFAULT	},
+	{ NFS4ERR_BADTYPE,	EBADTYPE	},
+	{ NFS4ERR_LOCKED,	EAGAIN		},
+	{ NFS4ERR_RESOURCE,	EREMOTEIO	},
+	{ NFS4ERR_SYMLINK,	ELOOP		},
+	{ NFS4ERR_OP_ILLEGAL,	EOPNOTSUPP	},
+	{ NFS4ERR_DEADLOCK,	EDEADLK		},
+	{ NFS4ERR_WRONGSEC,	EPERM		}, /* FIXME: this needs
+						    * to be handled by a
+						    * middle-layer.
+						    */
+	{ -1,			EIO		}
+};
+
+/*
+ * Convert an NFS error code to a local one.
+ * This one is used jointly by NFSv2 and NFSv3.
+ */
+static int
+nfs_stat_to_errno(int stat)
+{
+	int i;
+	for (i = 0; nfs_errtbl[i].stat != -1; i++) {
+		if (nfs_errtbl[i].stat == stat)
+			return nfs_errtbl[i].errno;
+	}
+	if (stat <= 10000 || stat > 10100) {
+		/* The server is looney tunes. */
+		return ESERVERFAULT;
+	}
+	/* If we cannot translate the error, the recovery routines should
+	 * handle it.
+	 * Note: remaining NFSv4 error codes have values > 10000, so should
+	 * not conflict with native Linux error codes.
+	 */
+	return stat;
+}
+
+#ifndef MAX
+# define MAX(a, b)	(((a) > (b))? (a) : (b))
+#endif
+
+#define PROC(proc, argtype, restype)				\
+[NFSPROC4_CLNT_##proc] = {					\
+	.p_proc   = NFSPROC4_COMPOUND,				\
+	.p_encode = (kxdrproc_t) nfs4_xdr_##argtype,		\
+	.p_decode = (kxdrproc_t) nfs4_xdr_##restype,		\
+	.p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,	\
+    }
+
+struct rpc_procinfo	nfs4_procedures[] = {
+  PROC(READ,		enc_read,	dec_read),
+  PROC(WRITE,		enc_write,	dec_write),
+  PROC(COMMIT,		enc_commit,	dec_commit),
+  PROC(OPEN,		enc_open,	dec_open),
+  PROC(OPEN_CONFIRM,	enc_open_confirm,	dec_open_confirm),
+  PROC(OPEN_NOATTR,	enc_open_noattr,	dec_open_noattr),
+  PROC(OPEN_DOWNGRADE,	enc_open_downgrade,	dec_open_downgrade),
+  PROC(CLOSE,		enc_close,	dec_close),
+  PROC(SETATTR,		enc_setattr,	dec_setattr),
+  PROC(FSINFO,		enc_fsinfo,	dec_fsinfo),
+  PROC(RENEW,		enc_renew,	dec_renew),
+  PROC(SETCLIENTID,	enc_setclientid,	dec_setclientid),
+  PROC(SETCLIENTID_CONFIRM,	enc_setclientid_confirm,	dec_setclientid_confirm),
+  PROC(LOCK,            enc_lock,       dec_lock),
+  PROC(LOCKT,           enc_lockt,      dec_lockt),
+  PROC(LOCKU,           enc_locku,      dec_locku),
+  PROC(ACCESS,		enc_access,	dec_access),
+  PROC(GETATTR,		enc_getattr,	dec_getattr),
+  PROC(LOOKUP,		enc_lookup,	dec_lookup),
+  PROC(LOOKUP_ROOT,	enc_lookup_root,	dec_lookup_root),
+  PROC(REMOVE,		enc_remove,	dec_remove),
+  PROC(RENAME,		enc_rename,	dec_rename),
+  PROC(LINK,		enc_link,	dec_link),
+  PROC(SYMLINK,		enc_symlink,	dec_symlink),
+  PROC(CREATE,		enc_create,	dec_create),
+  PROC(PATHCONF,	enc_pathconf,	dec_pathconf),
+  PROC(STATFS,		enc_statfs,	dec_statfs),
+  PROC(READLINK,	enc_readlink,	dec_readlink),
+  PROC(READDIR,		enc_readdir,	dec_readdir),
+  PROC(SERVER_CAPS,	enc_server_caps, dec_server_caps),
+  PROC(DELEGRETURN,	enc_delegreturn, dec_delegreturn),
+};
+
+struct rpc_version		nfs_version4 = {
+	.number			= 4,
+	.nrprocs		= sizeof(nfs4_procedures)/sizeof(nfs4_procedures[0]),
+	.procs			= nfs4_procedures
+};
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
new file mode 100644
index 00000000000..fd5bc596fe8
--- /dev/null
+++ b/fs/nfs/nfsroot.c
@@ -0,0 +1,513 @@
+/*
+ *  $Id: nfsroot.c,v 1.45 1998/03/07 10:44:46 mj Exp $
+ *
+ *  Copyright (C) 1995, 1996  Gero Kuhlmann <gero@gkminix.han.de>
+ *
+ *  Allow an NFS filesystem to be mounted as root. The way this works is:
+ *     (1) Use the IP autoconfig mechanism to set local IP addresses and routes.
+ *     (2) Handle RPC negotiation with the system which replied to RARP or
+ *         was reported as a boot server by BOOTP or manually.
+ *     (3) The actual mounting is done later, when init() is running.
+ *
+ *
+ *	Changes:
+ *
+ *	Alan Cox	:	Removed get_address name clash with FPU.
+ *	Alan Cox	:	Reformatted a bit.
+ *	Gero Kuhlmann	:	Code cleanup
+ *	Michael Rausch  :	Fixed recognition of an incoming RARP answer.
+ *	Martin Mares	: (2.0)	Auto-configuration via BOOTP supported.
+ *	Martin Mares	:	Manual selection of interface & BOOTP/RARP.
+ *	Martin Mares	:	Using network routes instead of host routes,
+ *				allowing the default configuration to be used
+ *				for normal operation of the host.
+ *	Martin Mares	:	Randomized timer with exponential backoff
+ *				installed to minimize network congestion.
+ *	Martin Mares	:	Code cleanup.
+ *	Martin Mares	: (2.1)	BOOTP and RARP made configuration options.
+ *	Martin Mares	:	Server hostname generation fixed.
+ *	Gerd Knorr	:	Fixed wired inode handling
+ *	Martin Mares	: (2.2)	"0.0.0.0" addresses from command line ignored.
+ *	Martin Mares	:	RARP replies not tested for server address.
+ *	Gero Kuhlmann	: (2.3) Some bug fixes and code cleanup again (please
+ *				send me your new patches _before_ bothering
+ *				Linus so that I don' always have to cleanup
+ *				_afterwards_ - thanks)
+ *	Gero Kuhlmann	:	Last changes of Martin Mares undone.
+ *	Gero Kuhlmann	: 	RARP replies are tested for specified server
+ *				again. However, it's now possible to have
+ *				different RARP and NFS servers.
+ *	Gero Kuhlmann	:	"0.0.0.0" addresses from command line are
+ *				now mapped to INADDR_NONE.
+ *	Gero Kuhlmann	:	Fixed a bug which prevented BOOTP path name
+ *				from being used (thanks to Leo Spiekman)
+ *	Andy Walker	:	Allow to specify the NFS server in nfs_root
+ *				without giving a path name
+ *	Swen Th�mmler	:	Allow to specify the NFS options in nfs_root
+ *				without giving a path name. Fix BOOTP request
+ *				for domainname (domainname is NIS domain, not
+ *				DNS domain!). Skip dummy devices for BOOTP.
+ *	Jacek Zapala	:	Fixed a bug which prevented server-ip address
+ *				from nfsroot parameter from being used.
+ *	Olaf Kirch	:	Adapted to new NFS code.
+ *	Jakub Jelinek	:	Free used code segment.
+ *	Marko Kohtala	:	Fixed some bugs.
+ *	Martin Mares	:	Debug message cleanup
+ *	Martin Mares	:	Changed to use the new generic IP layer autoconfig
+ *				code. BOOTP and RARP moved there.
+ *	Martin Mares	:	Default path now contains host name instead of
+ *				host IP address (but host name defaults to IP
+ *				address anyway).
+ *	Martin Mares	:	Use root_server_addr appropriately during setup.
+ *	Martin Mares	:	Rewrote parameter parsing, now hopefully giving
+ *				correct overriding.
+ *	Trond Myklebust :	Add in preliminary support for NFSv3 and TCP.
+ *				Fix bug in root_nfs_addr(). nfs_data.namlen
+ *				is NOT for the length of the hostname.
+ *	Hua Qin		:	Support for mounting root file system via
+ *				NFS over TCP.
+ *	Fabian Frederick:	Option parser rebuilt (using parser lib)
+*/
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/in.h>
+#include <linux/major.h>
+#include <linux/utsname.h>
+#include <linux/inet.h>
+#include <linux/root_dev.h>
+#include <net/ipconfig.h>
+#include <linux/parser.h>
+
+/* Define this to allow debugging output */
+#undef NFSROOT_DEBUG
+#define NFSDBG_FACILITY NFSDBG_ROOT
+
+/* Default path we try to mount. "%s" gets replaced by our IP address */
+#define NFS_ROOT		"/tftpboot/%s"
+
+/* Parameters passed from the kernel command line */
+static char nfs_root_name[256] __initdata = "";
+
+/* Address of NFS server */
+static __u32 servaddr __initdata = 0;
+
+/* Name of directory to mount */
+static char nfs_path[NFS_MAXPATHLEN] __initdata = { 0, };
+
+/* NFS-related data */
+static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
+static int nfs_port __initdata = 0;		/* Port to connect to for NFS */
+static int mount_port __initdata = 0;		/* Mount daemon port number */
+
+
+/***************************************************************************
+
+			     Parsing of options
+
+ ***************************************************************************/
+
+enum {
+	/* Options that take integer arguments */
+	Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin,
+	Opt_acregmax, Opt_acdirmin, Opt_acdirmax,
+	/* Options that take no arguments */
+	Opt_soft, Opt_hard, Opt_intr,
+	Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, 
+	Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
+	/* Error token */
+	Opt_err
+};
+
+static match_table_t __initdata tokens = {
+	{Opt_port, "port=%u"},
+	{Opt_rsize, "rsize=%u"},
+	{Opt_wsize, "wsize=%u"},
+	{Opt_timeo, "timeo=%u"},
+	{Opt_retrans, "retrans=%u"},
+	{Opt_acregmin, "acregmin=%u"},
+	{Opt_acregmax, "acregmax=%u"},
+	{Opt_acdirmin, "acdirmin=%u"},
+	{Opt_acdirmax, "acdirmax=%u"},
+	{Opt_soft, "soft"},
+	{Opt_hard, "hard"},
+	{Opt_intr, "intr"},
+	{Opt_nointr, "nointr"},
+	{Opt_posix, "posix"},
+	{Opt_noposix, "noposix"},
+	{Opt_cto, "cto"},
+	{Opt_nocto, "nocto"},
+	{Opt_ac, "ac"},
+	{Opt_noac, "noac"},
+	{Opt_lock, "lock"},
+	{Opt_nolock, "nolock"},
+	{Opt_v2, "nfsvers=2"},
+	{Opt_v2, "v2"},
+	{Opt_v3, "nfsvers=3"},
+	{Opt_v3, "v3"},
+	{Opt_udp, "proto=udp"},
+	{Opt_udp, "udp"},
+	{Opt_tcp, "proto=tcp"},
+	{Opt_tcp, "tcp"},
+	{Opt_err, NULL}
+	
+};
+
+/*
+ *  Parse option string.
+ */
+
+static int __init root_nfs_parse(char *name, char *buf)
+{
+
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+
+	if (!name)
+		return 1;
+
+	/* Set the NFS remote path */
+	p = strsep(&name, ",");
+	if (p[0] != '\0' && strcmp(p, "default") != 0)
+		strlcpy(buf, p, NFS_MAXPATHLEN);
+
+	while ((p = strsep (&name, ",")) != NULL) {
+		int token; 
+		if (!*p)
+			continue;
+		token = match_token(p, tokens, args);
+
+		/* %u tokens only. Beware if you add new tokens! */
+		if (token < Opt_soft && match_int(&args[0], &option))
+			return 0;
+		switch (token) {
+			case Opt_port:
+				nfs_port = option;
+				break;
+			case Opt_rsize:
+				nfs_data.rsize = option;
+				break;
+			case Opt_wsize:
+				nfs_data.wsize = option;
+				break;
+			case Opt_timeo:
+				nfs_data.timeo = option;
+				break;
+			case Opt_retrans:
+				nfs_data.retrans = option;
+				break;
+			case Opt_acregmin:
+				nfs_data.acregmin = option;
+				break;
+			case Opt_acregmax:
+				nfs_data.acregmax = option;
+				break;
+			case Opt_acdirmin:
+				nfs_data.acdirmin = option;
+				break;
+			case Opt_acdirmax:
+				nfs_data.acdirmax = option;
+				break;
+			case Opt_soft:
+				nfs_data.flags |= NFS_MOUNT_SOFT;
+				break;
+			case Opt_hard:
+				nfs_data.flags &= ~NFS_MOUNT_SOFT;
+				break;
+			case Opt_intr:
+				nfs_data.flags |= NFS_MOUNT_INTR;
+				break;
+			case Opt_nointr:
+				nfs_data.flags &= ~NFS_MOUNT_INTR;
+				break;
+			case Opt_posix:
+				nfs_data.flags |= NFS_MOUNT_POSIX;
+				break;
+			case Opt_noposix:
+				nfs_data.flags &= ~NFS_MOUNT_POSIX;
+				break;
+			case Opt_cto:
+				nfs_data.flags &= ~NFS_MOUNT_NOCTO;
+				break;
+			case Opt_nocto:
+				nfs_data.flags |= NFS_MOUNT_NOCTO;
+				break;
+			case Opt_ac:
+				nfs_data.flags &= ~NFS_MOUNT_NOAC;
+				break;
+			case Opt_noac:
+				nfs_data.flags |= NFS_MOUNT_NOAC;
+				break;
+			case Opt_lock:
+				nfs_data.flags &= ~NFS_MOUNT_NONLM;
+				break;
+			case Opt_nolock:
+				nfs_data.flags |= NFS_MOUNT_NONLM;
+				break;
+			case Opt_v2:
+				nfs_data.flags &= ~NFS_MOUNT_VER3;
+				break;
+			case Opt_v3:
+				nfs_data.flags |= NFS_MOUNT_VER3;
+				break;
+			case Opt_udp:
+				nfs_data.flags &= ~NFS_MOUNT_TCP;
+				break;
+			case Opt_tcp:
+				nfs_data.flags |= NFS_MOUNT_TCP;
+				break;
+			default : 
+				return 0;
+		}
+	}
+
+	return 1;
+}
+
+/*
+ *  Prepare the NFS data structure and parse all options.
+ */
+static int __init root_nfs_name(char *name)
+{
+	static char buf[NFS_MAXPATHLEN] __initdata;
+	char *cp;
+
+	/* Set some default values */
+	memset(&nfs_data, 0, sizeof(nfs_data));
+	nfs_port          = -1;
+	nfs_data.version  = NFS_MOUNT_VERSION;
+	nfs_data.flags    = NFS_MOUNT_NONLM;	/* No lockd in nfs root yet */
+	nfs_data.rsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
+	nfs_data.wsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
+	nfs_data.acregmin = 3;
+	nfs_data.acregmax = 60;
+	nfs_data.acdirmin = 30;
+	nfs_data.acdirmax = 60;
+	strcpy(buf, NFS_ROOT);
+
+	/* Process options received from the remote server */
+	root_nfs_parse(root_server_path, buf);
+
+	/* Override them by options set on kernel command-line */
+	root_nfs_parse(name, buf);
+
+	cp = system_utsname.nodename;
+	if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
+		printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
+		return -1;
+	}
+	sprintf(nfs_path, buf, cp);
+
+	return 1;
+}
+
+
+/*
+ *  Get NFS server address.
+ */
+static int __init root_nfs_addr(void)
+{
+	if ((servaddr = root_server_addr) == INADDR_NONE) {
+		printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n");
+		return -1;
+	}
+
+	snprintf(nfs_data.hostname, sizeof(nfs_data.hostname),
+		 "%u.%u.%u.%u", NIPQUAD(servaddr));
+	return 0;
+}
+
+/*
+ *  Tell the user what's going on.
+ */
+#ifdef NFSROOT_DEBUG
+static void __init root_nfs_print(void)
+{
+	printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n",
+		nfs_path, nfs_data.hostname);
+	printk(KERN_NOTICE "Root-NFS:     rsize = %d, wsize = %d, timeo = %d, retrans = %d\n",
+		nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans);
+	printk(KERN_NOTICE "Root-NFS:     acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n",
+		nfs_data.acregmin, nfs_data.acregmax,
+		nfs_data.acdirmin, nfs_data.acdirmax);
+	printk(KERN_NOTICE "Root-NFS:     nfsd port = %d, mountd port = %d, flags = %08x\n",
+		nfs_port, mount_port, nfs_data.flags);
+}
+#endif
+
+
+static int __init root_nfs_init(void)
+{
+#ifdef NFSROOT_DEBUG
+	nfs_debug |= NFSDBG_ROOT;
+#endif
+
+	/*
+	 * Decode the root directory path name and NFS options from
+	 * the kernel command line. This has to go here in order to
+	 * be able to use the client IP address for the remote root
+	 * directory (necessary for pure RARP booting).
+	 */
+	if (root_nfs_name(nfs_root_name) < 0 ||
+	    root_nfs_addr() < 0)
+		return -1;
+
+#ifdef NFSROOT_DEBUG
+	root_nfs_print();
+#endif
+
+	return 0;
+}
+
+
+/*
+ *  Parse NFS server and directory information passed on the kernel
+ *  command line.
+ */
+static int __init nfs_root_setup(char *line)
+{
+	ROOT_DEV = Root_NFS;
+	if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
+		strlcpy(nfs_root_name, line, sizeof(nfs_root_name));
+	} else {
+		int n = strlen(line) + sizeof(NFS_ROOT) - 1;
+		if (n >= sizeof(nfs_root_name))
+			line[sizeof(nfs_root_name) - sizeof(NFS_ROOT) - 2] = '\0';
+		sprintf(nfs_root_name, NFS_ROOT, line);
+	}
+	root_server_addr = root_nfs_parse_addr(nfs_root_name);
+	return 1;
+}
+
+__setup("nfsroot=", nfs_root_setup);
+
+/***************************************************************************
+
+	       Routines to actually mount the root directory
+
+ ***************************************************************************/
+
+/*
+ *  Construct sockaddr_in from address and port number.
+ */
+static inline void
+set_sockaddr(struct sockaddr_in *sin, __u32 addr, __u16 port)
+{
+	sin->sin_family = AF_INET;
+	sin->sin_addr.s_addr = addr;
+	sin->sin_port = port;
+}
+
+/*
+ *  Query server portmapper for the port of a daemon program.
+ */
+static int __init root_nfs_getport(int program, int version, int proto)
+{
+	struct sockaddr_in sin;
+
+	printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
+		program, version, NIPQUAD(servaddr));
+	set_sockaddr(&sin, servaddr, 0);
+	return rpc_getport_external(&sin, program, version, proto);
+}
+
+
+/*
+ *  Use portmapper to find mountd and nfsd port numbers if not overriden
+ *  by the user. Use defaults if portmapper is not available.
+ *  XXX: Is there any nfs server with no portmapper?
+ */
+static int __init root_nfs_ports(void)
+{
+	int port;
+	int nfsd_ver, mountd_ver;
+	int nfsd_port, mountd_port;
+	int proto;
+
+	if (nfs_data.flags & NFS_MOUNT_VER3) {
+		nfsd_ver = NFS3_VERSION;
+		mountd_ver = NFS_MNT3_VERSION;
+		nfsd_port = NFS_PORT;
+		mountd_port = NFS_MNT_PORT;
+	} else {
+		nfsd_ver = NFS2_VERSION;
+		mountd_ver = NFS_MNT_VERSION;
+		nfsd_port = NFS_PORT;
+		mountd_port = NFS_MNT_PORT;
+	}
+
+	proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
+
+	if (nfs_port < 0) {
+		if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) {
+			printk(KERN_ERR "Root-NFS: Unable to get nfsd port "
+					"number from server, using default\n");
+			port = nfsd_port;
+		}
+		nfs_port = htons(port);
+		dprintk("Root-NFS: Portmapper on server returned %d "
+			"as nfsd port\n", port);
+	}
+
+	if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) {
+		printk(KERN_ERR "Root-NFS: Unable to get mountd port "
+				"number from server, using default\n");
+		port = mountd_port;
+	}
+	mount_port = htons(port);
+	dprintk("Root-NFS: mountd port is %d\n", port);
+
+	return 0;
+}
+
+
+/*
+ *  Get a file handle from the server for the directory which is to be
+ *  mounted.
+ */
+static int __init root_nfs_get_handle(void)
+{
+	struct nfs_fh fh;
+	struct sockaddr_in sin;
+	int status;
+	int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
+					IPPROTO_TCP : IPPROTO_UDP;
+	int version = (nfs_data.flags & NFS_MOUNT_VER3) ?
+					NFS_MNT3_VERSION : NFS_MNT_VERSION;
+
+	set_sockaddr(&sin, servaddr, mount_port);
+	status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol);
+	if (status < 0)
+		printk(KERN_ERR "Root-NFS: Server returned error %d "
+				"while mounting %s\n", status, nfs_path);
+	else {
+		nfs_data.root.size = fh.size;
+		memcpy(nfs_data.root.data, fh.data, fh.size);
+	}
+
+	return status;
+}
+
+/*
+ *  Get the NFS port numbers and file handle, and return the prepared 'data'
+ *  argument for mount() if everything went OK. Return NULL otherwise.
+ */
+void * __init nfs_root_data(void)
+{
+	if (root_nfs_init() < 0
+	 || root_nfs_ports() < 0
+	 || root_nfs_get_handle() < 0)
+		return NULL;
+	set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, nfs_port);
+	return (void*)&nfs_data;
+}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
new file mode 100644
index 00000000000..4f1ba723848
--- /dev/null
+++ b/fs/nfs/pagelist.c
@@ -0,0 +1,309 @@
+/*
+ * linux/fs/nfs/pagelist.c
+ *
+ * A set of helper functions for managing NFS read and write requests.
+ * The main purpose of these routines is to provide support for the
+ * coalescing of several requests into a single RPC call.
+ *
+ * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no>
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs3.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_page.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+
+#define NFS_PARANOIA 1
+
+static kmem_cache_t *nfs_page_cachep;
+
+static inline struct nfs_page *
+nfs_page_alloc(void)
+{
+	struct nfs_page	*p;
+	p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL);
+	if (p) {
+		memset(p, 0, sizeof(*p));
+		INIT_LIST_HEAD(&p->wb_list);
+	}
+	return p;
+}
+
+static inline void
+nfs_page_free(struct nfs_page *p)
+{
+	kmem_cache_free(nfs_page_cachep, p);
+}
+
+/**
+ * nfs_create_request - Create an NFS read/write request.
+ * @file: file descriptor to use
+ * @inode: inode to which the request is attached
+ * @page: page to write
+ * @offset: starting offset within the page for the write
+ * @count: number of bytes to read/write
+ *
+ * The page must be locked by the caller. This makes sure we never
+ * create two different requests for the same page, and avoids
+ * a possible deadlock when we reach the hard limit on the number
+ * of dirty pages.
+ * User should ensure it is safe to sleep in this function.
+ */
+struct nfs_page *
+nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
+		   struct page *page,
+		   unsigned int offset, unsigned int count)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_page		*req;
+
+	/* Deal with hard limits.  */
+	for (;;) {
+		/* try to allocate the request struct */
+		req = nfs_page_alloc();
+		if (req != NULL)
+			break;
+
+		/* Try to free up at least one request in order to stay
+		 * below the hard limit
+		 */
+		if (signalled() && (server->flags & NFS_MOUNT_INTR))
+			return ERR_PTR(-ERESTARTSYS);
+		yield();
+	}
+
+	/* Initialize the request struct. Initially, we assume a
+	 * long write-back delay. This will be adjusted in
+	 * update_nfs_request below if the region is not locked. */
+	req->wb_page    = page;
+	atomic_set(&req->wb_complete, 0);
+	req->wb_index	= page->index;
+	page_cache_get(page);
+	req->wb_offset  = offset;
+	req->wb_pgbase	= offset;
+	req->wb_bytes   = count;
+	atomic_set(&req->wb_count, 1);
+	req->wb_context = get_nfs_open_context(ctx);
+
+	return req;
+}
+
+/**
+ * nfs_unlock_request - Unlock request and wake up sleepers.
+ * @req:
+ */
+void nfs_unlock_request(struct nfs_page *req)
+{
+	if (!NFS_WBACK_BUSY(req)) {
+		printk(KERN_ERR "NFS: Invalid unlock attempted\n");
+		BUG();
+	}
+	smp_mb__before_clear_bit();
+	clear_bit(PG_BUSY, &req->wb_flags);
+	smp_mb__after_clear_bit();
+	wake_up_all(&req->wb_context->waitq);
+	nfs_release_request(req);
+}
+
+/**
+ * nfs_clear_request - Free up all resources allocated to the request
+ * @req:
+ *
+ * Release page resources associated with a write request after it
+ * has completed.
+ */
+void nfs_clear_request(struct nfs_page *req)
+{
+	if (req->wb_page) {
+		page_cache_release(req->wb_page);
+		req->wb_page = NULL;
+	}
+}
+
+
+/**
+ * nfs_release_request - Release the count on an NFS read/write request
+ * @req: request to release
+ *
+ * Note: Should never be called with the spinlock held!
+ */
+void
+nfs_release_request(struct nfs_page *req)
+{
+	if (!atomic_dec_and_test(&req->wb_count))
+		return;
+
+#ifdef NFS_PARANOIA
+	BUG_ON (!list_empty(&req->wb_list));
+	BUG_ON (NFS_WBACK_BUSY(req));
+#endif
+
+	/* Release struct file or cached credential */
+	nfs_clear_request(req);
+	put_nfs_open_context(req->wb_context);
+	nfs_page_free(req);
+}
+
+/**
+ * nfs_list_add_request - Insert a request into a sorted list
+ * @req: request
+ * @head: head of list into which to insert the request.
+ *
+ * Note that the wb_list is sorted by page index in order to facilitate
+ * coalescing of requests.
+ * We use an insertion sort that is optimized for the case of appended
+ * writes.
+ */
+void
+nfs_list_add_request(struct nfs_page *req, struct list_head *head)
+{
+	struct list_head *pos;
+
+#ifdef NFS_PARANOIA
+	if (!list_empty(&req->wb_list)) {
+		printk(KERN_ERR "NFS: Add to list failed!\n");
+		BUG();
+	}
+#endif
+	list_for_each_prev(pos, head) {
+		struct nfs_page	*p = nfs_list_entry(pos);
+		if (p->wb_index < req->wb_index)
+			break;
+	}
+	list_add(&req->wb_list, pos);
+	req->wb_list_head = head;
+}
+
+/**
+ * nfs_wait_on_request - Wait for a request to complete.
+ * @req: request to wait upon.
+ *
+ * Interruptible by signals only if mounted with intr flag.
+ * The user is responsible for holding a count on the request.
+ */
+int
+nfs_wait_on_request(struct nfs_page *req)
+{
+	struct inode	*inode = req->wb_context->dentry->d_inode;
+        struct rpc_clnt	*clnt = NFS_CLIENT(inode);
+
+	if (!NFS_WBACK_BUSY(req))
+		return 0;
+	return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req));
+}
+
+/**
+ * nfs_coalesce_requests - Split coalesced requests out from a list.
+ * @head: source list
+ * @dst: destination list
+ * @nmax: maximum number of requests to coalesce
+ *
+ * Moves a maximum of 'nmax' elements from one list to another.
+ * The elements are checked to ensure that they form a contiguous set
+ * of pages, and that the RPC credentials are the same.
+ */
+int
+nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
+		      unsigned int nmax)
+{
+	struct nfs_page		*req = NULL;
+	unsigned int		npages = 0;
+
+	while (!list_empty(head)) {
+		struct nfs_page	*prev = req;
+
+		req = nfs_list_entry(head->next);
+		if (prev) {
+			if (req->wb_context->cred != prev->wb_context->cred)
+				break;
+			if (req->wb_context->lockowner != prev->wb_context->lockowner)
+				break;
+			if (req->wb_context->state != prev->wb_context->state)
+				break;
+			if (req->wb_index != (prev->wb_index + 1))
+				break;
+
+			if (req->wb_pgbase != 0)
+				break;
+		}
+		nfs_list_remove_request(req);
+		nfs_list_add_request(req, dst);
+		npages++;
+		if (req->wb_pgbase + req->wb_bytes != PAGE_CACHE_SIZE)
+			break;
+		if (npages >= nmax)
+			break;
+	}
+	return npages;
+}
+
+/**
+ * nfs_scan_list - Scan a list for matching requests
+ * @head: One of the NFS inode request lists
+ * @dst: Destination list
+ * @idx_start: lower bound of page->index to scan
+ * @npages: idx_start + npages sets the upper bound to scan.
+ *
+ * Moves elements from one of the inode request lists.
+ * If the number of requests is set to 0, the entire address_space
+ * starting at index idx_start, is scanned.
+ * The requests are *not* checked to ensure that they form a contiguous set.
+ * You must be holding the inode's req_lock when calling this function
+ */
+int
+nfs_scan_list(struct list_head *head, struct list_head *dst,
+	      unsigned long idx_start, unsigned int npages)
+{
+	struct list_head	*pos, *tmp;
+	struct nfs_page		*req;
+	unsigned long		idx_end;
+	int			res;
+
+	res = 0;
+	if (npages == 0)
+		idx_end = ~0;
+	else
+		idx_end = idx_start + npages - 1;
+
+	list_for_each_safe(pos, tmp, head) {
+
+		req = nfs_list_entry(pos);
+
+		if (req->wb_index < idx_start)
+			continue;
+		if (req->wb_index > idx_end)
+			break;
+
+		if (!nfs_lock_request(req))
+			continue;
+		nfs_list_remove_request(req);
+		nfs_list_add_request(req, dst);
+		res++;
+	}
+	return res;
+}
+
+int nfs_init_nfspagecache(void)
+{
+	nfs_page_cachep = kmem_cache_create("nfs_page",
+					    sizeof(struct nfs_page),
+					    0, SLAB_HWCACHE_ALIGN,
+					    NULL, NULL);
+	if (nfs_page_cachep == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void nfs_destroy_nfspagecache(void)
+{
+	if (kmem_cache_destroy(nfs_page_cachep))
+		printk(KERN_INFO "nfs_page: not all structures were freed\n");
+}
+
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
new file mode 100644
index 00000000000..d31b4d6e5a5
--- /dev/null
+++ b/fs/nfs/proc.c
@@ -0,0 +1,655 @@
+/*
+ *  linux/fs/nfs/proc.c
+ *
+ *  Copyright (C) 1992, 1993, 1994  Rick Sladkey
+ *
+ *  OS-independent nfs remote procedure call functions
+ *
+ *  Tuned by Alan Cox <A.Cox@swansea.ac.uk> for >3K buffers
+ *  so at last we can have decent(ish) throughput off a 
+ *  Sun server.
+ *
+ *  Coding optimized and cleaned up by Florian La Roche.
+ *  Note: Error returns are optimized for NFS_OK, which isn't translated via
+ *  nfs_stat_to_errno(), but happens to be already the right return code.
+ *
+ *  Also, the code currently doesn't check the size of the packet, when
+ *  it decodes the packet.
+ *
+ *  Feel free to fix it and mail me the diffs if it worries you.
+ *
+ *  Completely rewritten to support the new RPC call interface;
+ *  rewrote and moved the entire XDR stuff to xdr.c
+ *  --Olaf Kirch June 1996
+ *
+ *  The code below initializes all auto variables explicitly, otherwise
+ *  it will fail to work as a module (gcc generates a memset call for an
+ *  incomplete struct).
+ */
+
+#include <linux/types.h>
+#include <linux/param.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+
+#define NFSDBG_FACILITY		NFSDBG_PROC
+
+extern struct rpc_procinfo nfs_procedures[];
+
+/*
+ * Bare-bones access to getattr: this is for nfs_read_super.
+ */
+static int
+nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+		  struct nfs_fsinfo *info)
+{
+	struct nfs_fattr *fattr = info->fattr;
+	struct nfs2_fsstat fsinfo;
+	int status;
+
+	dprintk("%s: call getattr\n", __FUNCTION__);
+	fattr->valid = 0;
+	status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
+	dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
+	if (status)
+		return status;
+	dprintk("%s: call statfs\n", __FUNCTION__);
+	status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
+	if (status)
+		return status;
+	info->rtmax  = NFS_MAXDATA;
+	info->rtpref = fsinfo.tsize;
+	info->rtmult = fsinfo.bsize;
+	info->wtmax  = NFS_MAXDATA;
+	info->wtpref = fsinfo.tsize;
+	info->wtmult = fsinfo.bsize;
+	info->dtpref = fsinfo.tsize;
+	info->maxfilesize = 0x7FFFFFFF;
+	info->lease_time = 0;
+	return 0;
+}
+
+/*
+ * One function for each procedure in the NFS protocol.
+ */
+static int
+nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_fattr *fattr)
+{
+	int	status;
+
+	dprintk("NFS call  getattr\n");
+	fattr->valid = 0;
+	status = rpc_call(server->client, NFSPROC_GETATTR,
+				fhandle, fattr, 0);
+	dprintk("NFS reply getattr: %d\n", status);
+	return status;
+}
+
+static int
+nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
+		 struct iattr *sattr)
+{
+	struct inode *inode = dentry->d_inode;
+	struct nfs_sattrargs	arg = { 
+		.fh	= NFS_FH(inode),
+		.sattr	= sattr
+	};
+	int	status;
+
+	dprintk("NFS call  setattr\n");
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
+	dprintk("NFS reply setattr: %d\n", status);
+	return status;
+}
+
+static int
+nfs_proc_lookup(struct inode *dir, struct qstr *name,
+		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs_diropargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= name->name,
+		.len		= name->len
+	};
+	struct nfs_diropok	res = {
+		.fh		= fhandle,
+		.fattr		= fattr
+	};
+	int			status;
+
+	dprintk("NFS call  lookup %s\n", name->name);
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
+	dprintk("NFS reply lookup: %d\n", status);
+	return status;
+}
+
+static int nfs_proc_readlink(struct inode *inode, struct page *page,
+		unsigned int pgbase, unsigned int pglen)
+{
+	struct nfs_readlinkargs	args = {
+		.fh		= NFS_FH(inode),
+		.pgbase		= pgbase,
+		.pglen		= pglen,
+		.pages		= &page
+	};
+	int			status;
+
+	dprintk("NFS call  readlink\n");
+	status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0);
+	dprintk("NFS reply readlink: %d\n", status);
+	return status;
+}
+
+static int nfs_proc_read(struct nfs_read_data *rdata)
+{
+	int			flags = rdata->flags;
+	struct inode *		inode = rdata->inode;
+	struct nfs_fattr *	fattr = rdata->res.fattr;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_READ],
+		.rpc_argp	= &rdata->args,
+		.rpc_resp	= &rdata->res,
+		.rpc_cred	= rdata->cred,
+	};
+	int			status;
+
+	dprintk("NFS call  read %d @ %Ld\n", rdata->args.count,
+			(long long) rdata->args.offset);
+	fattr->valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
+	if (status >= 0) {
+		nfs_refresh_inode(inode, fattr);
+		/* Emulate the eof flag, which isn't normally needed in NFSv2
+		 * as it is guaranteed to always return the file attributes
+		 */
+		if (rdata->args.offset + rdata->args.count >= fattr->size)
+			rdata->res.eof = 1;
+	}
+	dprintk("NFS reply read: %d\n", status);
+	return status;
+}
+
+static int nfs_proc_write(struct nfs_write_data *wdata)
+{
+	int			flags = wdata->flags;
+	struct inode *		inode = wdata->inode;
+	struct nfs_fattr *	fattr = wdata->res.fattr;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_WRITE],
+		.rpc_argp	= &wdata->args,
+		.rpc_resp	= &wdata->res,
+		.rpc_cred	= wdata->cred,
+	};
+	int			status;
+
+	dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
+			(long long) wdata->args.offset);
+	fattr->valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
+	if (status >= 0) {
+		nfs_refresh_inode(inode, fattr);
+		wdata->res.count = wdata->args.count;
+		wdata->verf.committed = NFS_FILE_SYNC;
+	}
+	dprintk("NFS reply write: %d\n", status);
+	return status < 0? status : wdata->res.count;
+}
+
+static int
+nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+		int flags)
+{
+	struct nfs_fh		fhandle;
+	struct nfs_fattr	fattr;
+	struct nfs_createargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
+		.sattr		= sattr
+	};
+	struct nfs_diropok	res = {
+		.fh		= &fhandle,
+		.fattr		= &fattr
+	};
+	int			status;
+
+	fattr.valid = 0;
+	dprintk("NFS call  create %s\n", dentry->d_name.name);
+	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+	if (status == 0)
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	dprintk("NFS reply create: %d\n", status);
+	return status;
+}
+
+/*
+ * In NFSv2, mknod is grafted onto the create call.
+ */
+static int
+nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+	       dev_t rdev)
+{
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr;
+	struct nfs_createargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
+		.sattr		= sattr
+	};
+	struct nfs_diropok	res = {
+		.fh		= &fhandle,
+		.fattr		= &fattr
+	};
+	int status, mode;
+
+	dprintk("NFS call  mknod %s\n", dentry->d_name.name);
+
+	mode = sattr->ia_mode;
+	if (S_ISFIFO(mode)) {
+		sattr->ia_mode = (mode & ~S_IFMT) | S_IFCHR;
+		sattr->ia_valid &= ~ATTR_SIZE;
+	} else if (S_ISCHR(mode) || S_ISBLK(mode)) {
+		sattr->ia_valid |= ATTR_SIZE;
+		sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */
+	}
+
+	fattr.valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+
+	if (status == -EINVAL && S_ISFIFO(mode)) {
+		sattr->ia_mode = mode;
+		fattr.valid = 0;
+		status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+	}
+	if (status == 0)
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	dprintk("NFS reply mknod: %d\n", status);
+	return status;
+}
+  
+static int
+nfs_proc_remove(struct inode *dir, struct qstr *name)
+{
+	struct nfs_diropargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= name->name,
+		.len		= name->len
+	};
+	struct rpc_message	msg = { 
+		.rpc_proc	= &nfs_procedures[NFSPROC_REMOVE],
+		.rpc_argp	= &arg,
+		.rpc_resp	= NULL,
+		.rpc_cred	= NULL
+	};
+	int			status;
+
+	dprintk("NFS call  remove %s\n", name->name);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+
+	dprintk("NFS reply remove: %d\n", status);
+	return status;
+}
+
+static int
+nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name)
+{
+	struct nfs_diropargs	*arg;
+
+	arg = (struct nfs_diropargs *)kmalloc(sizeof(*arg), GFP_KERNEL);
+	if (!arg)
+		return -ENOMEM;
+	arg->fh = NFS_FH(dir->d_inode);
+	arg->name = name->name;
+	arg->len = name->len;
+	msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE];
+	msg->rpc_argp = arg;
+	return 0;
+}
+
+static int
+nfs_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
+{
+	struct rpc_message *msg = &task->tk_msg;
+	
+	if (msg->rpc_argp)
+		kfree(msg->rpc_argp);
+	return 0;
+}
+
+static int
+nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
+		struct inode *new_dir, struct qstr *new_name)
+{
+	struct nfs_renameargs	arg = {
+		.fromfh		= NFS_FH(old_dir),
+		.fromname	= old_name->name,
+		.fromlen	= old_name->len,
+		.tofh		= NFS_FH(new_dir),
+		.toname		= new_name->name,
+		.tolen		= new_name->len
+	};
+	int			status;
+
+	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
+	status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
+	dprintk("NFS reply rename: %d\n", status);
+	return status;
+}
+
+static int
+nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+{
+	struct nfs_linkargs	arg = {
+		.fromfh		= NFS_FH(inode),
+		.tofh		= NFS_FH(dir),
+		.toname		= name->name,
+		.tolen		= name->len
+	};
+	int			status;
+
+	dprintk("NFS call  link %s\n", name->name);
+	status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
+	dprintk("NFS reply link: %d\n", status);
+	return status;
+}
+
+static int
+nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
+		 struct iattr *sattr, struct nfs_fh *fhandle,
+		 struct nfs_fattr *fattr)
+{
+	struct nfs_symlinkargs	arg = {
+		.fromfh		= NFS_FH(dir),
+		.fromname	= name->name,
+		.fromlen	= name->len,
+		.topath		= path->name,
+		.tolen		= path->len,
+		.sattr		= sattr
+	};
+	int			status;
+
+	if (path->len > NFS2_MAXPATHLEN)
+		return -ENAMETOOLONG;
+	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
+	fattr->valid = 0;
+	fhandle->size = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
+	dprintk("NFS reply symlink: %d\n", status);
+	return status;
+}
+
+static int
+nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
+{
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr;
+	struct nfs_createargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
+		.sattr		= sattr
+	};
+	struct nfs_diropok	res = {
+		.fh		= &fhandle,
+		.fattr		= &fattr
+	};
+	int			status;
+
+	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
+	fattr.valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
+	if (status == 0)
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	dprintk("NFS reply mkdir: %d\n", status);
+	return status;
+}
+
+static int
+nfs_proc_rmdir(struct inode *dir, struct qstr *name)
+{
+	struct nfs_diropargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= name->name,
+		.len		= name->len
+	};
+	int			status;
+
+	dprintk("NFS call  rmdir %s\n", name->name);
+	status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
+	dprintk("NFS reply rmdir: %d\n", status);
+	return status;
+}
+
+/*
+ * The READDIR implementation is somewhat hackish - we pass a temporary
+ * buffer to the encode function, which installs it in the receive
+ * the receive iovec. The decode function just parses the reply to make
+ * sure it is syntactically correct; the entries itself are decoded
+ * from nfs_readdir by calling the decode_entry function directly.
+ */
+static int
+nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+		 u64 cookie, struct page *page, unsigned int count, int plus)
+{
+	struct inode		*dir = dentry->d_inode;
+	struct nfs_readdirargs	arg = {
+		.fh		= NFS_FH(dir),
+		.cookie		= cookie,
+		.count		= count,
+		.pages		= &page
+	};
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_READDIR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= NULL,
+		.rpc_cred	= cred
+	};
+	int			status;
+
+	lock_kernel();
+
+	dprintk("NFS call  readdir %d\n", (unsigned int)cookie);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+
+	dprintk("NFS reply readdir: %d\n", status);
+	unlock_kernel();
+	return status;
+}
+
+static int
+nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
+			struct nfs_fsstat *stat)
+{
+	struct nfs2_fsstat fsinfo;
+	int	status;
+
+	dprintk("NFS call  statfs\n");
+	stat->fattr->valid = 0;
+	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	dprintk("NFS reply statfs: %d\n", status);
+	if (status)
+		goto out;
+	stat->tbytes = (u64)fsinfo.blocks * fsinfo.bsize;
+	stat->fbytes = (u64)fsinfo.bfree  * fsinfo.bsize;
+	stat->abytes = (u64)fsinfo.bavail * fsinfo.bsize;
+	stat->tfiles = 0;
+	stat->ffiles = 0;
+	stat->afiles = 0;
+out:
+	return status;
+}
+
+static int
+nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+			struct nfs_fsinfo *info)
+{
+	struct nfs2_fsstat fsinfo;
+	int	status;
+
+	dprintk("NFS call  fsinfo\n");
+	info->fattr->valid = 0;
+	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	dprintk("NFS reply fsinfo: %d\n", status);
+	if (status)
+		goto out;
+	info->rtmax  = NFS_MAXDATA;
+	info->rtpref = fsinfo.tsize;
+	info->rtmult = fsinfo.bsize;
+	info->wtmax  = NFS_MAXDATA;
+	info->wtpref = fsinfo.tsize;
+	info->wtmult = fsinfo.bsize;
+	info->dtpref = fsinfo.tsize;
+	info->maxfilesize = 0x7FFFFFFF;
+	info->lease_time = 0;
+out:
+	return status;
+}
+
+static int
+nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		  struct nfs_pathconf *info)
+{
+	info->max_link = 0;
+	info->max_namelen = NFS2_MAXNAMLEN;
+	return 0;
+}
+
+extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
+
+static void
+nfs_read_done(struct rpc_task *task)
+{
+	struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
+
+	if (task->tk_status >= 0) {
+		nfs_refresh_inode(data->inode, data->res.fattr);
+		/* Emulate the eof flag, which isn't normally needed in NFSv2
+		 * as it is guaranteed to always return the file attributes
+		 */
+		if (data->args.offset + data->args.count >= data->res.fattr->size)
+			data->res.eof = 1;
+	}
+	nfs_readpage_result(task);
+}
+
+static void
+nfs_proc_read_setup(struct nfs_read_data *data)
+{
+	struct rpc_task		*task = &data->task;
+	struct inode		*inode = data->inode;
+	int			flags;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_READ],
+		.rpc_argp	= &data->args,
+		.rpc_resp	= &data->res,
+		.rpc_cred	= data->cred,
+	};
+
+	/* N.B. Do we need to test? Never called for swapfile inode */
+	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs_read_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+static void
+nfs_write_done(struct rpc_task *task)
+{
+	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+
+	if (task->tk_status >= 0)
+		nfs_refresh_inode(data->inode, data->res.fattr);
+	nfs_writeback_done(task);
+}
+
+static void
+nfs_proc_write_setup(struct nfs_write_data *data, int how)
+{
+	struct rpc_task		*task = &data->task;
+	struct inode		*inode = data->inode;
+	int			flags;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_WRITE],
+		.rpc_argp	= &data->args,
+		.rpc_resp	= &data->res,
+		.rpc_cred	= data->cred,
+	};
+
+	/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
+	data->args.stable = NFS_FILE_SYNC;
+
+	/* Set the initial flags for the task.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs_write_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+static void
+nfs_proc_commit_setup(struct nfs_write_data *data, int how)
+{
+	BUG();
+}
+
+static int
+nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
+}
+
+
+struct nfs_rpc_ops	nfs_v2_clientops = {
+	.version	= 2,		       /* protocol version */
+	.dentry_ops	= &nfs_dentry_operations,
+	.dir_inode_ops	= &nfs_dir_inode_operations,
+	.getroot	= nfs_proc_get_root,
+	.getattr	= nfs_proc_getattr,
+	.setattr	= nfs_proc_setattr,
+	.lookup		= nfs_proc_lookup,
+	.access		= NULL,		       /* access */
+	.readlink	= nfs_proc_readlink,
+	.read		= nfs_proc_read,
+	.write		= nfs_proc_write,
+	.commit		= NULL,		       /* commit */
+	.create		= nfs_proc_create,
+	.remove		= nfs_proc_remove,
+	.unlink_setup	= nfs_proc_unlink_setup,
+	.unlink_done	= nfs_proc_unlink_done,
+	.rename		= nfs_proc_rename,
+	.link		= nfs_proc_link,
+	.symlink	= nfs_proc_symlink,
+	.mkdir		= nfs_proc_mkdir,
+	.rmdir		= nfs_proc_rmdir,
+	.readdir	= nfs_proc_readdir,
+	.mknod		= nfs_proc_mknod,
+	.statfs		= nfs_proc_statfs,
+	.fsinfo		= nfs_proc_fsinfo,
+	.pathconf	= nfs_proc_pathconf,
+	.decode_dirent	= nfs_decode_dirent,
+	.read_setup	= nfs_proc_read_setup,
+	.write_setup	= nfs_proc_write_setup,
+	.commit_setup	= nfs_proc_commit_setup,
+	.file_open	= nfs_open,
+	.file_release	= nfs_release,
+	.lock		= nfs_proc_lock,
+};
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
new file mode 100644
index 00000000000..a0042fb5863
--- /dev/null
+++ b/fs/nfs/read.c
@@ -0,0 +1,618 @@
+/*
+ * linux/fs/nfs/read.c
+ *
+ * Block I/O for NFS
+ *
+ * Partial copy of Linus' read cache modifications to fs/nfs/file.c
+ * modified for async RPC by okir@monad.swb.de
+ *
+ * We do an ugly hack here in order to return proper error codes to the
+ * user program when a read request failed: since generic_file_read
+ * only checks the return value of inode->i_op->readpage() which is always 0
+ * for async RPC, we set the error bit of the page to 1 when an error occurs,
+ * and make nfs_readpage transmit requests synchronously when encountering this.
+ * This is only a small problem, though, since we now retry all operations
+ * within the RPC code when root squashing is suspected.
+ */
+
+#include <linux/config.h>
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/smp_lock.h>
+
+#include <asm/system.h>
+
+#define NFSDBG_FACILITY		NFSDBG_PAGECACHE
+
+static int nfs_pagein_one(struct list_head *, struct inode *);
+static void nfs_readpage_result_partial(struct nfs_read_data *, int);
+static void nfs_readpage_result_full(struct nfs_read_data *, int);
+
+static kmem_cache_t *nfs_rdata_cachep;
+mempool_t *nfs_rdata_mempool;
+
+#define MIN_POOL_READ	(32)
+
+void nfs_readdata_release(struct rpc_task *task)
+{
+        struct nfs_read_data   *data = (struct nfs_read_data *)task->tk_calldata;
+        nfs_readdata_free(data);
+}
+
+static
+unsigned int nfs_page_length(struct inode *inode, struct page *page)
+{
+	loff_t i_size = i_size_read(inode);
+	unsigned long idx;
+
+	if (i_size <= 0)
+		return 0;
+	idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
+	if (page->index > idx)
+		return 0;
+	if (page->index != idx)
+		return PAGE_CACHE_SIZE;
+	return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
+}
+
+static
+int nfs_return_empty_page(struct page *page)
+{
+	memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE);
+	SetPageUptodate(page);
+	unlock_page(page);
+	return 0;
+}
+
+/*
+ * Read a page synchronously.
+ */
+static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
+		struct page *page)
+{
+	unsigned int	rsize = NFS_SERVER(inode)->rsize;
+	unsigned int	count = PAGE_CACHE_SIZE;
+	int		result;
+	struct nfs_read_data *rdata;
+
+	rdata = nfs_readdata_alloc();
+	if (!rdata)
+		return -ENOMEM;
+
+	memset(rdata, 0, sizeof(*rdata));
+	rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+	rdata->cred = ctx->cred;
+	rdata->inode = inode;
+	INIT_LIST_HEAD(&rdata->pages);
+	rdata->args.fh = NFS_FH(inode);
+	rdata->args.context = ctx;
+	rdata->args.pages = &page;
+	rdata->args.pgbase = 0UL;
+	rdata->args.count = rsize;
+	rdata->res.fattr = &rdata->fattr;
+
+	dprintk("NFS: nfs_readpage_sync(%p)\n", page);
+
+	/*
+	 * This works now because the socket layer never tries to DMA
+	 * into this buffer directly.
+	 */
+	do {
+		if (count < rsize)
+			rdata->args.count = count;
+		rdata->res.count = rdata->args.count;
+		rdata->args.offset = page_offset(page) + rdata->args.pgbase;
+
+		dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
+			NFS_SERVER(inode)->hostname,
+			inode->i_sb->s_id,
+			(long long)NFS_FILEID(inode),
+			(unsigned long long)rdata->args.pgbase,
+			rdata->args.count);
+
+		lock_kernel();
+		result = NFS_PROTO(inode)->read(rdata);
+		unlock_kernel();
+
+		/*
+		 * Even if we had a partial success we can't mark the page
+		 * cache valid.
+		 */
+		if (result < 0) {
+			if (result == -EISDIR)
+				result = -EINVAL;
+			goto io_error;
+		}
+		count -= result;
+		rdata->args.pgbase += result;
+		/* Note: result == 0 should only happen if we're caching
+		 * a write that extends the file and punches a hole.
+		 */
+		if (rdata->res.eof != 0 || result == 0)
+			break;
+	} while (count);
+	NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+
+	if (count)
+		memclear_highpage_flush(page, rdata->args.pgbase, count);
+	SetPageUptodate(page);
+	if (PageError(page))
+		ClearPageError(page);
+	result = 0;
+
+io_error:
+	unlock_page(page);
+	nfs_readdata_free(rdata);
+	return result;
+}
+
+static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
+		struct page *page)
+{
+	LIST_HEAD(one_request);
+	struct nfs_page	*new;
+	unsigned int len;
+
+	len = nfs_page_length(inode, page);
+	if (len == 0)
+		return nfs_return_empty_page(page);
+	new = nfs_create_request(ctx, inode, page, 0, len);
+	if (IS_ERR(new)) {
+		unlock_page(page);
+		return PTR_ERR(new);
+	}
+	if (len < PAGE_CACHE_SIZE)
+		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
+
+	nfs_lock_request(new);
+	nfs_list_add_request(new, &one_request);
+	nfs_pagein_one(&one_request, inode);
+	return 0;
+}
+
+static void nfs_readpage_release(struct nfs_page *req)
+{
+	unlock_page(req->wb_page);
+
+	nfs_clear_request(req);
+	nfs_release_request(req);
+	nfs_unlock_request(req);
+
+	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
+			req->wb_context->dentry->d_inode->i_sb->s_id,
+			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+			req->wb_bytes,
+			(long long)req_offset(req));
+}
+
+/*
+ * Set up the NFS read request struct
+ */
+static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+		unsigned int count, unsigned int offset)
+{
+	struct inode		*inode;
+
+	data->req	  = req;
+	data->inode	  = inode = req->wb_context->dentry->d_inode;
+	data->cred	  = req->wb_context->cred;
+
+	data->args.fh     = NFS_FH(inode);
+	data->args.offset = req_offset(req) + offset;
+	data->args.pgbase = req->wb_pgbase + offset;
+	data->args.pages  = data->pagevec;
+	data->args.count  = count;
+	data->args.context = req->wb_context;
+
+	data->res.fattr   = &data->fattr;
+	data->res.count   = count;
+	data->res.eof     = 0;
+
+	NFS_PROTO(inode)->read_setup(data);
+
+	data->task.tk_cookie = (unsigned long)inode;
+	data->task.tk_calldata = data;
+	/* Release requests */
+	data->task.tk_release = nfs_readdata_release;
+
+	dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+			data->task.tk_pid,
+			inode->i_sb->s_id,
+			(long long)NFS_FILEID(inode),
+			count,
+			(unsigned long long)data->args.offset);
+}
+
+static void
+nfs_async_read_error(struct list_head *head)
+{
+	struct nfs_page	*req;
+
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		SetPageError(req->wb_page);
+		nfs_readpage_release(req);
+	}
+}
+
+/*
+ * Start an async read operation
+ */
+static void nfs_execute_read(struct nfs_read_data *data)
+{
+	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
+	sigset_t oldset;
+
+	rpc_clnt_sigmask(clnt, &oldset);
+	lock_kernel();
+	rpc_execute(&data->task);
+	unlock_kernel();
+	rpc_clnt_sigunmask(clnt, &oldset);
+}
+
+/*
+ * Generate multiple requests to fill a single page.
+ *
+ * We optimize to reduce the number of read operations on the wire.  If we
+ * detect that we're reading a page, or an area of a page, that is past the
+ * end of file, we do not generate NFS read operations but just clear the
+ * parts of the page that would have come back zero from the server anyway.
+ *
+ * We rely on the cached value of i_size to make this determination; another
+ * client can fill pages on the server past our cached end-of-file, but we
+ * won't see the new data until our attribute cache is updated.  This is more
+ * or less conventional NFS client behavior.
+ */
+static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
+{
+	struct nfs_page *req = nfs_list_entry(head->next);
+	struct page *page = req->wb_page;
+	struct nfs_read_data *data;
+	unsigned int rsize = NFS_SERVER(inode)->rsize;
+	unsigned int nbytes, offset;
+	int requests = 0;
+	LIST_HEAD(list);
+
+	nfs_list_remove_request(req);
+
+	nbytes = req->wb_bytes;
+	for(;;) {
+		data = nfs_readdata_alloc();
+		if (!data)
+			goto out_bad;
+		INIT_LIST_HEAD(&data->pages);
+		list_add(&data->pages, &list);
+		requests++;
+		if (nbytes <= rsize)
+			break;
+		nbytes -= rsize;
+	}
+	atomic_set(&req->wb_complete, requests);
+
+	ClearPageError(page);
+	offset = 0;
+	nbytes = req->wb_bytes;
+	do {
+		data = list_entry(list.next, struct nfs_read_data, pages);
+		list_del_init(&data->pages);
+
+		data->pagevec[0] = page;
+		data->complete = nfs_readpage_result_partial;
+
+		if (nbytes > rsize) {
+			nfs_read_rpcsetup(req, data, rsize, offset);
+			offset += rsize;
+			nbytes -= rsize;
+		} else {
+			nfs_read_rpcsetup(req, data, nbytes, offset);
+			nbytes = 0;
+		}
+		nfs_execute_read(data);
+	} while (nbytes != 0);
+
+	return 0;
+
+out_bad:
+	while (!list_empty(&list)) {
+		data = list_entry(list.next, struct nfs_read_data, pages);
+		list_del(&data->pages);
+		nfs_readdata_free(data);
+	}
+	SetPageError(page);
+	nfs_readpage_release(req);
+	return -ENOMEM;
+}
+
+static int nfs_pagein_one(struct list_head *head, struct inode *inode)
+{
+	struct nfs_page		*req;
+	struct page		**pages;
+	struct nfs_read_data	*data;
+	unsigned int		count;
+
+	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
+		return nfs_pagein_multi(head, inode);
+
+	data = nfs_readdata_alloc();
+	if (!data)
+		goto out_bad;
+
+	INIT_LIST_HEAD(&data->pages);
+	pages = data->pagevec;
+	count = 0;
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_list_add_request(req, &data->pages);
+		ClearPageError(req->wb_page);
+		*pages++ = req->wb_page;
+		count += req->wb_bytes;
+	}
+	req = nfs_list_entry(data->pages.next);
+
+	data->complete = nfs_readpage_result_full;
+	nfs_read_rpcsetup(req, data, count, 0);
+
+	nfs_execute_read(data);
+	return 0;
+out_bad:
+	nfs_async_read_error(head);
+	return -ENOMEM;
+}
+
+static int
+nfs_pagein_list(struct list_head *head, int rpages)
+{
+	LIST_HEAD(one_request);
+	struct nfs_page		*req;
+	int			error = 0;
+	unsigned int		pages = 0;
+
+	while (!list_empty(head)) {
+		pages += nfs_coalesce_requests(head, &one_request, rpages);
+		req = nfs_list_entry(one_request.next);
+		error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode);
+		if (error < 0)
+			break;
+	}
+	if (error >= 0)
+		return pages;
+
+	nfs_async_read_error(head);
+	return error;
+}
+
+/*
+ * Handle a read reply that fills part of a page.
+ */
+static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
+{
+	struct nfs_page *req = data->req;
+	struct page *page = req->wb_page;
+ 
+	if (status >= 0) {
+		unsigned int request = data->args.count;
+		unsigned int result = data->res.count;
+
+		if (result < request) {
+			memclear_highpage_flush(page,
+						data->args.pgbase + result,
+						request - result);
+		}
+	} else
+		SetPageError(page);
+
+	if (atomic_dec_and_test(&req->wb_complete)) {
+		if (!PageError(page))
+			SetPageUptodate(page);
+		nfs_readpage_release(req);
+	}
+}
+
+/*
+ * This is the callback from RPC telling us whether a reply was
+ * received or some error occurred (timeout or socket shutdown).
+ */
+static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
+{
+	unsigned int count = data->res.count;
+
+	while (!list_empty(&data->pages)) {
+		struct nfs_page *req = nfs_list_entry(data->pages.next);
+		struct page *page = req->wb_page;
+		nfs_list_remove_request(req);
+
+		if (status >= 0) {
+			if (count < PAGE_CACHE_SIZE) {
+				if (count < req->wb_bytes)
+					memclear_highpage_flush(page,
+							req->wb_pgbase + count,
+							req->wb_bytes - count);
+				count = 0;
+			} else
+				count -= PAGE_CACHE_SIZE;
+			SetPageUptodate(page);
+		} else
+			SetPageError(page);
+		nfs_readpage_release(req);
+	}
+}
+
+/*
+ * This is the callback from RPC telling us whether a reply was
+ * received or some error occurred (timeout or socket shutdown).
+ */
+void nfs_readpage_result(struct rpc_task *task)
+{
+	struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
+	struct nfs_readargs *argp = &data->args;
+	struct nfs_readres *resp = &data->res;
+	int status = task->tk_status;
+
+	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
+		task->tk_pid, status);
+
+	/* Is this a short read? */
+	if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+		/* Has the server at least made some progress? */
+		if (resp->count != 0) {
+			/* Yes, so retry the read at the end of the data */
+			argp->offset += resp->count;
+			argp->pgbase += resp->count;
+			argp->count -= resp->count;
+			rpc_restart_call(task);
+			return;
+		}
+		task->tk_status = -EIO;
+	}
+	NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME;
+	data->complete(data, status);
+}
+
+/*
+ * Read a page over NFS.
+ * We read the page synchronously in the following case:
+ *  -	The error flag is set for this page. This happens only when a
+ *	previous async read operation failed.
+ */
+int nfs_readpage(struct file *file, struct page *page)
+{
+	struct nfs_open_context *ctx;
+	struct inode *inode = page->mapping->host;
+	int		error;
+
+	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
+		page, PAGE_CACHE_SIZE, page->index);
+	/*
+	 * Try to flush any pending writes to the file..
+	 *
+	 * NOTE! Because we own the page lock, there cannot
+	 * be any new pending writes generated at this point
+	 * for this page (other pages can be written to).
+	 */
+	error = nfs_wb_page(inode, page);
+	if (error)
+		goto out_error;
+
+	if (file == NULL) {
+		ctx = nfs_find_open_context(inode, FMODE_READ);
+		if (ctx == NULL)
+			return -EBADF;
+	} else
+		ctx = get_nfs_open_context((struct nfs_open_context *)
+				file->private_data);
+	if (!IS_SYNC(inode)) {
+		error = nfs_readpage_async(ctx, inode, page);
+		goto out;
+	}
+
+	error = nfs_readpage_sync(ctx, inode, page);
+	if (error < 0 && IS_SWAPFILE(inode))
+		printk("Aiee.. nfs swap-in of page failed!\n");
+out:
+	put_nfs_open_context(ctx);
+	return error;
+
+out_error:
+	unlock_page(page);
+	return error;
+}
+
+struct nfs_readdesc {
+	struct list_head *head;
+	struct nfs_open_context *ctx;
+};
+
+static int
+readpage_async_filler(void *data, struct page *page)
+{
+	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
+	struct inode *inode = page->mapping->host;
+	struct nfs_page *new;
+	unsigned int len;
+
+	nfs_wb_page(inode, page);
+	len = nfs_page_length(inode, page);
+	if (len == 0)
+		return nfs_return_empty_page(page);
+	new = nfs_create_request(desc->ctx, inode, page, 0, len);
+	if (IS_ERR(new)) {
+			SetPageError(page);
+			unlock_page(page);
+			return PTR_ERR(new);
+	}
+	if (len < PAGE_CACHE_SIZE)
+		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
+	nfs_lock_request(new);
+	nfs_list_add_request(new, desc->head);
+	return 0;
+}
+
+int nfs_readpages(struct file *filp, struct address_space *mapping,
+		struct list_head *pages, unsigned nr_pages)
+{
+	LIST_HEAD(head);
+	struct nfs_readdesc desc = {
+		.head		= &head,
+	};
+	struct inode *inode = mapping->host;
+	struct nfs_server *server = NFS_SERVER(inode);
+	int ret;
+
+	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
+			inode->i_sb->s_id,
+			(long long)NFS_FILEID(inode),
+			nr_pages);
+
+	if (filp == NULL) {
+		desc.ctx = nfs_find_open_context(inode, FMODE_READ);
+		if (desc.ctx == NULL)
+			return -EBADF;
+	} else
+		desc.ctx = get_nfs_open_context((struct nfs_open_context *)
+				filp->private_data);
+	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
+	if (!list_empty(&head)) {
+		int err = nfs_pagein_list(&head, server->rpages);
+		if (!ret)
+			ret = err;
+	}
+	put_nfs_open_context(desc.ctx);
+	return ret;
+}
+
+int nfs_init_readpagecache(void)
+{
+	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
+					     sizeof(struct nfs_read_data),
+					     0, SLAB_HWCACHE_ALIGN,
+					     NULL, NULL);
+	if (nfs_rdata_cachep == NULL)
+		return -ENOMEM;
+
+	nfs_rdata_mempool = mempool_create(MIN_POOL_READ,
+					   mempool_alloc_slab,
+					   mempool_free_slab,
+					   nfs_rdata_cachep);
+	if (nfs_rdata_mempool == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void nfs_destroy_readpagecache(void)
+{
+	mempool_destroy(nfs_rdata_mempool);
+	if (kmem_cache_destroy(nfs_rdata_cachep))
+		printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
+}
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
new file mode 100644
index 00000000000..35f10659914
--- /dev/null
+++ b/fs/nfs/symlink.c
@@ -0,0 +1,117 @@
+/*
+ *  linux/fs/nfs/symlink.c
+ *
+ *  Copyright (C) 1992  Rick Sladkey
+ *
+ *  Optimization changes Copyright (C) 1994 Florian La Roche
+ *
+ *  Jun 7 1999, cache symlink lookups in the page cache.  -DaveM
+ *
+ *  nfs symlink handling code
+ */
+
+#define NFS_NEED_XDR_TYPES
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs_fs.h>
+#include <linux/pagemap.h>
+#include <linux/stat.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/namei.h>
+
+/* Symlink caching in the page cache is even more simplistic
+ * and straight-forward than readdir caching.
+ *
+ * At the beginning of the page we store pointer to struct page in question,
+ * simplifying nfs_put_link() (if inode got invalidated we can't find the page
+ * to be freed via pagecache lookup).
+ * The NUL-terminated string follows immediately thereafter.
+ */
+
+struct nfs_symlink {
+	struct page *page;
+	char body[0];
+};
+
+static int nfs_symlink_filler(struct inode *inode, struct page *page)
+{
+	const unsigned int pgbase = offsetof(struct nfs_symlink, body);
+	const unsigned int pglen = PAGE_SIZE - pgbase;
+	int error;
+
+	lock_kernel();
+	error = NFS_PROTO(inode)->readlink(inode, page, pgbase, pglen);
+	unlock_kernel();
+	if (error < 0)
+		goto error;
+	SetPageUptodate(page);
+	unlock_page(page);
+	return 0;
+
+error:
+	SetPageError(page);
+	unlock_page(page);
+	return -EIO;
+}
+
+static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct inode *inode = dentry->d_inode;
+	struct page *page;
+	struct nfs_symlink *p;
+	void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode));
+	if (err)
+		goto read_failed;
+	page = read_cache_page(&inode->i_data, 0,
+				(filler_t *)nfs_symlink_filler, inode);
+	if (IS_ERR(page)) {
+		err = page;
+		goto read_failed;
+	}
+	if (!PageUptodate(page)) {
+		err = ERR_PTR(-EIO);
+		goto getlink_read_error;
+	}
+	p = kmap(page);
+	p->page = page;
+	nd_set_link(nd, p->body);
+	return 0;
+
+getlink_read_error:
+	page_cache_release(page);
+read_failed:
+	nd_set_link(nd, err);
+	return 0;
+}
+
+static void nfs_put_link(struct dentry *dentry, struct nameidata *nd)
+{
+	char *s = nd_get_link(nd);
+	if (!IS_ERR(s)) {
+		struct nfs_symlink *p;
+		struct page *page;
+
+		p = container_of(s, struct nfs_symlink, body[0]);
+		page = p->page;
+
+		kunmap(page);
+		page_cache_release(page);
+	}
+}
+
+/*
+ * symlinks can't do much...
+ */
+struct inode_operations nfs_symlink_inode_operations = {
+	.readlink	= generic_readlink,
+	.follow_link	= nfs_follow_link,
+	.put_link	= nfs_put_link,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+};
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
new file mode 100644
index 00000000000..f732541a333
--- /dev/null
+++ b/fs/nfs/unlink.c
@@ -0,0 +1,227 @@
+/*
+ *  linux/fs/nfs/unlink.c
+ *
+ * nfs sillydelete handling
+ *
+ * NOTE: we rely on holding the BKL for list manipulation protection.
+ */
+
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/dcache.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs_fs.h>
+
+
+struct nfs_unlinkdata {
+	struct nfs_unlinkdata	*next;
+	struct dentry	*dir, *dentry;
+	struct qstr	name;
+	struct rpc_task	task;
+	struct rpc_cred	*cred;
+	unsigned int	count;
+};
+
+static struct nfs_unlinkdata	*nfs_deletes;
+static RPC_WAITQ(nfs_delete_queue, "nfs_delete_queue");
+
+/**
+ * nfs_detach_unlinkdata - Remove asynchronous unlink from global list
+ * @data: pointer to descriptor
+ */
+static inline void
+nfs_detach_unlinkdata(struct nfs_unlinkdata *data)
+{
+	struct nfs_unlinkdata	**q;
+
+	for (q = &nfs_deletes; *q != NULL; q = &((*q)->next)) {
+		if (*q == data) {
+			*q = data->next;
+			break;
+		}
+	}
+}
+
+/**
+ * nfs_put_unlinkdata - release data from a sillydelete operation.
+ * @data: pointer to unlink structure.
+ */
+static void
+nfs_put_unlinkdata(struct nfs_unlinkdata *data)
+{
+	if (--data->count == 0) {
+		nfs_detach_unlinkdata(data);
+		if (data->name.name != NULL)
+			kfree(data->name.name);
+		kfree(data);
+	}
+}
+
+#define NAME_ALLOC_LEN(len)	((len+16) & ~15)
+/**
+ * nfs_copy_dname - copy dentry name to data structure
+ * @dentry: pointer to dentry
+ * @data: nfs_unlinkdata
+ */
+static inline void
+nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data)
+{
+	char		*str;
+	int		len = dentry->d_name.len;
+
+	str = kmalloc(NAME_ALLOC_LEN(len), GFP_KERNEL);
+	if (!str)
+		return;
+	memcpy(str, dentry->d_name.name, len);
+	if (!data->name.len) {
+		data->name.len = len;
+		data->name.name = str;
+	} else
+		kfree(str);
+}
+
+/**
+ * nfs_async_unlink_init - Initialize the RPC info
+ * @task: rpc_task of the sillydelete
+ *
+ * We delay initializing RPC info until after the call to dentry_iput()
+ * in order to minimize races against rename().
+ */
+static void
+nfs_async_unlink_init(struct rpc_task *task)
+{
+	struct nfs_unlinkdata	*data = (struct nfs_unlinkdata *)task->tk_calldata;
+	struct dentry		*dir = data->dir;
+	struct rpc_message	msg = {
+		.rpc_cred	= data->cred,
+	};
+	int			status = -ENOENT;
+
+	if (!data->name.len)
+		goto out_err;
+
+	status = NFS_PROTO(dir->d_inode)->unlink_setup(&msg, dir, &data->name);
+	if (status < 0)
+		goto out_err;
+	nfs_begin_data_update(dir->d_inode);
+	rpc_call_setup(task, &msg, 0);
+	return;
+ out_err:
+	rpc_exit(task, status);
+}
+
+/**
+ * nfs_async_unlink_done - Sillydelete post-processing
+ * @task: rpc_task of the sillydelete
+ *
+ * Do the directory attribute update.
+ */
+static void
+nfs_async_unlink_done(struct rpc_task *task)
+{
+	struct nfs_unlinkdata	*data = (struct nfs_unlinkdata *)task->tk_calldata;
+	struct dentry		*dir = data->dir;
+	struct inode		*dir_i;
+
+	if (!dir)
+		return;
+	dir_i = dir->d_inode;
+	nfs_end_data_update(dir_i);
+	if (NFS_PROTO(dir_i)->unlink_done(dir, task))
+		return;
+	put_rpccred(data->cred);
+	data->cred = NULL;
+	dput(dir);
+}
+
+/**
+ * nfs_async_unlink_release - Release the sillydelete data.
+ * @task: rpc_task of the sillydelete
+ *
+ * We need to call nfs_put_unlinkdata as a 'tk_release' task since the
+ * rpc_task would be freed too.
+ */
+static void
+nfs_async_unlink_release(struct rpc_task *task)
+{
+	struct nfs_unlinkdata	*data = (struct nfs_unlinkdata *)task->tk_calldata;
+	nfs_put_unlinkdata(data);
+}
+
+/**
+ * nfs_async_unlink - asynchronous unlinking of a file
+ * @dentry: dentry to unlink
+ */
+int
+nfs_async_unlink(struct dentry *dentry)
+{
+	struct dentry	*dir = dentry->d_parent;
+	struct nfs_unlinkdata	*data;
+	struct rpc_task	*task;
+	struct rpc_clnt	*clnt = NFS_CLIENT(dir->d_inode);
+	int		status = -ENOMEM;
+
+	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		goto out;
+	memset(data, 0, sizeof(*data));
+
+	data->cred = rpcauth_lookupcred(clnt->cl_auth, 0);
+	if (IS_ERR(data->cred)) {
+		status = PTR_ERR(data->cred);
+		goto out_free;
+	}
+	data->dir = dget(dir);
+	data->dentry = dentry;
+
+	data->next = nfs_deletes;
+	nfs_deletes = data;
+	data->count = 1;
+
+	task = &data->task;
+	rpc_init_task(task, clnt, nfs_async_unlink_done , RPC_TASK_ASYNC);
+	task->tk_calldata = data;
+	task->tk_action = nfs_async_unlink_init;
+	task->tk_release = nfs_async_unlink_release;
+
+	spin_lock(&dentry->d_lock);
+	dentry->d_flags |= DCACHE_NFSFS_RENAMED;
+	spin_unlock(&dentry->d_lock);
+
+	rpc_sleep_on(&nfs_delete_queue, task, NULL, NULL);
+	status = 0;
+ out:
+	return status;
+out_free:
+	kfree(data);
+	return status;
+}
+
+/**
+ * nfs_complete_unlink - Initialize completion of the sillydelete
+ * @dentry: dentry to delete
+ *
+ * Since we're most likely to be called by dentry_iput(), we
+ * only use the dentry to find the sillydelete. We then copy the name
+ * into the qstr.
+ */
+void
+nfs_complete_unlink(struct dentry *dentry)
+{
+	struct nfs_unlinkdata	*data;
+
+	for(data = nfs_deletes; data != NULL; data = data->next) {
+		if (dentry == data->dentry)
+			break;
+	}
+	if (!data)
+		return;
+	data->count++;
+	nfs_copy_dname(dentry, data);
+	spin_lock(&dentry->d_lock);
+	dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
+	spin_unlock(&dentry->d_lock);
+	rpc_wake_up_task(&data->task);
+	nfs_put_unlinkdata(data);
+}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
new file mode 100644
index 00000000000..6f7a4af3bc4
--- /dev/null
+++ b/fs/nfs/write.c
@@ -0,0 +1,1431 @@
+/*
+ * linux/fs/nfs/write.c
+ *
+ * Writing file data over NFS.
+ *
+ * We do it like this: When a (user) process wishes to write data to an
+ * NFS file, a write request is allocated that contains the RPC task data
+ * plus some info on the page to be written, and added to the inode's
+ * write chain. If the process writes past the end of the page, an async
+ * RPC call to write the page is scheduled immediately; otherwise, the call
+ * is delayed for a few seconds.
+ *
+ * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
+ *
+ * Write requests are kept on the inode's writeback list. Each entry in
+ * that list references the page (portion) to be written. When the
+ * cache timeout has expired, the RPC task is woken up, and tries to
+ * lock the page. As soon as it manages to do so, the request is moved
+ * from the writeback list to the writelock list.
+ *
+ * Note: we must make sure never to confuse the inode passed in the
+ * write_page request with the one in page->inode. As far as I understand
+ * it, these are different when doing a swap-out.
+ *
+ * To understand everything that goes on here and in the NFS read code,
+ * one should be aware that a page is locked in exactly one of the following
+ * cases:
+ *
+ *  -	A write request is in progress.
+ *  -	A user process is in generic_file_write/nfs_update_page
+ *  -	A user process is in generic_file_read
+ *
+ * Also note that because of the way pages are invalidated in
+ * nfs_revalidate_inode, the following assertions hold:
+ *
+ *  -	If a page is dirty, there will be no read requests (a page will
+ *	not be re-read unless invalidated by nfs_revalidate_inode).
+ *  -	If the page is not uptodate, there will be no pending write
+ *	requests, and no process will be in nfs_update_page.
+ *
+ * FIXME: Interaction with the vmscan routines is not optimal yet.
+ * Either vmscan must be made nfs-savvy, or we need a different page
+ * reclaim concept that supports something like FS-independent
+ * buffer_heads with a b_ops-> field.
+ *
+ * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/mpage.h>
+#include <linux/writeback.h>
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs_page.h>
+#include <asm/uaccess.h>
+#include <linux/smp_lock.h>
+
+#include "delegation.h"
+
+#define NFSDBG_FACILITY		NFSDBG_PAGECACHE
+
+#define MIN_POOL_WRITE		(32)
+#define MIN_POOL_COMMIT		(4)
+
+/*
+ * Local function declarations
+ */
+static struct nfs_page * nfs_update_request(struct nfs_open_context*,
+					    struct inode *,
+					    struct page *,
+					    unsigned int, unsigned int);
+static void nfs_writeback_done_partial(struct nfs_write_data *, int);
+static void nfs_writeback_done_full(struct nfs_write_data *, int);
+static int nfs_wait_on_write_congestion(struct address_space *, int);
+static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
+static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
+			   unsigned int npages, int how);
+
+static kmem_cache_t *nfs_wdata_cachep;
+mempool_t *nfs_wdata_mempool;
+static mempool_t *nfs_commit_mempool;
+
+static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
+
+static inline struct nfs_write_data *nfs_commit_alloc(void)
+{
+	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
+	if (p) {
+		memset(p, 0, sizeof(*p));
+		INIT_LIST_HEAD(&p->pages);
+	}
+	return p;
+}
+
+static inline void nfs_commit_free(struct nfs_write_data *p)
+{
+	mempool_free(p, nfs_commit_mempool);
+}
+
+static void nfs_writedata_release(struct rpc_task *task)
+{
+	struct nfs_write_data	*wdata = (struct nfs_write_data *)task->tk_calldata;
+	nfs_writedata_free(wdata);
+}
+
+/* Adjust the file length if we're writing beyond the end */
+static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
+{
+	struct inode *inode = page->mapping->host;
+	loff_t end, i_size = i_size_read(inode);
+	unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+
+	if (i_size > 0 && page->index < end_index)
+		return;
+	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
+	if (i_size >= end)
+		return;
+	i_size_write(inode, end);
+}
+
+/* We can set the PG_uptodate flag if we see that a write request
+ * covers the full page.
+ */
+static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
+{
+	loff_t end_offs;
+
+	if (PageUptodate(page))
+		return;
+	if (base != 0)
+		return;
+	if (count == PAGE_CACHE_SIZE) {
+		SetPageUptodate(page);
+		return;
+	}
+
+	end_offs = i_size_read(page->mapping->host) - 1;
+	if (end_offs < 0)
+		return;
+	/* Is this the last page? */
+	if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT))
+		return;
+	/* This is the last page: set PG_uptodate if we cover the entire
+	 * extent of the data, then zero the rest of the page.
+	 */
+	if (count == (unsigned int)(end_offs & (PAGE_CACHE_SIZE - 1)) + 1) {
+		memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count);
+		SetPageUptodate(page);
+	}
+}
+
+/*
+ * Write a page synchronously.
+ * Offset is the data offset within the page.
+ */
+static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
+		struct page *page, unsigned int offset, unsigned int count,
+		int how)
+{
+	unsigned int	wsize = NFS_SERVER(inode)->wsize;
+	int		result, written = 0;
+	struct nfs_write_data *wdata;
+
+	wdata = nfs_writedata_alloc();
+	if (!wdata)
+		return -ENOMEM;
+
+	wdata->flags = how;
+	wdata->cred = ctx->cred;
+	wdata->inode = inode;
+	wdata->args.fh = NFS_FH(inode);
+	wdata->args.context = ctx;
+	wdata->args.pages = &page;
+	wdata->args.stable = NFS_FILE_SYNC;
+	wdata->args.pgbase = offset;
+	wdata->args.count = wsize;
+	wdata->res.fattr = &wdata->fattr;
+	wdata->res.verf = &wdata->verf;
+
+	dprintk("NFS:      nfs_writepage_sync(%s/%Ld %d@%Ld)\n",
+		inode->i_sb->s_id,
+		(long long)NFS_FILEID(inode),
+		count, (long long)(page_offset(page) + offset));
+
+	nfs_begin_data_update(inode);
+	do {
+		if (count < wsize)
+			wdata->args.count = count;
+		wdata->args.offset = page_offset(page) + wdata->args.pgbase;
+
+		result = NFS_PROTO(inode)->write(wdata);
+
+		if (result < 0) {
+			/* Must mark the page invalid after I/O error */
+			ClearPageUptodate(page);
+			goto io_error;
+		}
+		if (result < wdata->args.count)
+			printk(KERN_WARNING "NFS: short write, count=%u, result=%d\n",
+					wdata->args.count, result);
+
+		wdata->args.offset += result;
+	        wdata->args.pgbase += result;
+		written += result;
+		count -= result;
+	} while (count);
+	/* Update file length */
+	nfs_grow_file(page, offset, written);
+	/* Set the PG_uptodate flag? */
+	nfs_mark_uptodate(page, offset, written);
+
+	if (PageError(page))
+		ClearPageError(page);
+
+io_error:
+	nfs_end_data_update_defer(inode);
+	nfs_writedata_free(wdata);
+	return written ? written : result;
+}
+
+static int nfs_writepage_async(struct nfs_open_context *ctx,
+		struct inode *inode, struct page *page,
+		unsigned int offset, unsigned int count)
+{
+	struct nfs_page	*req;
+	int		status;
+
+	req = nfs_update_request(ctx, inode, page, offset, count);
+	status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
+	if (status < 0)
+		goto out;
+	/* Update file length */
+	nfs_grow_file(page, offset, count);
+	/* Set the PG_uptodate flag? */
+	nfs_mark_uptodate(page, offset, count);
+	nfs_unlock_request(req);
+ out:
+	return status;
+}
+
+static int wb_priority(struct writeback_control *wbc)
+{
+	if (wbc->for_reclaim)
+		return FLUSH_HIGHPRI;
+	if (wbc->for_kupdate)
+		return FLUSH_LOWPRI;
+	return 0;
+}
+
+/*
+ * Write an mmapped page to the server.
+ */
+int nfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct nfs_open_context *ctx;
+	struct inode *inode = page->mapping->host;
+	unsigned long end_index;
+	unsigned offset = PAGE_CACHE_SIZE;
+	loff_t i_size = i_size_read(inode);
+	int inode_referenced = 0;
+	int priority = wb_priority(wbc);
+	int err;
+
+	/*
+	 * Note: We need to ensure that we have a reference to the inode
+	 *       if we are to do asynchronous writes. If not, waiting
+	 *       in nfs_wait_on_request() may deadlock with clear_inode().
+	 *
+	 *       If igrab() fails here, then it is in any case safe to
+	 *       call nfs_wb_page(), since there will be no pending writes.
+	 */
+	if (igrab(inode) != 0)
+		inode_referenced = 1;
+	end_index = i_size >> PAGE_CACHE_SHIFT;
+
+	/* Ensure we've flushed out any previous writes */
+	nfs_wb_page_priority(inode, page, priority);
+
+	/* easy case */
+	if (page->index < end_index)
+		goto do_it;
+	/* things got complicated... */
+	offset = i_size & (PAGE_CACHE_SIZE-1);
+
+	/* OK, are we completely out? */
+	err = 0; /* potential race with truncate - ignore */
+	if (page->index >= end_index+1 || !offset)
+		goto out;
+do_it:
+	ctx = nfs_find_open_context(inode, FMODE_WRITE);
+	if (ctx == NULL) {
+		err = -EBADF;
+		goto out;
+	}
+	lock_kernel();
+	if (!IS_SYNC(inode) && inode_referenced) {
+		err = nfs_writepage_async(ctx, inode, page, 0, offset);
+		if (err >= 0) {
+			err = 0;
+			if (wbc->for_reclaim)
+				nfs_flush_inode(inode, 0, 0, FLUSH_STABLE);
+		}
+	} else {
+		err = nfs_writepage_sync(ctx, inode, page, 0,
+						offset, priority);
+		if (err >= 0) {
+			if (err != offset)
+				redirty_page_for_writepage(wbc, page);
+			err = 0;
+		}
+	}
+	unlock_kernel();
+	put_nfs_open_context(ctx);
+out:
+	unlock_page(page);
+	if (inode_referenced)
+		iput(inode);
+	return err; 
+}
+
+/*
+ * Note: causes nfs_update_request() to block on the assumption
+ * 	 that the writeback is generated due to memory pressure.
+ */
+int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
+{
+	struct backing_dev_info *bdi = mapping->backing_dev_info;
+	struct inode *inode = mapping->host;
+	int err;
+
+	err = generic_writepages(mapping, wbc);
+	if (err)
+		return err;
+	while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) {
+		if (wbc->nonblocking)
+			return 0;
+		nfs_wait_on_write_congestion(mapping, 0);
+	}
+	err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
+	if (err < 0)
+		goto out;
+	wbc->nr_to_write -= err;
+	if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
+		err = nfs_wait_on_requests(inode, 0, 0);
+		if (err < 0)
+			goto out;
+	}
+	err = nfs_commit_inode(inode, 0, 0, wb_priority(wbc));
+	if (err > 0) {
+		wbc->nr_to_write -= err;
+		err = 0;
+	}
+out:
+	clear_bit(BDI_write_congested, &bdi->state);
+	wake_up_all(&nfs_write_congestion);
+	return err;
+}
+
+/*
+ * Insert a write request into an inode
+ */
+static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int error;
+
+	error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
+	BUG_ON(error == -EEXIST);
+	if (error)
+		return error;
+	if (!nfsi->npages) {
+		igrab(inode);
+		nfs_begin_data_update(inode);
+		if (nfs_have_delegation(inode, FMODE_WRITE))
+			nfsi->change_attr++;
+	}
+	nfsi->npages++;
+	atomic_inc(&req->wb_count);
+	return 0;
+}
+
+/*
+ * Insert a write request into an inode
+ */
+static void nfs_inode_remove_request(struct nfs_page *req)
+{
+	struct inode *inode = req->wb_context->dentry->d_inode;
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	BUG_ON (!NFS_WBACK_BUSY(req));
+
+	spin_lock(&nfsi->req_lock);
+	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
+	nfsi->npages--;
+	if (!nfsi->npages) {
+		spin_unlock(&nfsi->req_lock);
+		nfs_end_data_update_defer(inode);
+		iput(inode);
+	} else
+		spin_unlock(&nfsi->req_lock);
+	nfs_clear_request(req);
+	nfs_release_request(req);
+}
+
+/*
+ * Find a request
+ */
+static inline struct nfs_page *
+_nfs_find_request(struct inode *inode, unsigned long index)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_page *req;
+
+	req = (struct nfs_page*)radix_tree_lookup(&nfsi->nfs_page_tree, index);
+	if (req)
+		atomic_inc(&req->wb_count);
+	return req;
+}
+
+static struct nfs_page *
+nfs_find_request(struct inode *inode, unsigned long index)
+{
+	struct nfs_page		*req;
+	struct nfs_inode	*nfsi = NFS_I(inode);
+
+	spin_lock(&nfsi->req_lock);
+	req = _nfs_find_request(inode, index);
+	spin_unlock(&nfsi->req_lock);
+	return req;
+}
+
+/*
+ * Add a request to the inode's dirty list.
+ */
+static void
+nfs_mark_request_dirty(struct nfs_page *req)
+{
+	struct inode *inode = req->wb_context->dentry->d_inode;
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	spin_lock(&nfsi->req_lock);
+	nfs_list_add_request(req, &nfsi->dirty);
+	nfsi->ndirty++;
+	spin_unlock(&nfsi->req_lock);
+	inc_page_state(nr_dirty);
+	mark_inode_dirty(inode);
+}
+
+/*
+ * Check if a request is dirty
+ */
+static inline int
+nfs_dirty_request(struct nfs_page *req)
+{
+	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
+	return !list_empty(&req->wb_list) && req->wb_list_head == &nfsi->dirty;
+}
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+/*
+ * Add a request to the inode's commit list.
+ */
+static void
+nfs_mark_request_commit(struct nfs_page *req)
+{
+	struct inode *inode = req->wb_context->dentry->d_inode;
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	spin_lock(&nfsi->req_lock);
+	nfs_list_add_request(req, &nfsi->commit);
+	nfsi->ncommit++;
+	spin_unlock(&nfsi->req_lock);
+	inc_page_state(nr_unstable);
+	mark_inode_dirty(inode);
+}
+#endif
+
+/*
+ * Wait for a request to complete.
+ *
+ * Interruptible by signals only if mounted with intr flag.
+ */
+static int
+nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_page *req;
+	unsigned long		idx_end, next;
+	unsigned int		res = 0;
+	int			error;
+
+	if (npages == 0)
+		idx_end = ~0;
+	else
+		idx_end = idx_start + npages - 1;
+
+	spin_lock(&nfsi->req_lock);
+	next = idx_start;
+	while (radix_tree_gang_lookup(&nfsi->nfs_page_tree, (void **)&req, next, 1)) {
+		if (req->wb_index > idx_end)
+			break;
+
+		next = req->wb_index + 1;
+		if (!NFS_WBACK_BUSY(req))
+			continue;
+
+		atomic_inc(&req->wb_count);
+		spin_unlock(&nfsi->req_lock);
+		error = nfs_wait_on_request(req);
+		nfs_release_request(req);
+		if (error < 0)
+			return error;
+		spin_lock(&nfsi->req_lock);
+		res++;
+	}
+	spin_unlock(&nfsi->req_lock);
+	return res;
+}
+
+/*
+ * nfs_scan_dirty - Scan an inode for dirty requests
+ * @inode: NFS inode to scan
+ * @dst: destination list
+ * @idx_start: lower bound of page->index to scan.
+ * @npages: idx_start + npages sets the upper bound to scan.
+ *
+ * Moves requests from the inode's dirty page list.
+ * The requests are *not* checked to ensure that they form a contiguous set.
+ */
+static int
+nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int	res;
+	res = nfs_scan_list(&nfsi->dirty, dst, idx_start, npages);
+	nfsi->ndirty -= res;
+	sub_page_state(nr_dirty,res);
+	if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
+		printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
+	return res;
+}
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+/*
+ * nfs_scan_commit - Scan an inode for commit requests
+ * @inode: NFS inode to scan
+ * @dst: destination list
+ * @idx_start: lower bound of page->index to scan.
+ * @npages: idx_start + npages sets the upper bound to scan.
+ *
+ * Moves requests from the inode's 'commit' request list.
+ * The requests are *not* checked to ensure that they form a contiguous set.
+ */
+static int
+nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int	res;
+	res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages);
+	nfsi->ncommit -= res;
+	if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
+		printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+	return res;
+}
+#endif
+
+static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
+{
+	struct backing_dev_info *bdi = mapping->backing_dev_info;
+	DEFINE_WAIT(wait);
+	int ret = 0;
+
+	might_sleep();
+
+	if (!bdi_write_congested(bdi))
+		return 0;
+	if (intr) {
+		struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
+		sigset_t oldset;
+
+		rpc_clnt_sigmask(clnt, &oldset);
+		prepare_to_wait(&nfs_write_congestion, &wait, TASK_INTERRUPTIBLE);
+		if (bdi_write_congested(bdi)) {
+			if (signalled())
+				ret = -ERESTARTSYS;
+			else
+				schedule();
+		}
+		rpc_clnt_sigunmask(clnt, &oldset);
+	} else {
+		prepare_to_wait(&nfs_write_congestion, &wait, TASK_UNINTERRUPTIBLE);
+		if (bdi_write_congested(bdi))
+			schedule();
+	}
+	finish_wait(&nfs_write_congestion, &wait);
+	return ret;
+}
+
+
+/*
+ * Try to update any existing write request, or create one if there is none.
+ * In order to match, the request's credentials must match those of
+ * the calling process.
+ *
+ * Note: Should always be called with the Page Lock held!
+ */
+static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
+		struct inode *inode, struct page *page,
+		unsigned int offset, unsigned int bytes)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_page		*req, *new = NULL;
+	unsigned long		rqend, end;
+
+	end = offset + bytes;
+
+	if (nfs_wait_on_write_congestion(page->mapping, server->flags & NFS_MOUNT_INTR))
+		return ERR_PTR(-ERESTARTSYS);
+	for (;;) {
+		/* Loop over all inode entries and see if we find
+		 * A request for the page we wish to update
+		 */
+		spin_lock(&nfsi->req_lock);
+		req = _nfs_find_request(inode, page->index);
+		if (req) {
+			if (!nfs_lock_request_dontget(req)) {
+				int error;
+				spin_unlock(&nfsi->req_lock);
+				error = nfs_wait_on_request(req);
+				nfs_release_request(req);
+				if (error < 0)
+					return ERR_PTR(error);
+				continue;
+			}
+			spin_unlock(&nfsi->req_lock);
+			if (new)
+				nfs_release_request(new);
+			break;
+		}
+
+		if (new) {
+			int error;
+			nfs_lock_request_dontget(new);
+			error = nfs_inode_add_request(inode, new);
+			if (error) {
+				spin_unlock(&nfsi->req_lock);
+				nfs_unlock_request(new);
+				return ERR_PTR(error);
+			}
+			spin_unlock(&nfsi->req_lock);
+			nfs_mark_request_dirty(new);
+			return new;
+		}
+		spin_unlock(&nfsi->req_lock);
+
+		new = nfs_create_request(ctx, inode, page, offset, bytes);
+		if (IS_ERR(new))
+			return new;
+	}
+
+	/* We have a request for our page.
+	 * If the creds don't match, or the
+	 * page addresses don't match,
+	 * tell the caller to wait on the conflicting
+	 * request.
+	 */
+	rqend = req->wb_offset + req->wb_bytes;
+	if (req->wb_context != ctx
+	    || req->wb_page != page
+	    || !nfs_dirty_request(req)
+	    || offset > rqend || end < req->wb_offset) {
+		nfs_unlock_request(req);
+		return ERR_PTR(-EBUSY);
+	}
+
+	/* Okay, the request matches. Update the region */
+	if (offset < req->wb_offset) {
+		req->wb_offset = offset;
+		req->wb_pgbase = offset;
+		req->wb_bytes = rqend - req->wb_offset;
+	}
+
+	if (end > rqend)
+		req->wb_bytes = end - req->wb_offset;
+
+	return req;
+}
+
+int nfs_flush_incompatible(struct file *file, struct page *page)
+{
+	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+	struct inode	*inode = page->mapping->host;
+	struct nfs_page	*req;
+	int		status = 0;
+	/*
+	 * Look for a request corresponding to this page. If there
+	 * is one, and it belongs to another file, we flush it out
+	 * before we try to copy anything into the page. Do this
+	 * due to the lack of an ACCESS-type call in NFSv2.
+	 * Also do the same if we find a request from an existing
+	 * dropped page.
+	 */
+	req = nfs_find_request(inode, page->index);
+	if (req) {
+		if (req->wb_page != page || ctx != req->wb_context)
+			status = nfs_wb_page(inode, page);
+		nfs_release_request(req);
+	}
+	return (status < 0) ? status : 0;
+}
+
+/*
+ * Update and possibly write a cached page of an NFS file.
+ *
+ * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
+ * things with a page scheduled for an RPC call (e.g. invalidate it).
+ */
+int nfs_updatepage(struct file *file, struct page *page,
+		unsigned int offset, unsigned int count)
+{
+	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+	struct dentry	*dentry = file->f_dentry;
+	struct inode	*inode = page->mapping->host;
+	struct nfs_page	*req;
+	int		status = 0;
+
+	dprintk("NFS:      nfs_updatepage(%s/%s %d@%Ld)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		count, (long long)(page_offset(page) +offset));
+
+	if (IS_SYNC(inode)) {
+		status = nfs_writepage_sync(ctx, inode, page, offset, count, 0);
+		if (status > 0) {
+			if (offset == 0 && status == PAGE_CACHE_SIZE)
+				SetPageUptodate(page);
+			return 0;
+		}
+		return status;
+	}
+
+	/* If we're not using byte range locks, and we know the page
+	 * is entirely in cache, it may be more efficient to avoid
+	 * fragmenting write requests.
+	 */
+	if (PageUptodate(page) && inode->i_flock == NULL) {
+		loff_t end_offs = i_size_read(inode) - 1;
+		unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT;
+
+		count += offset;
+		offset = 0;
+		if (unlikely(end_offs < 0)) {
+			/* Do nothing */
+		} else if (page->index == end_index) {
+			unsigned int pglen;
+			pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1;
+			if (count < pglen)
+				count = pglen;
+		} else if (page->index < end_index)
+			count = PAGE_CACHE_SIZE;
+	}
+
+	/*
+	 * Try to find an NFS request corresponding to this page
+	 * and update it.
+	 * If the existing request cannot be updated, we must flush
+	 * it out now.
+	 */
+	do {
+		req = nfs_update_request(ctx, inode, page, offset, count);
+		status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
+		if (status != -EBUSY)
+			break;
+		/* Request could not be updated. Flush it out and try again */
+		status = nfs_wb_page(inode, page);
+	} while (status >= 0);
+	if (status < 0)
+		goto done;
+
+	status = 0;
+
+	/* Update file length */
+	nfs_grow_file(page, offset, count);
+	/* Set the PG_uptodate flag? */
+	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+	nfs_unlock_request(req);
+done:
+        dprintk("NFS:      nfs_updatepage returns %d (isize %Ld)\n",
+			status, (long long)i_size_read(inode));
+	if (status < 0)
+		ClearPageUptodate(page);
+	return status;
+}
+
+static void nfs_writepage_release(struct nfs_page *req)
+{
+	end_page_writeback(req->wb_page);
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+	if (!PageError(req->wb_page)) {
+		if (NFS_NEED_RESCHED(req)) {
+			nfs_mark_request_dirty(req);
+			goto out;
+		} else if (NFS_NEED_COMMIT(req)) {
+			nfs_mark_request_commit(req);
+			goto out;
+		}
+	}
+	nfs_inode_remove_request(req);
+
+out:
+	nfs_clear_commit(req);
+	nfs_clear_reschedule(req);
+#else
+	nfs_inode_remove_request(req);
+#endif
+	nfs_unlock_request(req);
+}
+
+static inline int flush_task_priority(int how)
+{
+	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
+		case FLUSH_HIGHPRI:
+			return RPC_PRIORITY_HIGH;
+		case FLUSH_LOWPRI:
+			return RPC_PRIORITY_LOW;
+	}
+	return RPC_PRIORITY_NORMAL;
+}
+
+/*
+ * Set up the argument/result storage required for the RPC call.
+ */
+static void nfs_write_rpcsetup(struct nfs_page *req,
+		struct nfs_write_data *data,
+		unsigned int count, unsigned int offset,
+		int how)
+{
+	struct rpc_task		*task = &data->task;
+	struct inode		*inode;
+
+	/* Set up the RPC argument and reply structs
+	 * NB: take care not to mess about with data->commit et al. */
+
+	data->req = req;
+	data->inode = inode = req->wb_context->dentry->d_inode;
+	data->cred = req->wb_context->cred;
+
+	data->args.fh     = NFS_FH(inode);
+	data->args.offset = req_offset(req) + offset;
+	data->args.pgbase = req->wb_pgbase + offset;
+	data->args.pages  = data->pagevec;
+	data->args.count  = count;
+	data->args.context = req->wb_context;
+
+	data->res.fattr   = &data->fattr;
+	data->res.count   = count;
+	data->res.verf    = &data->verf;
+
+	NFS_PROTO(inode)->write_setup(data, how);
+
+	data->task.tk_priority = flush_task_priority(how);
+	data->task.tk_cookie = (unsigned long)inode;
+	data->task.tk_calldata = data;
+	/* Release requests */
+	data->task.tk_release = nfs_writedata_release;
+
+	dprintk("NFS: %4d initiated write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+		task->tk_pid,
+		inode->i_sb->s_id,
+		(long long)NFS_FILEID(inode),
+		count,
+		(unsigned long long)data->args.offset);
+}
+
+static void nfs_execute_write(struct nfs_write_data *data)
+{
+	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
+	sigset_t oldset;
+
+	rpc_clnt_sigmask(clnt, &oldset);
+	lock_kernel();
+	rpc_execute(&data->task);
+	unlock_kernel();
+	rpc_clnt_sigunmask(clnt, &oldset);
+}
+
+/*
+ * Generate multiple small requests to write out a single
+ * contiguous dirty area on one page.
+ */
+static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
+{
+	struct nfs_page *req = nfs_list_entry(head->next);
+	struct page *page = req->wb_page;
+	struct nfs_write_data *data;
+	unsigned int wsize = NFS_SERVER(inode)->wsize;
+	unsigned int nbytes, offset;
+	int requests = 0;
+	LIST_HEAD(list);
+
+	nfs_list_remove_request(req);
+
+	nbytes = req->wb_bytes;
+	for (;;) {
+		data = nfs_writedata_alloc();
+		if (!data)
+			goto out_bad;
+		list_add(&data->pages, &list);
+		requests++;
+		if (nbytes <= wsize)
+			break;
+		nbytes -= wsize;
+	}
+	atomic_set(&req->wb_complete, requests);
+
+	ClearPageError(page);
+	SetPageWriteback(page);
+	offset = 0;
+	nbytes = req->wb_bytes;
+	do {
+		data = list_entry(list.next, struct nfs_write_data, pages);
+		list_del_init(&data->pages);
+
+		data->pagevec[0] = page;
+		data->complete = nfs_writeback_done_partial;
+
+		if (nbytes > wsize) {
+			nfs_write_rpcsetup(req, data, wsize, offset, how);
+			offset += wsize;
+			nbytes -= wsize;
+		} else {
+			nfs_write_rpcsetup(req, data, nbytes, offset, how);
+			nbytes = 0;
+		}
+		nfs_execute_write(data);
+	} while (nbytes != 0);
+
+	return 0;
+
+out_bad:
+	while (!list_empty(&list)) {
+		data = list_entry(list.next, struct nfs_write_data, pages);
+		list_del(&data->pages);
+		nfs_writedata_free(data);
+	}
+	nfs_mark_request_dirty(req);
+	nfs_unlock_request(req);
+	return -ENOMEM;
+}
+
+/*
+ * Create an RPC task for the given write request and kick it.
+ * The page must have been locked by the caller.
+ *
+ * It may happen that the page we're passed is not marked dirty.
+ * This is the case if nfs_updatepage detects a conflicting request
+ * that has been written but not committed.
+ */
+static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
+{
+	struct nfs_page		*req;
+	struct page		**pages;
+	struct nfs_write_data	*data;
+	unsigned int		count;
+
+	if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
+		return nfs_flush_multi(head, inode, how);
+
+	data = nfs_writedata_alloc();
+	if (!data)
+		goto out_bad;
+
+	pages = data->pagevec;
+	count = 0;
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_list_add_request(req, &data->pages);
+		ClearPageError(req->wb_page);
+		SetPageWriteback(req->wb_page);
+		*pages++ = req->wb_page;
+		count += req->wb_bytes;
+	}
+	req = nfs_list_entry(data->pages.next);
+
+	data->complete = nfs_writeback_done_full;
+	/* Set up the argument struct */
+	nfs_write_rpcsetup(req, data, count, 0, how);
+
+	nfs_execute_write(data);
+	return 0;
+ out_bad:
+	while (!list_empty(head)) {
+		struct nfs_page *req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_mark_request_dirty(req);
+		nfs_unlock_request(req);
+	}
+	return -ENOMEM;
+}
+
+static int
+nfs_flush_list(struct list_head *head, int wpages, int how)
+{
+	LIST_HEAD(one_request);
+	struct nfs_page		*req;
+	int			error = 0;
+	unsigned int		pages = 0;
+
+	while (!list_empty(head)) {
+		pages += nfs_coalesce_requests(head, &one_request, wpages);
+		req = nfs_list_entry(one_request.next);
+		error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how);
+		if (error < 0)
+			break;
+	}
+	if (error >= 0)
+		return pages;
+
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_mark_request_dirty(req);
+		nfs_unlock_request(req);
+	}
+	return error;
+}
+
+/*
+ * Handle a write reply that flushed part of a page.
+ */
+static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
+{
+	struct nfs_page		*req = data->req;
+	struct page		*page = req->wb_page;
+
+	dprintk("NFS: write (%s/%Ld %d@%Ld)",
+		req->wb_context->dentry->d_inode->i_sb->s_id,
+		(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+		req->wb_bytes,
+		(long long)req_offset(req));
+
+	if (status < 0) {
+		ClearPageUptodate(page);
+		SetPageError(page);
+		req->wb_context->error = status;
+		dprintk(", error = %d\n", status);
+	} else {
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+		if (data->verf.committed < NFS_FILE_SYNC) {
+			if (!NFS_NEED_COMMIT(req)) {
+				nfs_defer_commit(req);
+				memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
+				dprintk(" defer commit\n");
+			} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
+				nfs_defer_reschedule(req);
+				dprintk(" server reboot detected\n");
+			}
+		} else
+#endif
+			dprintk(" OK\n");
+	}
+
+	if (atomic_dec_and_test(&req->wb_complete))
+		nfs_writepage_release(req);
+}
+
+/*
+ * Handle a write reply that flushes a whole page.
+ *
+ * FIXME: There is an inherent race with invalidate_inode_pages and
+ *	  writebacks since the page->count is kept > 1 for as long
+ *	  as the page has a write request pending.
+ */
+static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
+{
+	struct nfs_page		*req;
+	struct page		*page;
+
+	/* Update attributes as result of writeback. */
+	while (!list_empty(&data->pages)) {
+		req = nfs_list_entry(data->pages.next);
+		nfs_list_remove_request(req);
+		page = req->wb_page;
+
+		dprintk("NFS: write (%s/%Ld %d@%Ld)",
+			req->wb_context->dentry->d_inode->i_sb->s_id,
+			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+			req->wb_bytes,
+			(long long)req_offset(req));
+
+		if (status < 0) {
+			ClearPageUptodate(page);
+			SetPageError(page);
+			req->wb_context->error = status;
+			end_page_writeback(page);
+			nfs_inode_remove_request(req);
+			dprintk(", error = %d\n", status);
+			goto next;
+		}
+		end_page_writeback(page);
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+		if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) {
+			nfs_inode_remove_request(req);
+			dprintk(" OK\n");
+			goto next;
+		}
+		memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
+		nfs_mark_request_commit(req);
+		dprintk(" marked for commit\n");
+#else
+		nfs_inode_remove_request(req);
+#endif
+	next:
+		nfs_unlock_request(req);
+	}
+}
+
+/*
+ * This function is called when the WRITE call is complete.
+ */
+void nfs_writeback_done(struct rpc_task *task)
+{
+	struct nfs_write_data	*data = (struct nfs_write_data *) task->tk_calldata;
+	struct nfs_writeargs	*argp = &data->args;
+	struct nfs_writeres	*resp = &data->res;
+
+	dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
+		task->tk_pid, task->tk_status);
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+		/* We tried a write call, but the server did not
+		 * commit data to stable storage even though we
+		 * requested it.
+		 * Note: There is a known bug in Tru64 < 5.0 in which
+		 *	 the server reports NFS_DATA_SYNC, but performs
+		 *	 NFS_FILE_SYNC. We therefore implement this checking
+		 *	 as a dprintk() in order to avoid filling syslog.
+		 */
+		static unsigned long    complain;
+
+		if (time_before(complain, jiffies)) {
+			dprintk("NFS: faulty NFS server %s:"
+				" (committed = %d) != (stable = %d)\n",
+				NFS_SERVER(data->inode)->hostname,
+				resp->verf->committed, argp->stable);
+			complain = jiffies + 300 * HZ;
+		}
+	}
+#endif
+	/* Is this a short write? */
+	if (task->tk_status >= 0 && resp->count < argp->count) {
+		static unsigned long    complain;
+
+		/* Has the server at least made some progress? */
+		if (resp->count != 0) {
+			/* Was this an NFSv2 write or an NFSv3 stable write? */
+			if (resp->verf->committed != NFS_UNSTABLE) {
+				/* Resend from where the server left off */
+				argp->offset += resp->count;
+				argp->pgbase += resp->count;
+				argp->count -= resp->count;
+			} else {
+				/* Resend as a stable write in order to avoid
+				 * headaches in the case of a server crash.
+				 */
+				argp->stable = NFS_FILE_SYNC;
+			}
+			rpc_restart_call(task);
+			return;
+		}
+		if (time_before(complain, jiffies)) {
+			printk(KERN_WARNING
+			       "NFS: Server wrote zero bytes, expected %u.\n",
+					argp->count);
+			complain = jiffies + 300 * HZ;
+		}
+		/* Can't do anything about it except throw an error. */
+		task->tk_status = -EIO;
+	}
+
+	/*
+	 * Process the nfs_page list
+	 */
+	data->complete(data, task->tk_status);
+}
+
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static void nfs_commit_release(struct rpc_task *task)
+{
+	struct nfs_write_data	*wdata = (struct nfs_write_data *)task->tk_calldata;
+	nfs_commit_free(wdata);
+}
+
+/*
+ * Set up the argument/result storage required for the RPC call.
+ */
+static void nfs_commit_rpcsetup(struct list_head *head,
+		struct nfs_write_data *data, int how)
+{
+	struct rpc_task		*task = &data->task;
+	struct nfs_page		*first, *last;
+	struct inode		*inode;
+	loff_t			start, end, len;
+
+	/* Set up the RPC argument and reply structs
+	 * NB: take care not to mess about with data->commit et al. */
+
+	list_splice_init(head, &data->pages);
+	first = nfs_list_entry(data->pages.next);
+	last = nfs_list_entry(data->pages.prev);
+	inode = first->wb_context->dentry->d_inode;
+
+	/*
+	 * Determine the offset range of requests in the COMMIT call.
+	 * We rely on the fact that data->pages is an ordered list...
+	 */
+	start = req_offset(first);
+	end = req_offset(last) + last->wb_bytes;
+	len = end - start;
+	/* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
+	if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1))
+		len = 0;
+
+	data->inode	  = inode;
+	data->cred	  = first->wb_context->cred;
+
+	data->args.fh     = NFS_FH(data->inode);
+	data->args.offset = start;
+	data->args.count  = len;
+	data->res.count   = len;
+	data->res.fattr   = &data->fattr;
+	data->res.verf    = &data->verf;
+	
+	NFS_PROTO(inode)->commit_setup(data, how);
+
+	data->task.tk_priority = flush_task_priority(how);
+	data->task.tk_cookie = (unsigned long)inode;
+	data->task.tk_calldata = data;
+	/* Release requests */
+	data->task.tk_release = nfs_commit_release;
+	
+	dprintk("NFS: %4d initiated commit call\n", task->tk_pid);
+}
+
+/*
+ * Commit dirty pages
+ */
+static int
+nfs_commit_list(struct list_head *head, int how)
+{
+	struct nfs_write_data	*data;
+	struct nfs_page         *req;
+
+	data = nfs_commit_alloc();
+
+	if (!data)
+		goto out_bad;
+
+	/* Set up the argument struct */
+	nfs_commit_rpcsetup(head, data, how);
+
+	nfs_execute_write(data);
+	return 0;
+ out_bad:
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_mark_request_commit(req);
+		nfs_unlock_request(req);
+	}
+	return -ENOMEM;
+}
+
+/*
+ * COMMIT call returned
+ */
+void
+nfs_commit_done(struct rpc_task *task)
+{
+	struct nfs_write_data	*data = (struct nfs_write_data *)task->tk_calldata;
+	struct nfs_page		*req;
+	int res = 0;
+
+        dprintk("NFS: %4d nfs_commit_done (status %d)\n",
+                                task->tk_pid, task->tk_status);
+
+	while (!list_empty(&data->pages)) {
+		req = nfs_list_entry(data->pages.next);
+		nfs_list_remove_request(req);
+
+		dprintk("NFS: commit (%s/%Ld %d@%Ld)",
+			req->wb_context->dentry->d_inode->i_sb->s_id,
+			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+			req->wb_bytes,
+			(long long)req_offset(req));
+		if (task->tk_status < 0) {
+			req->wb_context->error = task->tk_status;
+			nfs_inode_remove_request(req);
+			dprintk(", error = %d\n", task->tk_status);
+			goto next;
+		}
+
+		/* Okay, COMMIT succeeded, apparently. Check the verifier
+		 * returned by the server against all stored verfs. */
+		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
+			/* We have a match */
+			nfs_inode_remove_request(req);
+			dprintk(" OK\n");
+			goto next;
+		}
+		/* We have a mismatch. Write the page again */
+		dprintk(" mismatch\n");
+		nfs_mark_request_dirty(req);
+	next:
+		nfs_unlock_request(req);
+		res++;
+	}
+	sub_page_state(nr_unstable,res);
+}
+#endif
+
+static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
+			   unsigned int npages, int how)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	LIST_HEAD(head);
+	int			res,
+				error = 0;
+
+	spin_lock(&nfsi->req_lock);
+	res = nfs_scan_dirty(inode, &head, idx_start, npages);
+	spin_unlock(&nfsi->req_lock);
+	if (res)
+		error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how);
+	if (error < 0)
+		return error;
+	return res;
+}
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+int nfs_commit_inode(struct inode *inode, unsigned long idx_start,
+		    unsigned int npages, int how)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	LIST_HEAD(head);
+	int			res,
+				error = 0;
+
+	spin_lock(&nfsi->req_lock);
+	res = nfs_scan_commit(inode, &head, idx_start, npages);
+	if (res) {
+		res += nfs_scan_commit(inode, &head, 0, 0);
+		spin_unlock(&nfsi->req_lock);
+		error = nfs_commit_list(&head, how);
+	} else
+		spin_unlock(&nfsi->req_lock);
+	if (error < 0)
+		return error;
+	return res;
+}
+#endif
+
+int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
+		  unsigned int npages, int how)
+{
+	int	error,
+		wait;
+
+	wait = how & FLUSH_WAIT;
+	how &= ~FLUSH_WAIT;
+
+	do {
+		error = 0;
+		if (wait)
+			error = nfs_wait_on_requests(inode, idx_start, npages);
+		if (error == 0)
+			error = nfs_flush_inode(inode, idx_start, npages, how);
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+		if (error == 0)
+			error = nfs_commit_inode(inode, idx_start, npages, how);
+#endif
+	} while (error > 0);
+	return error;
+}
+
+int nfs_init_writepagecache(void)
+{
+	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
+					     sizeof(struct nfs_write_data),
+					     0, SLAB_HWCACHE_ALIGN,
+					     NULL, NULL);
+	if (nfs_wdata_cachep == NULL)
+		return -ENOMEM;
+
+	nfs_wdata_mempool = mempool_create(MIN_POOL_WRITE,
+					   mempool_alloc_slab,
+					   mempool_free_slab,
+					   nfs_wdata_cachep);
+	if (nfs_wdata_mempool == NULL)
+		return -ENOMEM;
+
+	nfs_commit_mempool = mempool_create(MIN_POOL_COMMIT,
+					   mempool_alloc_slab,
+					   mempool_free_slab,
+					   nfs_wdata_cachep);
+	if (nfs_commit_mempool == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void nfs_destroy_writepagecache(void)
+{
+	mempool_destroy(nfs_commit_mempool);
+	mempool_destroy(nfs_wdata_mempool);
+	if (kmem_cache_destroy(nfs_wdata_cachep))
+		printk(KERN_INFO "nfs_write_data: not all structures were freed\n");
+}
+