Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/conv.c | 157
-rw-r--r--  fs/9p/fid.c | 176
-rw-r--r--  fs/9p/fid.h | 7
-rw-r--r--  fs/9p/v9fs.c | 8
-rw-r--r--  fs/9p/vfs_dentry.c | 2
-rw-r--r--  fs/9p/vfs_dir.c | 11
-rw-r--r--  fs/9p/vfs_file.c | 88
-rw-r--r--  fs/9p/vfs_inode.c | 95
-rw-r--r--  fs/9p/vfs_super.c | 39
-rw-r--r--  fs/Kconfig | 3
-rw-r--r--  fs/aio.c | 99
-rw-r--r--  fs/cifs/cifsfs.c | 2
-rw-r--r--  fs/cifs/connect.c | 2
-rw-r--r--  fs/compat.c | 4
-rw-r--r--  fs/dcache.c | 3
-rw-r--r--  fs/eventpoll.c | 48
-rw-r--r--  fs/exec.c | 10
-rw-r--r--  fs/ext2/ialloc.c | 25
-rw-r--r--  fs/ext3/balloc.c | 6
-rw-r--r--  fs/ext3/ialloc.c | 29
-rw-r--r--  fs/ext3/resize.c | 6
-rw-r--r--  fs/ext3/super.c | 11
-rw-r--r--  fs/fat/file.c | 37
-rw-r--r--  fs/fat/inode.c | 26
-rw-r--r--  fs/file.c | 10
-rw-r--r--  fs/fuse/dir.c | 6
-rw-r--r--  fs/fuse/file.c | 4
-rw-r--r--  fs/hostfs/hostfs_kern.c | 7
-rw-r--r--  fs/jfs/inode.c | 3
-rw-r--r--  fs/jfs/jfs_dmap.c | 2
-rw-r--r--  fs/jfs/jfs_txnmgr.c | 15
-rw-r--r--  fs/jfs/jfs_txnmgr.h | 1
-rw-r--r--  fs/locks.c | 3
-rw-r--r--  fs/namei.c | 2
-rw-r--r--  fs/nfs/read.c | 5
-rw-r--r--  fs/nfsd/nfs4proc.c | 24
-rw-r--r--  fs/nfsd/nfs4state.c | 90
-rw-r--r--  fs/ntfs/ChangeLog | 12
-rw-r--r--  fs/ntfs/aops.c | 122
-rw-r--r--  fs/ntfs/inode.c | 9
-rw-r--r--  fs/ntfs/layout.h | 8
-rw-r--r--  fs/ntfs/lcnalloc.c | 31
-rw-r--r--  fs/ntfs/lcnalloc.h | 27
-rw-r--r--  fs/ntfs/logfile.c | 30
-rw-r--r--  fs/ntfs/logfile.h | 2
-rw-r--r--  fs/ntfs/malloc.h | 2
-rw-r--r--  fs/ntfs/mft.c | 2
-rw-r--r--  fs/ntfs/runlist.c | 169
-rw-r--r--  fs/open.c | 98
-rw-r--r--  fs/proc/array.c | 3
-rw-r--r--  fs/proc/base.c | 86
-rw-r--r--  fs/read_write.c | 3
-rw-r--r--  fs/reiserfs/file.c | 11
-rw-r--r--  fs/reiserfs/inode.c | 16
54 files changed, 939 insertions, 758 deletions
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
index 1554731bd65..18121af99d3 100644
--- a/fs/9p/conv.c
+++ b/fs/9p/conv.c
@@ -3,6 +3,7 @@
*
* 9P protocol conversion functions
*
+ * Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net>
* Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
* Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
*
@@ -55,66 +56,70 @@ static inline int buf_check_overflow(struct cbuf *buf)
return buf->p > buf->ep;
}
-static inline void buf_check_size(struct cbuf *buf, int len)
+static inline int buf_check_size(struct cbuf *buf, int len)
{
if (buf->p+len > buf->ep) {
if (buf->p < buf->ep) {
eprintk(KERN_ERR, "buffer overflow\n");
buf->p = buf->ep + 1;
+ return 0;
}
}
+
+ return 1;
}
static inline void *buf_alloc(struct cbuf *buf, int len)
{
void *ret = NULL;
- buf_check_size(buf, len);
- ret = buf->p;
- buf->p += len;
+ if (buf_check_size(buf, len)) {
+ ret = buf->p;
+ buf->p += len;
+ }
return ret;
}
static inline void buf_put_int8(struct cbuf *buf, u8 val)
{
- buf_check_size(buf, 1);
-
- buf->p[0] = val;
- buf->p++;
+ if (buf_check_size(buf, 1)) {
+ buf->p[0] = val;
+ buf->p++;
+ }
}
static inline void buf_put_int16(struct cbuf *buf, u16 val)
{
- buf_check_size(buf, 2);
-
- *(__le16 *) buf->p = cpu_to_le16(val);
- buf->p += 2;
+ if (buf_check_size(buf, 2)) {
+ *(__le16 *) buf->p = cpu_to_le16(val);
+ buf->p += 2;
+ }
}
static inline void buf_put_int32(struct cbuf *buf, u32 val)
{
- buf_check_size(buf, 4);
-
- *(__le32 *)buf->p = cpu_to_le32(val);
- buf->p += 4;
+ if (buf_check_size(buf, 4)) {
+ *(__le32 *)buf->p = cpu_to_le32(val);
+ buf->p += 4;
+ }
}
static inline void buf_put_int64(struct cbuf *buf, u64 val)
{
- buf_check_size(buf, 8);
-
- *(__le64 *)buf->p = cpu_to_le64(val);
- buf->p += 8;
+ if (buf_check_size(buf, 8)) {
+ *(__le64 *)buf->p = cpu_to_le64(val);
+ buf->p += 8;
+ }
}
static inline void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
{
- buf_check_size(buf, slen + 2);
-
- buf_put_int16(buf, slen);
- memcpy(buf->p, s, slen);
- buf->p += slen;
+ if (buf_check_size(buf, slen + 2)) {
+ buf_put_int16(buf, slen);
+ memcpy(buf->p, s, slen);
+ buf->p += slen;
+ }
}
static inline void buf_put_string(struct cbuf *buf, const char *s)
@@ -124,20 +129,20 @@ static inline void buf_put_string(struct cbuf *buf, const char *s)
static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen)
{
- buf_check_size(buf, datalen);
-
- memcpy(buf->p, data, datalen);
- buf->p += datalen;
+ if (buf_check_size(buf, datalen)) {
+ memcpy(buf->p, data, datalen);
+ buf->p += datalen;
+ }
}
static inline u8 buf_get_int8(struct cbuf *buf)
{
u8 ret = 0;
- buf_check_size(buf, 1);
- ret = buf->p[0];
-
- buf->p++;
+ if (buf_check_size(buf, 1)) {
+ ret = buf->p[0];
+ buf->p++;
+ }
return ret;
}
@@ -146,10 +151,10 @@ static inline u16 buf_get_int16(struct cbuf *buf)
{
u16 ret = 0;
- buf_check_size(buf, 2);
- ret = le16_to_cpu(*(__le16 *)buf->p);
-
- buf->p += 2;
+ if (buf_check_size(buf, 2)) {
+ ret = le16_to_cpu(*(__le16 *)buf->p);
+ buf->p += 2;
+ }
return ret;
}
@@ -158,10 +163,10 @@ static inline u32 buf_get_int32(struct cbuf *buf)
{
u32 ret = 0;
- buf_check_size(buf, 4);
- ret = le32_to_cpu(*(__le32 *)buf->p);
-
- buf->p += 4;
+ if (buf_check_size(buf, 4)) {
+ ret = le32_to_cpu(*(__le32 *)buf->p);
+ buf->p += 4;
+ }
return ret;
}
@@ -170,10 +175,10 @@ static inline u64 buf_get_int64(struct cbuf *buf)
{
u64 ret = 0;
- buf_check_size(buf, 8);
- ret = le64_to_cpu(*(__le64 *)buf->p);
-
- buf->p += 8;
+ if (buf_check_size(buf, 8)) {
+ ret = le64_to_cpu(*(__le64 *)buf->p);
+ buf->p += 8;
+ }
return ret;
}
@@ -181,27 +186,35 @@ static inline u64 buf_get_int64(struct cbuf *buf)
static inline int
buf_get_string(struct cbuf *buf, char *data, unsigned int datalen)
{
+ u16 len = 0;
+
+ len = buf_get_int16(buf);
+ if (!buf_check_overflow(buf) && buf_check_size(buf, len) && len+1>datalen) {
+ memcpy(data, buf->p, len);
+ data[len] = 0;
+ buf->p += len;
+ len++;
+ }
- u16 len = buf_get_int16(buf);
- buf_check_size(buf, len);
- if (len + 1 > datalen)
- return 0;
-
- memcpy(data, buf->p, len);
- data[len] = 0;
- buf->p += len;
-
- return len + 1;
+ return len;
}
static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf)
{
- char *ret = NULL;
- int n = buf_get_string(buf, sbuf->p, sbuf->ep - sbuf->p);
+ char *ret;
+ u16 len;
+
+ ret = NULL;
+ len = buf_get_int16(buf);
- if (n > 0) {
+ if (!buf_check_overflow(buf) && buf_check_size(buf, len) &&
+ buf_check_size(sbuf, len+1)) {
+
+ memcpy(sbuf->p, buf->p, len);
+ sbuf->p[len] = 0;
ret = sbuf->p;
- sbuf->p += n;
+ buf->p += len;
+ sbuf->p += len + 1;
}
return ret;
@@ -209,12 +222,15 @@ static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf)
static inline int buf_get_data(struct cbuf *buf, void *data, int datalen)
{
- buf_check_size(buf, datalen);
+ int ret = 0;
- memcpy(data, buf->p, datalen);
- buf->p += datalen;
+ if (buf_check_size(buf, datalen)) {
+ memcpy(data, buf->p, datalen);
+ buf->p += datalen;
+ ret = datalen;
+ }
- return datalen;
+ return ret;
}
static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf,
@@ -223,13 +239,12 @@ static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf,
char *ret = NULL;
int n = 0;
- buf_check_size(dbuf, datalen);
-
- n = buf_get_data(buf, dbuf->p, datalen);
-
- if (n > 0) {
- ret = dbuf->p;
- dbuf->p += n;
+ if (buf_check_size(dbuf, datalen)) {
+ n = buf_get_data(buf, dbuf->p, datalen);
+ if (n > 0) {
+ ret = dbuf->p;
+ dbuf->p += n;
+ }
}
return ret;
@@ -636,7 +651,7 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
break;
case RWALK:
rcall->params.rwalk.nwqid = buf_get_int16(bufp);
- rcall->params.rwalk.wqids = buf_alloc(bufp,
+ rcall->params.rwalk.wqids = buf_alloc(dbufp,
rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid));
if (rcall->params.rwalk.wqids)
for (i = 0; i < rcall->params.rwalk.nwqid; i++) {
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 821c9c4d76a..d95f8626d17 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -71,21 +71,28 @@ static int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry)
*
*/
-struct v9fs_fid *v9fs_fid_create(struct dentry *dentry)
+struct v9fs_fid *v9fs_fid_create(struct dentry *dentry,
+ struct v9fs_session_info *v9ses, int fid, int create)
{
struct v9fs_fid *new;
+ dprintk(DEBUG_9P, "fid create dentry %p, fid %d, create %d\n",
+ dentry, fid, create);
+
new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
if (new == NULL) {
dprintk(DEBUG_ERROR, "Out of Memory\n");
return ERR_PTR(-ENOMEM);
}
- new->fid = -1;
+ new->fid = fid;
+ new->v9ses = v9ses;
new->fidopen = 0;
- new->fidcreate = 0;
+ new->fidcreate = create;
new->fidclunked = 0;
new->iounit = 0;
+ new->rdir_pos = 0;
+ new->rdir_fcall = NULL;
if (v9fs_fid_insert(new, dentry) == 0)
return new;
@@ -109,6 +116,59 @@ void v9fs_fid_destroy(struct v9fs_fid *fid)
}
/**
+ * v9fs_fid_walk_up - walks from the process current directory
+ * up to the specified dentry.
+ */
+static struct v9fs_fid *v9fs_fid_walk_up(struct dentry *dentry)
+{
+ int fidnum, cfidnum, err;
+ struct v9fs_fid *cfid;
+ struct dentry *cde;
+ struct v9fs_session_info *v9ses;
+
+ v9ses = v9fs_inode2v9ses(current->fs->pwd->d_inode);
+ cfid = v9fs_fid_lookup(current->fs->pwd);
+ if (cfid == NULL) {
+ dprintk(DEBUG_ERROR, "process cwd doesn't have a fid\n");
+ return ERR_PTR(-ENOENT);
+ }
+
+ cfidnum = cfid->fid;
+ cde = current->fs->pwd;
+ /* TODO: take advantage of multiwalk */
+
+ fidnum = v9fs_get_idpool(&v9ses->fidpool);
+ if (fidnum < 0) {
+ dprintk(DEBUG_ERROR, "could not get a new fid num\n");
+ err = -ENOENT;
+ goto clunk_fid;
+ }
+
+ while (cde != dentry) {
+ if (cde == cde->d_parent) {
+ dprintk(DEBUG_ERROR, "can't find dentry\n");
+ err = -ENOENT;
+ goto clunk_fid;
+ }
+
+ err = v9fs_t_walk(v9ses, cfidnum, fidnum, "..", NULL);
+ if (err < 0) {
+ dprintk(DEBUG_ERROR, "problem walking to parent\n");
+ goto clunk_fid;
+ }
+
+ cfidnum = fidnum;
+ cde = cde->d_parent;
+ }
+
+ return v9fs_fid_create(dentry, v9ses, fidnum, 0);
+
+clunk_fid:
+ v9fs_t_clunk(v9ses, fidnum, NULL);
+ return ERR_PTR(err);
+}
+
+/**
* v9fs_fid_lookup - retrieve the right fid from a particular dentry
* @dentry: dentry to look for fid in
* @type: intent of lookup (operation or traversal)
@@ -119,49 +179,25 @@ void v9fs_fid_destroy(struct v9fs_fid *fid)
*
*/
-struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type)
+struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry)
{
struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
struct v9fs_fid *current_fid = NULL;
struct v9fs_fid *temp = NULL;
struct v9fs_fid *return_fid = NULL;
- int found_parent = 0;
- int found_user = 0;
- dprintk(DEBUG_9P, " dentry: %s (%p) type %d\n", dentry->d_iname, dentry,
- type);
+ dprintk(DEBUG_9P, " dentry: %s (%p)\n", dentry->d_iname, dentry);
- if (fid_list && !list_empty(fid_list)) {
+ if (fid_list) {
list_for_each_entry_safe(current_fid, temp, fid_list, list) {
- if (current_fid->uid == current->uid) {
- if (return_fid == NULL) {
- if ((type == FID_OP)
- || (!current_fid->fidopen)) {
- return_fid = current_fid;
- found_user = 1;
- }
- }
- }
- if (current_fid->pid == current->real_parent->pid) {
- if ((return_fid == NULL) || (found_parent)
- || (found_user)) {
- if ((type == FID_OP)
- || (!current_fid->fidopen)) {
- return_fid = current_fid;
- found_parent = 1;
- found_user = 0;
- }
- }
- }
- if (current_fid->pid == current->pid) {
- if ((type == FID_OP) ||
- (!current_fid->fidopen)) {
- return_fid = current_fid;
- found_parent = 0;
- found_user = 0;
- }
+ if (!current_fid->fidcreate) {
+ return_fid = current_fid;
+ break;
}
}
+
+ if (!return_fid)
+ return_fid = current_fid;
}
/* we are at the root but didn't match */
@@ -187,55 +223,33 @@ struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type)
/* XXX - there may be some duplication we can get rid of */
if (par == dentry) {
- /* we need to fid_lookup the starting point */
- int fidnum = -1;
- int oldfid = -1;
- int result = -1;
- struct v9fs_session_info *v9ses =
- v9fs_inode2v9ses(current->fs->pwd->d_inode);
-
- current_fid =
- v9fs_fid_lookup(current->fs->pwd, FID_WALK);
- if (current_fid == NULL) {
- dprintk(DEBUG_ERROR,
- "process cwd doesn't have a fid\n");
- return return_fid;
- }
- oldfid = current_fid->fid;
- par = current->fs->pwd;
- /* TODO: take advantage of multiwalk */
+ return_fid = v9fs_fid_walk_up(dentry);
+ if (IS_ERR(return_fid))
+ return_fid = NULL;
+ }
+ }
- fidnum = v9fs_get_idpool(&v9ses->fidpool);
- if (fidnum < 0) {
- dprintk(DEBUG_ERROR,
- "could not get a new fid num\n");
- return return_fid;
- }
+ return return_fid;
+}
- while (par != dentry) {
- result =
- v9fs_t_walk(v9ses, oldfid, fidnum, "..",
- NULL);
- if (result < 0) {
- dprintk(DEBUG_ERROR,
- "problem walking to parent\n");
-
- break;
- }
- oldfid = fidnum;
- if (par == par->d_parent) {
- dprintk(DEBUG_ERROR,
- "can't find dentry\n");
- break;
- }
- par = par->d_parent;
- }
- if (par == dentry) {
- return_fid = v9fs_fid_create(dentry);
- return_fid->fid = fidnum;
+struct v9fs_fid *v9fs_fid_get_created(struct dentry *dentry)
+{
+ struct list_head *fid_list;
+ struct v9fs_fid *fid, *ftmp, *ret;
+
+ dprintk(DEBUG_9P, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+ fid_list = (struct list_head *)dentry->d_fsdata;
+ ret = NULL;
+ if (fid_list) {
+ list_for_each_entry_safe(fid, ftmp, fid_list, list) {
+ if (fid->fidcreate && fid->pid == current->pid) {
+ list_del(&fid->list);
+ ret = fid;
+ break;
}
}
}
- return return_fid;
+ dprintk(DEBUG_9P, "return %p\n", ret);
+ return ret;
}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 7db478ccca3..84c673a44c8 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -25,6 +25,7 @@
#define FID_OP 0
#define FID_WALK 1
+#define FID_CREATE 2
struct v9fs_fid {
struct list_head list; /* list of fids associated with a dentry */
@@ -52,6 +53,8 @@ struct v9fs_fid {
struct v9fs_session_info *v9ses; /* session info for this FID */
};
-struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type);
+struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry);
+struct v9fs_fid *v9fs_fid_get_created(struct dentry *);
void v9fs_fid_destroy(struct v9fs_fid *fid);
-struct v9fs_fid *v9fs_fid_create(struct dentry *);
+struct v9fs_fid *v9fs_fid_create(struct dentry *,
+ struct v9fs_session_info *v9ses, int fid, int create);
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 13bdbbab438..82303f3bf76 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -303,7 +303,13 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
goto SessCleanUp;
};
- v9ses->transport = trans_proto;
+ v9ses->transport = kmalloc(sizeof(*v9ses->transport), GFP_KERNEL);
+ if (!v9ses->transport) {
+ retval = -ENOMEM;
+ goto SessCleanUp;
+ }
+
+ memmove(v9ses->transport, trans_proto, sizeof(*v9ses->transport));
if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) {
eprintk(KERN_ERR, "problem initializing transport\n");
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 306c96741f8..a6aa947de0f 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -67,7 +67,7 @@ static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd)
struct dentry *dc = current->fs->pwd;
dprintk(DEBUG_VFS, "dentry: %s (%p)\n", dentry->d_iname, dentry);
- if (v9fs_fid_lookup(dentry, FID_OP)) {
+ if (v9fs_fid_lookup(dentry)) {
dprintk(DEBUG_VFS, "VALID\n");
return 1;
}
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index c478a738418..57a43b8feef 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -197,21 +197,18 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
filemap_fdatawait(inode->i_mapping);
if (fidnum >= 0) {
- fid->fidopen--;
dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen,
fid->fid);
- if (fid->fidopen == 0) {
- if (v9fs_t_clunk(v9ses, fidnum, NULL))
- dprintk(DEBUG_ERROR, "clunk failed\n");
+ if (v9fs_t_clunk(v9ses, fidnum, NULL))
+ dprintk(DEBUG_ERROR, "clunk failed\n");
- v9fs_put_idpool(fid->fid, &v9ses->fidpool);
- }
+ v9fs_put_idpool(fid->fid, &v9ses->fidpool);
kfree(fid->rdir_fcall);
+ kfree(fid);
filp->private_data = NULL;
- v9fs_fid_destroy(fid);
}
d_drop(filp->f_dentry);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 1f8ae7d580a..a4799e971d1 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -53,30 +53,36 @@
int v9fs_file_open(struct inode *inode, struct file *file)
{
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
- struct v9fs_fid *v9fid = v9fs_fid_lookup(file->f_dentry, FID_WALK);
- struct v9fs_fid *v9newfid = NULL;
+ struct v9fs_fid *v9fid, *fid;
struct v9fs_fcall *fcall = NULL;
int open_mode = 0;
unsigned int iounit = 0;
int newfid = -1;
long result = -1;
- dprintk(DEBUG_VFS, "inode: %p file: %p v9fid= %p\n", inode, file,
- v9fid);
+ dprintk(DEBUG_VFS, "inode: %p file: %p \n", inode, file);
+
+ v9fid = v9fs_fid_get_created(file->f_dentry);
+ if (!v9fid)
+ v9fid = v9fs_fid_lookup(file->f_dentry);
if (!v9fid) {
- struct dentry *dentry = file->f_dentry;
dprintk(DEBUG_ERROR, "Couldn't resolve fid from dentry\n");
+ return -EBADF;
+ }
- /* XXX - some duplication from lookup, generalize later */
- /* basically vfs_lookup is too heavy weight */
- v9fid = v9fs_fid_lookup(file->f_dentry, FID_OP);
- if (!v9fid)
- return -EBADF;
+ if (!v9fid->fidcreate) {
+ fid = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
+ if (fid == NULL) {
+ dprintk(DEBUG_ERROR, "Out of Memory\n");
+ return -ENOMEM;
+ }
- v9fid = v9fs_fid_lookup(dentry->d_parent, FID_WALK);
- if (!v9fid)
- return -EBADF;
+ fid->fidopen = 0;
+ fid->fidcreate = 0;
+ fid->fidclunked = 0;
+ fid->iounit = 0;
+ fid->v9ses = v9ses;
newfid = v9fs_get_idpool(&v9ses->fidpool);
if (newfid < 0) {
@@ -85,58 +91,16 @@ int v9fs_file_open(struct inode *inode, struct file *file)
}
result =
- v9fs_t_walk(v9ses, v9fid->fid, newfid,
- (char *)file->f_dentry->d_name.name, NULL);
+ v9fs_t_walk(v9ses, v9fid->fid, newfid, NULL, NULL);
+
if (result < 0) {
v9fs_put_idpool(newfid, &v9ses->fidpool);
dprintk(DEBUG_ERROR, "rewalk didn't work\n");
return -EBADF;
}
- v9fid = v9fs_fid_create(dentry);
- if (v9fid == NULL) {
- dprintk(DEBUG_ERROR, "couldn't insert\n");
- return -ENOMEM;
- }
- v9fid->fid = newfid;
- }
-
- if (v9fid->fidcreate) {
- /* create case */
- newfid = v9fid->fid;
- iounit = v9fid->iounit;
- v9fid->fidcreate = 0;
- } else {
- if (!S_ISDIR(inode->i_mode))
- newfid = v9fid->fid;
- else {
- newfid = v9fs_get_idpool(&v9ses->fidpool);
- if (newfid < 0) {
- eprintk(KERN_WARNING, "allocation failed\n");
- return -ENOSPC;
- }
- /* This would be a somewhat critical clone */
- result =
- v9fs_t_walk(v9ses, v9fid->fid, newfid, NULL,
- &fcall);
- if (result < 0) {
- dprintk(DEBUG_ERROR, "clone error: %s\n",
- FCALL_ERROR(fcall));
- kfree(fcall);
- return result;
- }
-
- v9newfid = v9fs_fid_create(file->f_dentry);
- v9newfid->fid = newfid;
- v9newfid->qid = v9fid->qid;
- v9newfid->iounit = v9fid->iounit;
- v9newfid->fidopen = 0;
- v9newfid->fidclunked = 0;
- v9newfid->v9ses = v9ses;
- v9fid = v9newfid;
- kfree(fcall);
- }
-
+ fid->fid = newfid;
+ v9fid = fid;
/* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */
/* translate open mode appropriately */
open_mode = file->f_flags & 0x3;
@@ -163,9 +127,13 @@ int v9fs_file_open(struct inode *inode, struct file *file)
iounit = fcall->params.ropen.iounit;
kfree(fcall);
+ } else {
+ /* create case */
+ newfid = v9fid->fid;
+ iounit = v9fid->iounit;
+ v9fid->fidcreate = 0;
}
-
file->private_data = v9fid;
v9fid->rdir_pos = 0;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 0c13fc60004..2b696ae6655 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -307,7 +307,7 @@ v9fs_create(struct inode *dir,
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
struct super_block *sb = dir->i_sb;
struct v9fs_fid *dirfid =
- v9fs_fid_lookup(file_dentry->d_parent, FID_WALK);
+ v9fs_fid_lookup(file_dentry->d_parent);
struct v9fs_fid *fid = NULL;
struct inode *file_inode = NULL;
struct v9fs_fcall *fcall = NULL;
@@ -317,6 +317,7 @@ v9fs_create(struct inode *dir,
long newfid = -1;
int result = 0;
unsigned int iounit = 0;
+ int wfidno = -1;
perm = unixmode2p9mode(v9ses, perm);
@@ -350,7 +351,7 @@ v9fs_create(struct inode *dir,
if (result < 0) {
dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall));
v9fs_put_idpool(newfid, &v9ses->fidpool);
- newfid = 0;
+ newfid = -1;
goto CleanUpFid;
}
@@ -369,20 +370,39 @@ v9fs_create(struct inode *dir,
qid = fcall->params.rcreate.qid;
kfree(fcall);
- fid = v9fs_fid_create(file_dentry);
+ fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1);
+ dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate);
if (!fid) {
result = -ENOMEM;
goto CleanUpFid;
}
- fid->fid = newfid;
- fid->fidopen = 0;
- fid->fidcreate = 1;
fid->qid = qid;
fid->iounit = iounit;
- fid->rdir_pos = 0;
- fid->rdir_fcall = NULL;
- fid->v9ses = v9ses;
+
+ /* walk to the newly created file and put the fid in the dentry */
+ wfidno = v9fs_get_idpool(&v9ses->fidpool);
+ if (newfid < 0) {
+ eprintk(KERN_WARNING, "no free fids available\n");
+ return -ENOSPC;
+ }
+
+ result = v9fs_t_walk(v9ses, dirfidnum, wfidno,
+ (char *) file_dentry->d_name.name, NULL);
+ if (result < 0) {
+ dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall));
+ v9fs_put_idpool(wfidno, &v9ses->fidpool);
+ wfidno = -1;
+ goto CleanUpFid;
+ }
+
+ if (!v9fs_fid_create(file_dentry, v9ses, wfidno, 0)) {
+ if (!v9fs_t_clunk(v9ses, newfid, &fcall)) {
+ v9fs_put_idpool(wfidno, &v9ses->fidpool);
+ }
+
+ goto CleanUpFid;
+ }
if ((perm & V9FS_DMSYMLINK) || (perm & V9FS_DMLINK) ||
(perm & V9FS_DMNAMEDPIPE) || (perm & V9FS_DMSOCKET) ||
@@ -410,11 +430,11 @@ v9fs_create(struct inode *dir,
d_instantiate(file_dentry, file_inode);
if (perm & V9FS_DMDIR) {
- if (v9fs_t_clunk(v9ses, newfid, &fcall))
+ if (!v9fs_t_clunk(v9ses, newfid, &fcall))
+ v9fs_put_idpool(newfid, &v9ses->fidpool);
+ else
dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n",
FCALL_ERROR(fcall));
-
- v9fs_put_idpool(newfid, &v9ses->fidpool);
kfree(fcall);
fid->fidopen = 0;
fid->fidcreate = 0;
@@ -426,12 +446,22 @@ v9fs_create(struct inode *dir,
CleanUpFid:
kfree(fcall);
- if (newfid) {
- if (v9fs_t_clunk(v9ses, newfid, &fcall))
+ if (newfid >= 0) {
+ if (!v9fs_t_clunk(v9ses, newfid, &fcall))
+ v9fs_put_idpool(newfid, &v9ses->fidpool);
+ else
+ dprintk(DEBUG_ERROR, "clunk failed: %s\n",
+ FCALL_ERROR(fcall));
+
+ kfree(fcall);
+ }
+ if (wfidno >= 0) {
+ if (!v9fs_t_clunk(v9ses, wfidno, &fcall))
+ v9fs_put_idpool(wfidno, &v9ses->fidpool);
+ else
dprintk(DEBUG_ERROR, "clunk failed: %s\n",
FCALL_ERROR(fcall));
- v9fs_put_idpool(newfid, &v9ses->fidpool);
kfree(fcall);
}
return result;
@@ -461,7 +491,7 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
file_inode = file->d_inode;
sb = file_inode->i_sb;
v9ses = v9fs_inode2v9ses(file_inode);
- v9fid = v9fs_fid_lookup(file, FID_OP);
+ v9fid = v9fs_fid_lookup(file);
if (!v9fid) {
dprintk(DEBUG_ERROR,
@@ -545,7 +575,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
sb = dir->i_sb;
v9ses = v9fs_inode2v9ses(dir);
- dirfid = v9fs_fid_lookup(dentry->d_parent, FID_WALK);
+ dirfid = v9fs_fid_lookup(dentry->d_parent);
if (!dirfid) {
dprintk(DEBUG_ERROR, "no dirfid\n");
@@ -573,7 +603,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
v9fs_put_idpool(newfid, &v9ses->fidpool);
if (result == -ENOENT) {
d_add(dentry, NULL);
- dprintk(DEBUG_ERROR,
+ dprintk(DEBUG_VFS,
"Return negative dentry %p count %d\n",
dentry, atomic_read(&dentry->d_count));
return NULL;
@@ -601,16 +631,13 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid);
- fid = v9fs_fid_create(dentry);
+ fid = v9fs_fid_create(dentry, v9ses, newfid, 0);
if (fid == NULL) {
dprintk(DEBUG_ERROR, "couldn't insert\n");
result = -ENOMEM;
goto FreeFcall;
}
- fid->fid = newfid;
- fid->fidopen = 0;
- fid->v9ses = v9ses;
fid->qid = fcall->params.rstat.stat->qid;
dentry->d_op = &v9fs_dentry_operations;
@@ -665,11 +692,11 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
{
struct inode *old_inode = old_dentry->d_inode;
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode);
- struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_WALK);
+ struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry);
struct v9fs_fid *olddirfid =
- v9fs_fid_lookup(old_dentry->d_parent, FID_WALK);
+ v9fs_fid_lookup(old_dentry->d_parent);
struct v9fs_fid *newdirfid =
- v9fs_fid_lookup(new_dentry->d_parent, FID_WALK);
+ v9fs_fid_lookup(new_dentry->d_parent);
struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
struct v9fs_fcall *fcall = NULL;
int fid = -1;
@@ -744,7 +771,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
{
struct v9fs_fcall *fcall = NULL;
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
- struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
+ struct v9fs_fid *fid = v9fs_fid_lookup(dentry);
int err = -EPERM;
dprintk(DEBUG_VFS, "dentry: %p\n", dentry);
@@ -778,7 +805,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
{
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
- struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
+ struct v9fs_fid *fid = v9fs_fid_lookup(dentry);
struct v9fs_fcall *fcall = NULL;
struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
int res = -EPERM;
@@ -960,7 +987,7 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
if (retval != 0)
goto FreeFcall;
- newfid = v9fs_fid_lookup(dentry, FID_OP);
+ newfid = v9fs_fid_lookup(dentry);
/* issue a twstat */
v9fs_blank_mistat(v9ses, mistat);
@@ -1004,7 +1031,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
struct v9fs_fcall *fcall = NULL;
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
- struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
+ struct v9fs_fid *fid = v9fs_fid_lookup(dentry);
if (!fid) {
dprintk(DEBUG_ERROR, "could not resolve fid from dentry\n");
@@ -1063,8 +1090,8 @@ static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
int ret;
char *link = __getname();
- if (strlen(link) < buflen)
- buflen = strlen(link);
+ if (buflen > PATH_MAX)
+ buflen = PATH_MAX;
dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
@@ -1148,7 +1175,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
struct v9fs_fcall *fcall = NULL;
struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
- struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_OP);
+ struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry);
struct v9fs_fid *newfid = NULL;
char *symname = __getname();
@@ -1168,7 +1195,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
if (retval != 0)
goto FreeMem;
- newfid = v9fs_fid_lookup(dentry, FID_OP);
+ newfid = v9fs_fid_lookup(dentry);
if (!newfid) {
dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
goto FreeMem;
@@ -1246,7 +1273,7 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
if (retval != 0)
goto FreeMem;
- newfid = v9fs_fid_lookup(dentry, FID_OP);
+ newfid = v9fs_fid_lookup(dentry);
if (!newfid) {
dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n");
retval = -EINVAL;
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 868f350b2c5..82c5b008407 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -129,8 +129,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type
if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
dprintk(DEBUG_ERROR, "problem initiating session\n");
- retval = newfid;
- goto free_session;
+ return ERR_PTR(newfid);
}
sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
@@ -150,28 +149,24 @@ static struct super_block *v9fs_get_sb(struct file_system_type
if (!root) {
retval = -ENOMEM;
- goto release_inode;
+ goto put_back_sb;
}
sb->s_root = root;
- /* Setup the Root Inode */
- root_fid = v9fs_fid_create(root);
- if (root_fid == NULL) {
- retval = -ENOMEM;
- goto release_dentry;
- }
-
- root_fid->fidopen = 0;
- root_fid->v9ses = v9ses;
-
stat_result = v9fs_t_stat(v9ses, newfid, &fcall);
if (stat_result < 0) {
dprintk(DEBUG_ERROR, "stat error\n");
v9fs_t_clunk(v9ses, newfid, NULL);
v9fs_put_idpool(newfid, &v9ses->fidpool);
} else {
- root_fid->fid = newfid;
+ /* Setup the Root Inode */
+ root_fid = v9fs_fid_create(root, v9ses, newfid, 0);
+ if (root_fid == NULL) {
+ retval = -ENOMEM;
+ goto put_back_sb;
+ }
+
root_fid->qid = fcall->params.rstat.stat->qid;
root->d_inode->i_ino =
v9fs_qid2ino(&fcall->params.rstat.stat->qid);
@@ -182,25 +177,15 @@ static struct super_block *v9fs_get_sb(struct file_system_type
if (stat_result < 0) {
retval = stat_result;
- goto release_dentry;
+ goto put_back_sb;
}
return sb;
- release_dentry:
- dput(sb->s_root);
-
- release_inode:
- iput(inode);
-
- put_back_sb:
+put_back_sb:
+ /* deactivate_super calls v9fs_kill_super which will frees the rest */
up_write(&sb->s_umount);
deactivate_super(sb);
- v9fs_session_close(v9ses);
-
- free_session:
- kfree(v9ses);
-
return ERR_PTR(retval);
}
diff --git a/fs/Kconfig b/fs/Kconfig
index 068ccea2f18..48f5422cb19 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -472,6 +472,9 @@ config FUSE_FS
utilities is available from the FUSE homepage:
<http://fuse.sourceforge.net/>
+ See <file:Documentation/filesystems/fuse.txt> for more information.
+ See <file:Documentation/Changes> for needed library/utility version.
+
If you want to develop a userspace FS, or if you want to use
a filesystem based on FUSE, answer Y or M.
diff --git a/fs/aio.c b/fs/aio.c
index 38f62680fd6..d6b1551342b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -562,6 +562,7 @@ static inline void lock_kiocb(struct kiocb *iocb)
static inline void unlock_kiocb(struct kiocb *iocb)
{
kiocbClearLocked(iocb);
+ smp_mb__after_clear_bit();
wake_up_bit(&iocb->ki_flags, KIF_LOCKED);
}
@@ -740,19 +741,9 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
ret = retry(iocb);
current->io_wait = NULL;
- if (-EIOCBRETRY != ret) {
- if (-EIOCBQUEUED != ret) {
- BUG_ON(!list_empty(&iocb->ki_wait.task_list));
- aio_complete(iocb, ret, 0);
- /* must not access the iocb after this */
- }
- } else {
- /*
- * Issue an additional retry to avoid waiting forever if
- * no waits were queued (e.g. in case of a short read).
- */
- if (list_empty(&iocb->ki_wait.task_list))
- kiocbSetKicked(iocb);
+ if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
+ BUG_ON(!list_empty(&iocb->ki_wait.task_list));
+ aio_complete(iocb, ret, 0);
}
out:
spin_lock_irq(&ctx->ctx_lock);
@@ -898,16 +889,24 @@ static void aio_kick_handler(void *data)
* and if required activate the aio work queue to process
* it
*/
-static void queue_kicked_iocb(struct kiocb *iocb)
+static void try_queue_kicked_iocb(struct kiocb *iocb)
{
struct kioctx *ctx = iocb->ki_ctx;
unsigned long flags;
int run = 0;
- WARN_ON((!list_empty(&iocb->ki_wait.task_list)));
+ /* We're supposed to be the only path putting the iocb back on the run
+ * list. If we find that the iocb is *back* on a wait queue already
+ * than retry has happened before we could queue the iocb. This also
+ * means that the retry could have completed and freed our iocb, no
+ * good. */
+ BUG_ON((!list_empty(&iocb->ki_wait.task_list)));
spin_lock_irqsave(&ctx->ctx_lock, flags);
- run = __queue_kicked_iocb(iocb);
+ /* set this inside the lock so that we can't race with aio_run_iocb()
+ * testing it and putting the iocb on the run list under the lock */
+ if (!kiocbTryKick(iocb))
+ run = __queue_kicked_iocb(iocb);
spin_unlock_irqrestore(&ctx->ctx_lock, flags);
if (run)
aio_queue_work(ctx);
@@ -930,10 +929,7 @@ void fastcall kick_iocb(struct kiocb *iocb)
return;
}
- /* If its already kicked we shouldn't queue it again */
- if (!kiocbTryKick(iocb)) {
- queue_kicked_iocb(iocb);
- }
+ try_queue_kicked_iocb(iocb);
}
EXPORT_SYMBOL(kick_iocb);
@@ -1321,8 +1317,11 @@ asmlinkage long sys_io_destroy(aio_context_t ctx)
}
/*
- * Default retry method for aio_read (also used for first time submit)
- * Responsible for updating iocb state as retries progress
+ * aio_p{read,write} are the default ki_retry methods for
+ * IO_CMD_P{READ,WRITE}. They maintains kiocb retry state around potentially
+ * multiple calls to f_op->aio_read(). They loop around partial progress
+ * instead of returning -EIOCBRETRY because they don't have the means to call
+ * kick_iocb().
*/
static ssize_t aio_pread(struct kiocb *iocb)
{
@@ -1331,25 +1330,25 @@ static ssize_t aio_pread(struct kiocb *iocb)
struct inode *inode = mapping->host;
ssize_t ret = 0;
- ret = file->f_op->aio_read(iocb, iocb->ki_buf,
- iocb->ki_left, iocb->ki_pos);
+ do {
+ ret = file->f_op->aio_read(iocb, iocb->ki_buf,
+ iocb->ki_left, iocb->ki_pos);
+ /*
+ * Can't just depend on iocb->ki_left to determine
+ * whether we are done. This may have been a short read.
+ */
+ if (ret > 0) {
+ iocb->ki_buf += ret;
+ iocb->ki_left -= ret;
+ }
- /*
- * Can't just depend on iocb->ki_left to determine
- * whether we are done. This may have been a short read.
- */
- if (ret > 0) {
- iocb->ki_buf += ret;
- iocb->ki_left -= ret;
/*
- * For pipes and sockets we return once we have
- * some data; for regular files we retry till we
- * complete the entire read or find that we can't
- * read any more data (e.g short reads).
+ * For pipes and sockets we return once we have some data; for
+ * regular files we retry till we complete the entire read or
+ * find that we can't read any more data (e.g short reads).
*/
- if (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))
- ret = -EIOCBRETRY;
- }
+ } while (ret > 0 && iocb->ki_left > 0 &&
+ !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode));
/* This means we must have transferred all that we could */
/* No need to retry anymore */
@@ -1359,27 +1358,21 @@ static ssize_t aio_pread(struct kiocb *iocb)
return ret;
}
-/*
- * Default retry method for aio_write (also used for first time submit)
- * Responsible for updating iocb state as retries progress
- */
+/* see aio_pread() */
static ssize_t aio_pwrite(struct kiocb *iocb)
{
struct file *file = iocb->ki_filp;
ssize_t ret = 0;
- ret = file->f_op->aio_write(iocb, iocb->ki_buf,
- iocb->ki_left, iocb->ki_pos);
-
- if (ret > 0) {
- iocb->ki_buf += ret;
- iocb->ki_left -= ret;
-
- ret = -EIOCBRETRY;
- }
+ do {
+ ret = file->f_op->aio_write(iocb, iocb->ki_buf,
+ iocb->ki_left, iocb->ki_pos);
+ if (ret > 0) {
+ iocb->ki_buf += ret;
+ iocb->ki_left -= ret;
+ }
+ } while (ret > 0 && iocb->ki_left > 0);
- /* This means we must have transferred all that we could */
- /* No need to retry anymore */
if ((ret == 0) || (iocb->ki_left == 0))
ret = iocb->ki_nbytes - iocb->ki_left;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8cc23e7d0d5..1ebf7dafc1d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -781,6 +781,8 @@ static int cifs_oplock_thread(void * dummyarg)
oplockThread = current;
do {
+ if (try_to_freeze())
+ continue;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1*HZ);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 2335f14a158..47360156cc5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -344,6 +344,8 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
}
while (server->tcpStatus != CifsExiting) {
+ if (try_to_freeze())
+ continue;
if (bigbuf == NULL) {
bigbuf = cifs_buf_get();
if(bigbuf == NULL) {
diff --git a/fs/compat.c b/fs/compat.c
index ac3fb9ed8ee..a719e158e00 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -44,6 +44,8 @@
#include <linux/nfsd/syscall.h>
#include <linux/personality.h>
#include <linux/rwsem.h>
+#include <linux/acct.h>
+#include <linux/mm.h>
#include <net/sock.h> /* siocdevprivate_ioctl */
@@ -1487,6 +1489,8 @@ int compat_do_execve(char * filename,
/* execve success */
security_bprm_free(bprm);
+ acct_update_integrals(current);
+ update_mem_hiwater(current);
kfree(bprm);
return retval;
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 7376b61269f..fb10386c59b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -102,7 +102,8 @@ static inline void dentry_iput(struct dentry * dentry)
list_del_init(&dentry->d_alias);
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
- fsnotify_inoderemove(inode);
+ if (!inode->i_nlink)
+ fsnotify_inoderemove(inode);
if (dentry->d_op && dentry->d_op->d_iput)
dentry->d_op->d_iput(dentry, inode);
else
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 6ab1dd0ca90..4284cd31eba 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -101,6 +101,10 @@
/* Maximum number of poll wake up nests we are allowing */
#define EP_MAX_POLLWAKE_NESTS 4
+/* Maximum msec timeout value storeable in a long int */
+#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
+
+
struct epoll_filefd {
struct file *file;
int fd;
@@ -231,8 +235,9 @@ struct ep_pqueue {
static void ep_poll_safewake_init(struct poll_safewake *psw);
static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq);
-static int ep_getfd(int *efd, struct inode **einode, struct file **efile);
-static int ep_file_init(struct file *file);
+static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
+ struct eventpoll *ep);
+static int ep_alloc(struct eventpoll **pep);
static void ep_free(struct eventpoll *ep);
static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
static void ep_use_epitem(struct epitem *epi);
@@ -501,38 +506,37 @@ void eventpoll_release_file(struct file *file)
asmlinkage long sys_epoll_create(int size)
{
int error, fd;
+ struct eventpoll *ep;
struct inode *inode;
struct file *file;
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
current, size));
- /* Sanity check on the size parameter */
+ /*
+ * Sanity check on the size parameter, and create the internal data
+ * structure ( "struct eventpoll" ).
+ */
error = -EINVAL;
- if (size <= 0)
+ if (size <= 0 || (error = ep_alloc(&ep)) != 0)
goto eexit_1;
/*
* Creates all the items needed to setup an eventpoll file. That is,
* a file structure, and inode and a free file descriptor.
*/
- error = ep_getfd(&fd, &inode, &file);
- if (error)
- goto eexit_1;
-
- /* Setup the file internal data structure ( "struct eventpoll" ) */
- error = ep_file_init(file);
+ error = ep_getfd(&fd, &inode, &file, ep);
if (error)
goto eexit_2;
-
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
current, size, fd));
return fd;
eexit_2:
- sys_close(fd);
+ ep_free(ep);
+ kfree(ep);
eexit_1:
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
current, size, error));
@@ -706,7 +710,8 @@ eexit_1:
/*
* Creates the file descriptor to be used by the epoll interface.
*/
-static int ep_getfd(int *efd, struct inode **einode, struct file **efile)
+static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
+ struct eventpoll *ep)
{
struct qstr this;
char name[32];
@@ -756,7 +761,7 @@ static int ep_getfd(int *efd, struct inode **einode, struct file **efile)
file->f_op = &eventpoll_fops;
file->f_mode = FMODE_READ;
file->f_version = 0;
- file->private_data = NULL;
+ file->private_data = ep;
/* Install the new setup file into the allocated fd. */
fd_install(fd, file);
@@ -777,14 +782,13 @@ eexit_1:
}
-static int ep_file_init(struct file *file)
+static int ep_alloc(struct eventpoll **pep)
{
- struct eventpoll *ep;
+ struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL);
- if (!(ep = kmalloc(sizeof(struct eventpoll), GFP_KERNEL)))
+ if (!ep)
return -ENOMEM;
- memset(ep, 0, sizeof(*ep));
rwlock_init(&ep->lock);
init_rwsem(&ep->sem);
init_waitqueue_head(&ep->wq);
@@ -792,9 +796,9 @@ static int ep_file_init(struct file *file)
INIT_LIST_HEAD(&ep->rdllist);
ep->rbr = RB_ROOT;
- file->private_data = ep;
+ *pep = ep;
- DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_file_init() ep=%p\n",
+ DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n",
current, ep));
return 0;
}
@@ -1506,8 +1510,8 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
* and the overflow condition. The passed timeout is in milliseconds,
* that why (t * HZ) / 1000.
*/
- jtimeout = timeout == -1 || timeout > (MAX_SCHEDULE_TIMEOUT - 1000) / HZ ?
- MAX_SCHEDULE_TIMEOUT: (timeout * HZ + 999) / 1000;
+ jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
+ MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
retry:
write_lock_irqsave(&ep->lock, flags);
diff --git a/fs/exec.c b/fs/exec.c
index 14dd03907cc..a04a575ad43 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -421,11 +421,6 @@ int setup_arg_pages(struct linux_binprm *bprm,
if (!mpnt)
return -ENOMEM;
- if (security_vm_enough_memory(arg_size >> PAGE_SHIFT)) {
- kmem_cache_free(vm_area_cachep, mpnt);
- return -ENOMEM;
- }
-
memset(mpnt, 0, sizeof(*mpnt));
down_write(&mm->mmap_sem);
@@ -745,8 +740,8 @@ static inline int de_thread(struct task_struct *tsk)
}
/*
- * Now there are really no other threads at all,
- * so it's safe to stop telling them to kill themselves.
+ * There may be one thread left which is just exiting,
+ * but it's safe to stop telling the group to kill themselves.
*/
sig->flags = 0;
@@ -785,7 +780,6 @@ no_thread_group:
kmem_cache_free(sighand_cachep, oldsighand);
}
- BUG_ON(!thread_group_empty(current));
BUG_ON(!thread_group_leader(current));
return 0;
}
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index c8d07030c89..e2d6208633a 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -605,27 +605,28 @@ got:
insert_inode_hash(inode);
if (DQUOT_ALLOC_INODE(inode)) {
- DQUOT_DROP(inode);
err = -ENOSPC;
- goto fail2;
+ goto fail_drop;
}
+
err = ext2_init_acl(inode, dir);
- if (err) {
- DQUOT_FREE_INODE(inode);
- DQUOT_DROP(inode);
- goto fail2;
- }
+ if (err)
+ goto fail_free_drop;
+
err = ext2_init_security(inode,dir);
- if (err) {
- DQUOT_FREE_INODE(inode);
- goto fail2;
- }
+ if (err)
+ goto fail_free_drop;
+
mark_inode_dirty(inode);
ext2_debug("allocating inode %lu\n", inode->i_ino);
ext2_preread_inode(inode);
return inode;
-fail2:
+fail_free_drop:
+ DQUOT_FREE_INODE(inode);
+
+fail_drop:
+ DQUOT_DROP(inode);
inode->i_flags |= S_NOQUOTA;
inode->i_nlink = 0;
iput(inode);
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index e463dca008e..0213db4911a 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1410,7 +1410,7 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
unsigned long desc_count;
struct ext3_group_desc *gdp;
int i;
- unsigned long ngroups;
+ unsigned long ngroups = EXT3_SB(sb)->s_groups_count;
#ifdef EXT3FS_DEBUG
struct ext3_super_block *es;
unsigned long bitmap_count, x;
@@ -1421,7 +1421,8 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
desc_count = 0;
bitmap_count = 0;
gdp = NULL;
- for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
+
+ for (i = 0; i < ngroups; i++) {
gdp = ext3_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
@@ -1443,7 +1444,6 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
return bitmap_count;
#else
desc_count = 0;
- ngroups = EXT3_SB(sb)->s_groups_count;
smp_rmb();
for (i = 0; i < ngroups; i++) {
gdp = ext3_get_group_desc(sb, i, NULL);
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 96552769d03..6549945f9ac 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -597,27 +597,22 @@ got:
ret = inode;
if(DQUOT_ALLOC_INODE(inode)) {
- DQUOT_DROP(inode);
err = -EDQUOT;
- goto fail2;
+ goto fail_drop;
}
+
err = ext3_init_acl(handle, inode, dir);
- if (err) {
- DQUOT_FREE_INODE(inode);
- DQUOT_DROP(inode);
- goto fail2;
- }
+ if (err)
+ goto fail_free_drop;
+
err = ext3_init_security(handle,inode, dir);
- if (err) {
- DQUOT_FREE_INODE(inode);
- goto fail2;
- }
+ if (err)
+ goto fail_free_drop;
+
err = ext3_mark_inode_dirty(handle, inode);
if (err) {
ext3_std_error(sb, err);
- DQUOT_FREE_INODE(inode);
- DQUOT_DROP(inode);
- goto fail2;
+ goto fail_free_drop;
}
ext3_debug("allocating inode %lu\n", inode->i_ino);
@@ -631,7 +626,11 @@ really_out:
brelse(bitmap_bh);
return ret;
-fail2:
+fail_free_drop:
+ DQUOT_FREE_INODE(inode);
+
+fail_drop:
+ DQUOT_DROP(inode);
inode->i_flags |= S_NOQUOTA;
inode->i_nlink = 0;
iput(inode);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 2c9f81278d5..57f79106267 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -242,7 +242,7 @@ static int setup_new_group_blocks(struct super_block *sb,
i < sbi->s_itb_per_group; i++, bit++, block++) {
struct buffer_head *it;
- ext3_debug("clear inode block %#04x (+%ld)\n", block, bit);
+ ext3_debug("clear inode block %#04lx (+%d)\n", block, bit);
if (IS_ERR(it = bclean(handle, sb, block))) {
err = PTR_ERR(it);
goto exit_bh;
@@ -643,8 +643,8 @@ static void update_backups(struct super_block *sb,
break;
bh = sb_getblk(sb, group * bpg + blk_off);
- ext3_debug(sb, __FUNCTION__, "update metadata backup %#04lx\n",
- bh->b_blocknr);
+ ext3_debug("update metadata backup %#04lx\n",
+ (unsigned long)bh->b_blocknr);
if ((err = ext3_journal_get_write_access(handle, bh)))
break;
lock_buffer(bh);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index a93c3609025..9e24ceb019f 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -512,15 +512,14 @@ static void ext3_clear_inode(struct inode *inode)
static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
{
- struct ext3_sb_info *sbi = EXT3_SB(vfs->mnt_sb);
+ struct super_block *sb = vfs->mnt_sb;
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
- if (sbi->s_mount_opt & EXT3_MOUNT_JOURNAL_DATA)
+ if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
seq_puts(seq, ",data=journal");
-
- if (sbi->s_mount_opt & EXT3_MOUNT_ORDERED_DATA)
+ else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
seq_puts(seq, ",data=ordered");
-
- if (sbi->s_mount_opt & EXT3_MOUNT_WRITEBACK_DATA)
+ else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
seq_puts(seq, ",data=writeback");
#if defined(CONFIG_QUOTA)
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 62ffa913940..7134403d5be 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -12,39 +12,6 @@
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
-static ssize_t fat_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
-{
- struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
- int retval;
-
- retval = generic_file_aio_write(iocb, buf, count, pos);
- if (retval > 0) {
- inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
- MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
- mark_inode_dirty(inode);
-// check the locking rules
-// if (IS_SYNC(inode))
-// fat_sync_inode(inode);
- }
- return retval;
-}
-
-static ssize_t fat_file_writev(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct inode *inode = filp->f_dentry->d_inode;
- int retval;
-
- retval = generic_file_writev(filp, iov, nr_segs, ppos);
- if (retval > 0) {
- inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
- MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
- mark_inode_dirty(inode);
- }
- return retval;
-}
-
int fat_generic_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
@@ -148,9 +115,9 @@ struct file_operations fat_file_operations = {
.read = do_sync_read,
.write = do_sync_write,
.readv = generic_file_readv,
- .writev = fat_file_writev,
+ .writev = generic_file_writev,
.aio_read = generic_file_aio_read,
- .aio_write = fat_file_aio_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.ioctl = fat_generic_ioctl,
.fsync = file_fsync,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a7cbe68e225..e2effe2dc9b 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -102,6 +102,19 @@ static int fat_prepare_write(struct file *file, struct page *page,
&MSDOS_I(page->mapping->host)->mmu_private);
}
+static int fat_commit_write(struct file *file, struct page *page,
+ unsigned from, unsigned to)
+{
+ struct inode *inode = page->mapping->host;
+ int err = generic_commit_write(file, page, from, to);
+ if (!err && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) {
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+ MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
+ mark_inode_dirty(inode);
+ }
+ return err;
+}
+
static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
{
return generic_block_bmap(mapping, block, fat_get_block);
@@ -112,7 +125,7 @@ static struct address_space_operations fat_aops = {
.writepage = fat_writepage,
.sync_page = block_sync_page,
.prepare_write = fat_prepare_write,
- .commit_write = generic_commit_write,
+ .commit_write = fat_commit_write,
.bmap = _fat_bmap
};
@@ -287,9 +300,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
inode->i_blksize = sbi->cluster_size;
inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
- inode->i_mtime.tv_sec = inode->i_atime.tv_sec =
+ inode->i_mtime.tv_sec =
date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date));
- inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = 0;
+ inode->i_mtime.tv_nsec = 0;
if (sbi->options.isvfat) {
int secs = de->ctime_cs / 100;
int csecs = de->ctime_cs % 100;
@@ -297,8 +310,11 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
date_dos2unix(le16_to_cpu(de->ctime),
le16_to_cpu(de->cdate)) + secs;
inode->i_ctime.tv_nsec = csecs * 10000000;
+ inode->i_atime.tv_sec =
+ date_dos2unix(le16_to_cpu(0), le16_to_cpu(de->adate));
+ inode->i_atime.tv_nsec = 0;
} else
- inode->i_ctime = inode->i_mtime;
+ inode->i_ctime = inode->i_atime = inode->i_mtime;
return 0;
}
@@ -500,7 +516,9 @@ retry:
raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date);
if (sbi->options.isvfat) {
+ __le16 atime;
fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate);
+ fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate);
raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
inode->i_ctime.tv_nsec / 10000000;
}
diff --git a/fs/file.c b/fs/file.c
index 2127a7b9dc3..fd066b261c7 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -69,13 +69,9 @@ void free_fd_array(struct file **array, int num)
static void __free_fdtable(struct fdtable *fdt)
{
- int fdset_size, fdarray_size;
-
- fdset_size = fdt->max_fdset / 8;
- fdarray_size = fdt->max_fds * sizeof(struct file *);
- free_fdset(fdt->open_fds, fdset_size);
- free_fdset(fdt->close_on_exec, fdset_size);
- free_fd_array(fdt->fd, fdarray_size);
+ free_fdset(fdt->open_fds, fdt->max_fdset);
+ free_fdset(fdt->close_on_exec, fdt->max_fdset);
+ free_fd_array(fdt->fd, fdt->max_fds);
kfree(fdt);
}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index e79e49b3eec..29f1e9f6e85 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -96,6 +96,8 @@ static int fuse_lookup_iget(struct inode *dir, struct dentry *entry,
fuse_lookup_init(req, dir, entry, &outarg);
request_send(fc, req);
err = req->out.h.error;
+ if (!err && (!outarg.nodeid || outarg.nodeid == FUSE_ROOT_ID))
+ err = -EIO;
if (!err) {
inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
&outarg.attr);
@@ -152,6 +154,10 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
fuse_put_request(fc, req);
return err;
}
+ if (!outarg.nodeid || outarg.nodeid == FUSE_ROOT_ID) {
+ fuse_put_request(fc, req);
+ return -EIO;
+ }
inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
&outarg.attr);
if (!inode) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6454022b053..657ab11c173 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -23,6 +23,10 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
struct fuse_file *ff;
int err;
+ /* VFS checks this, but only _after_ ->open() */
+ if (file->f_flags & O_DIRECT)
+ return -EINVAL;
+
err = generic_file_open(inode, file);
if (err)
return err;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 59c5062cd63..dd711310626 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -793,11 +793,6 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
return(err);
}
-void hostfs_truncate(struct inode *ino)
-{
- not_implemented();
-}
-
int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd)
{
char *name;
@@ -894,7 +889,6 @@ static struct inode_operations hostfs_iops = {
.rmdir = hostfs_rmdir,
.mknod = hostfs_mknod,
.rename = hostfs_rename,
- .truncate = hostfs_truncate,
.permission = hostfs_permission,
.setattr = hostfs_setattr,
.getattr = hostfs_getattr,
@@ -910,7 +904,6 @@ static struct inode_operations hostfs_dir_iops = {
.rmdir = hostfs_rmdir,
.mknod = hostfs_mknod,
.rename = hostfs_rename,
- .truncate = hostfs_truncate,
.permission = hostfs_permission,
.setattr = hostfs_setattr,
.getattr = hostfs_getattr,
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 0ec62d5310d..9f942ca8e4e 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -129,8 +129,7 @@ void jfs_delete_inode(struct inode *inode)
jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
if (!is_bad_inode(inode) &&
- (JFS_IP(inode)->fileset == cpu_to_le32(FILESYSTEM_I))) {
-
+ (JFS_IP(inode)->fileset == FILESYSTEM_I)) {
truncate_inode_pages(&inode->i_data, 0);
if (test_cflag(COMMIT_Freewmap, inode))
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index c739626f5bf..eadf319bee2 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -3055,7 +3055,7 @@ static int cntlz(u32 value)
* RETURN VALUES:
* log2 number of blocks
*/
-int blkstol2(s64 nb)
+static int blkstol2(s64 nb)
{
int l2nb;
s64 mask; /* meant to be signed */
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index c7a92f9deb2..9b71ed2674f 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -725,6 +725,9 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
else
tlck->flag = tlckINODELOCK;
+ if (S_ISDIR(ip->i_mode))
+ tlck->flag |= tlckDIRECTORY;
+
tlck->type = 0;
/* bind the tlock and the page */
@@ -1009,6 +1012,8 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
/* bind the tlock and the object */
tlck->flag = tlckINODELOCK;
+ if (S_ISDIR(ip->i_mode))
+ tlck->flag |= tlckDIRECTORY;
tlck->ip = ip;
tlck->mp = NULL;
@@ -1077,6 +1082,8 @@ struct linelock *txLinelock(struct linelock * tlock)
linelock->flag = tlckLINELOCK;
linelock->maxcnt = TLOCKLONG;
linelock->index = 0;
+ if (tlck->flag & tlckDIRECTORY)
+ linelock->flag |= tlckDIRECTORY;
/* append linelock after tlock */
linelock->next = tlock->next;
@@ -2070,8 +2077,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
*
* function: log from maplock of freed data extents;
*/
-void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
- struct tlock * tlck)
+static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
+ struct tlock * tlck)
{
struct pxd_lock *pxdlock;
int i, nlock;
@@ -2209,7 +2216,7 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
* function: synchronously write pages locked by transaction
* after txLog() but before txUpdateMap();
*/
-void txForce(struct tblock * tblk)
+static void txForce(struct tblock * tblk)
{
struct tlock *tlck;
lid_t lid, next;
@@ -2358,7 +2365,7 @@ static void txUpdateMap(struct tblock * tblk)
*/
else { /* (maplock->flag & mlckFREE) */
- if (S_ISDIR(tlck->ip->i_mode))
+ if (tlck->flag & tlckDIRECTORY)
txFreeMap(ipimap, maplock,
tblk, COMMIT_PWMAP);
else
diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h
index 59ad0f6b723..0e4dc4514c4 100644
--- a/fs/jfs/jfs_txnmgr.h
+++ b/fs/jfs/jfs_txnmgr.h
@@ -122,6 +122,7 @@ extern struct tlock *TxLock; /* transaction lock table */
#define tlckLOG 0x0800
/* updateMap state */
#define tlckUPDATEMAP 0x0080
+#define tlckDIRECTORY 0x0040
/* freeLock state */
#define tlckFREELOCK 0x0008
#define tlckWRITEPAGE 0x0004
diff --git a/fs/locks.c b/fs/locks.c
index c2c09b4798d..f7daa5f4894 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -124,6 +124,7 @@
#include <linux/smp_lock.h>
#include <linux/syscalls.h>
#include <linux/time.h>
+#include <linux/rcupdate.h>
#include <asm/semaphore.h>
#include <asm/uaccess.h>
@@ -2205,6 +2206,7 @@ void steal_locks(fl_owner_t from)
lock_kernel();
j = 0;
+ rcu_read_lock();
fdt = files_fdtable(files);
for (;;) {
unsigned long set;
@@ -2222,6 +2224,7 @@ void steal_locks(fl_owner_t from)
set >>= 1;
}
}
+ rcu_read_unlock();
unlock_kernel();
}
EXPORT_SYMBOL(steal_locks);
diff --git a/fs/namei.c b/fs/namei.c
index 21d85f1ac83..043d587216b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1048,7 +1048,7 @@ int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata
out:
if (unlikely(current->audit_context
&& nd && nd->dentry && nd->dentry->d_inode))
- audit_inode(name, nd->dentry->d_inode);
+ audit_inode(name, nd->dentry->d_inode, flags);
return retval;
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 6ceb1d471f2..9758ebd4990 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -184,14 +184,13 @@ static void nfs_readpage_release(struct nfs_page *req)
{
unlock_page(req->wb_page);
- nfs_clear_request(req);
- nfs_release_request(req);
-
dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
req->wb_context->dentry->d_inode->i_sb->s_id,
(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
+ nfs_clear_request(req);
+ nfs_release_request(req);
}
/*
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e08edc17c6a..361b4007d4a 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -162,7 +162,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
static inline int
-nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, struct nfs4_stateowner **replay_owner)
{
int status;
dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n",
@@ -238,8 +238,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
*/
status = nfsd4_process_open2(rqstp, current_fh, open);
out:
- if (open->op_stateowner)
+ if (open->op_stateowner) {
nfs4_get_stateowner(open->op_stateowner);
+ *replay_owner = open->op_stateowner;
+ }
nfs4_unlock_state();
return status;
}
@@ -809,8 +811,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
op->status = nfsd4_access(rqstp, current_fh, &op->u.access);
break;
case OP_CLOSE:
- op->status = nfsd4_close(rqstp, current_fh, &op->u.close);
- replay_owner = op->u.close.cl_stateowner;
+ op->status = nfsd4_close(rqstp, current_fh, &op->u.close, &replay_owner);
break;
case OP_COMMIT:
op->status = nfsd4_commit(rqstp, current_fh, &op->u.commit);
@@ -831,15 +832,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
op->status = nfsd4_link(rqstp, current_fh, save_fh, &op->u.link);
break;
case OP_LOCK:
- op->status = nfsd4_lock(rqstp, current_fh, &op->u.lock);
- replay_owner = op->u.lock.lk_stateowner;
+ op->status = nfsd4_lock(rqstp, current_fh, &op->u.lock, &replay_owner);
break;
case OP_LOCKT:
op->status = nfsd4_lockt(rqstp, current_fh, &op->u.lockt);
break;
case OP_LOCKU:
- op->status = nfsd4_locku(rqstp, current_fh, &op->u.locku);
- replay_owner = op->u.locku.lu_stateowner;
+ op->status = nfsd4_locku(rqstp, current_fh, &op->u.locku, &replay_owner);
break;
case OP_LOOKUP:
op->status = nfsd4_lookup(rqstp, current_fh, &op->u.lookup);
@@ -853,16 +852,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
op->status = nfs_ok;
break;
case OP_OPEN:
- op->status = nfsd4_open(rqstp, current_fh, &op->u.open);
- replay_owner = op->u.open.op_stateowner;
+ op->status = nfsd4_open(rqstp, current_fh, &op->u.open, &replay_owner);
break;
case OP_OPEN_CONFIRM:
- op->status = nfsd4_open_confirm(rqstp, current_fh, &op->u.open_confirm);
- replay_owner = op->u.open_confirm.oc_stateowner;
+ op->status = nfsd4_open_confirm(rqstp, current_fh, &op->u.open_confirm, &replay_owner);
break;
case OP_OPEN_DOWNGRADE:
- op->status = nfsd4_open_downgrade(rqstp, current_fh, &op->u.open_downgrade);
- replay_owner = op->u.open_downgrade.od_stateowner;
+ op->status = nfsd4_open_downgrade(rqstp, current_fh, &op->u.open_downgrade, &replay_owner);
break;
case OP_PUTFH:
op->status = nfsd4_putfh(rqstp, current_fh, &op->u.putfh);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b83f8fb441e..6bbefd06f10 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -624,7 +624,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
cb->cb_ident = se->se_callback_ident;
return;
out_err:
- printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
+ dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
"will not receive delegations\n",
clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
@@ -678,13 +678,12 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
int status;
char dname[HEXDIR_LEN];
- status = nfserr_inval;
if (!check_name(clname))
- goto out;
+ return nfserr_inval;
status = nfs4_make_rec_clidname(dname, &clname);
if (status)
- goto out;
+ return status;
/*
* XXX The Duplicate Request Cache (DRC) has been checked (??)
@@ -2014,7 +2013,7 @@ STALE_STATEID(stateid_t *stateid)
{
if (stateid->si_boot == boot_time)
return 0;
- printk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n",
+ dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n",
stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
stateid->si_generation);
return 1;
@@ -2275,7 +2274,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
check_replay:
if (seqid == sop->so_seqid - 1) {
- printk("NFSD: preprocess_seqid_op: retransmission?\n");
+ dprintk("NFSD: preprocess_seqid_op: retransmission?\n");
/* indicate replay to calling function */
return NFSERR_REPLAY_ME;
}
@@ -2286,7 +2285,7 @@ check_replay:
}
int
-nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc)
+nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc, struct nfs4_stateowner **replay_owner)
{
int status;
struct nfs4_stateowner *sop;
@@ -2320,8 +2319,10 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
nfsd4_create_clid_dir(sop->so_client);
out:
- if (oc->oc_stateowner)
+ if (oc->oc_stateowner) {
nfs4_get_stateowner(oc->oc_stateowner);
+ *replay_owner = oc->oc_stateowner;
+ }
nfs4_unlock_state();
return status;
}
@@ -2352,7 +2353,7 @@ reset_union_bmap_deny(unsigned long deny, unsigned long *bmap)
}
int
-nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od)
+nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od, struct nfs4_stateowner **replay_owner)
{
int status;
struct nfs4_stateid *stp;
@@ -2394,8 +2395,10 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct n
memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t));
status = nfs_ok;
out:
- if (od->od_stateowner)
+ if (od->od_stateowner) {
nfs4_get_stateowner(od->od_stateowner);
+ *replay_owner = od->od_stateowner;
+ }
nfs4_unlock_state();
return status;
}
@@ -2404,7 +2407,7 @@ out:
* nfs4_unlock_state() called after encode
*/
int
-nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_close *close)
+nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_close *close, struct nfs4_stateowner **replay_owner)
{
int status;
struct nfs4_stateid *stp;
@@ -2430,8 +2433,10 @@ nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_clos
/* release_state_owner() calls nfsd_close() if needed */
release_state_owner(stp, OPEN_STATE);
out:
- if (close->cl_stateowner)
+ if (close->cl_stateowner) {
nfs4_get_stateowner(close->cl_stateowner);
+ *replay_owner = close->cl_stateowner;
+ }
nfs4_unlock_state();
return status;
}
@@ -2500,8 +2505,7 @@ find_stateid(stateid_t *stid, int flags)
(local->st_stateid.si_fileid == f_id))
return local;
}
- } else
- printk("NFSD: find_stateid: ERROR: no state flag\n");
+ }
return NULL;
}
@@ -2624,7 +2628,9 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
sop->so_is_open_owner = 0;
sop->so_id = current_ownerid++;
sop->so_client = clp;
- sop->so_seqid = lock->lk_new_lock_seqid;
+ /* It is the openowner seqid that will be incremented in encode in the
+ * case of new lockowners; so increment the lock seqid manually: */
+ sop->so_seqid = lock->lk_new_lock_seqid + 1;
sop->so_confirmed = 1;
rp = &sop->so_replay;
rp->rp_status = NFSERR_SERVERFAULT;
@@ -2676,9 +2682,10 @@ check_lock_length(u64 offset, u64 length)
* LOCK operation
*/
int
-nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock)
+nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock, struct nfs4_stateowner **replay_owner)
{
struct nfs4_stateowner *open_sop = NULL;
+ struct nfs4_stateowner *lock_sop = NULL;
struct nfs4_stateid *lock_stp;
struct file *filp;
struct file_lock file_lock;
@@ -2705,19 +2712,19 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
struct nfs4_file *fp;
status = nfserr_stale_clientid;
- if (STALE_CLIENTID(&lock->lk_new_clientid)) {
- printk("NFSD: nfsd4_lock: clientid is stale!\n");
+ if (STALE_CLIENTID(&lock->lk_new_clientid))
goto out;
- }
/* validate and update open stateid and open seqid */
status = nfs4_preprocess_seqid_op(current_fh,
lock->lk_new_open_seqid,
&lock->lk_new_open_stateid,
CHECK_FH | OPEN_STATE,
- &open_sop, &open_stp, lock);
+ &lock->lk_stateowner, &open_stp,
+ lock);
if (status)
goto out;
+ open_sop = lock->lk_stateowner;
/* create lockowner and lock stateid */
fp = open_stp->st_file;
strhashval = lock_ownerstr_hashval(fp->fi_inode,
@@ -2727,16 +2734,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
* the same file, or should they just be allowed (and
* create new stateids)? */
status = nfserr_resource;
- if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock)))
+ lock_sop = alloc_init_lock_stateowner(strhashval,
+ open_sop->so_client, open_stp, lock);
+ if (lock_sop == NULL)
goto out;
- if ((lock_stp = alloc_init_lock_stateid(lock->lk_stateowner,
- fp, open_stp)) == NULL) {
- release_stateowner(lock->lk_stateowner);
- lock->lk_stateowner = NULL;
+ lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp);
+ if (lock_stp == NULL) {
+ release_stateowner(lock_sop);
goto out;
}
- /* bump the open seqid used to create the lock */
- open_sop->so_seqid++;
} else {
/* lock (lock owner + lock stateid) already exists */
status = nfs4_preprocess_seqid_op(current_fh,
@@ -2746,12 +2752,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
&lock->lk_stateowner, &lock_stp, lock);
if (status)
goto out;
+ lock_sop = lock->lk_stateowner;
}
/* lock->lk_stateowner and lock_stp have been created or found */
filp = lock_stp->st_vfs_file;
if ((status = fh_verify(rqstp, current_fh, S_IFREG, MAY_LOCK))) {
- printk("NFSD: nfsd4_lock: permission denied!\n");
+ dprintk("NFSD: nfsd4_lock: permission denied!\n");
goto out;
}
@@ -2776,7 +2783,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
status = nfserr_inval;
goto out;
}
- file_lock.fl_owner = (fl_owner_t) lock->lk_stateowner;
+ file_lock.fl_owner = (fl_owner_t)lock_sop;
file_lock.fl_pid = current->tgid;
file_lock.fl_file = filp;
file_lock.fl_flags = FL_POSIX;
@@ -2832,14 +2839,13 @@ out_destroy_new_stateid:
* An error encountered after instantiation of the new
* stateid has forced us to destroy it.
*/
- if (!seqid_mutating_err(status))
- open_sop->so_seqid--;
-
release_state_owner(lock_stp, LOCK_STATE);
}
out:
- if (lock->lk_stateowner)
+ if (lock->lk_stateowner) {
nfs4_get_stateowner(lock->lk_stateowner);
+ *replay_owner = lock->lk_stateowner;
+ }
nfs4_unlock_state();
return status;
}
@@ -2866,13 +2872,11 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
nfs4_lock_state();
status = nfserr_stale_clientid;
- if (STALE_CLIENTID(&lockt->lt_clientid)) {
- printk("NFSD: nfsd4_lockt: clientid is stale!\n");
+ if (STALE_CLIENTID(&lockt->lt_clientid))
goto out;
- }
if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) {
- printk("NFSD: nfsd4_lockt: fh_verify() failed!\n");
+ dprintk("NFSD: nfsd4_lockt: fh_verify() failed!\n");
if (status == nfserr_symlink)
status = nfserr_inval;
goto out;
@@ -2930,7 +2934,7 @@ out:
}
int
-nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_locku *locku)
+nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_locku *locku, struct nfs4_stateowner **replay_owner)
{
struct nfs4_stateid *stp;
struct file *filp = NULL;
@@ -2976,7 +2980,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
file_lock.fl_ops->fl_release_private(&file_lock);
if (status) {
- printk("NFSD: nfs4_locku: posix_lock_file failed!\n");
+ dprintk("NFSD: nfs4_locku: posix_lock_file failed!\n");
goto out_nfserr;
}
/*
@@ -2986,8 +2990,10 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t));
out:
- if (locku->lu_stateowner)
+ if (locku->lu_stateowner) {
nfs4_get_stateowner(locku->lu_stateowner);
+ *replay_owner = locku->lu_stateowner;
+ }
nfs4_unlock_state();
return status;
@@ -3036,10 +3042,8 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *
/* XXX check for lease expiration */
status = nfserr_stale_clientid;
- if (STALE_CLIENTID(clid)) {
- printk("NFSD: nfsd4_release_lockowner: clientid is stale!\n");
+ if (STALE_CLIENTID(clid))
return status;
- }
nfs4_lock_state();
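
Taken together, the nfsd4 changes above switch every seqid-mutating operation (open, open_confirm, open_downgrade, close, lock, locku) to the same contract: the op takes a struct nfs4_stateowner **replay_owner and, while still holding the state lock, hands back a referenced owner for replay handling instead of the dispatcher digging it out of op->u.* afterwards. A condensed sketch of the shape of that contract (the op name, argument struct and body are hypothetical; the locking and refcount calls are the real ones):

        int nfsd4_some_op(struct svc_rqst *rqstp, struct svc_fh *current_fh,
                          struct nfsd4_some_op *args,
                          struct nfs4_stateowner **replay_owner)
        {
                int status;

                nfs4_lock_state();
                status = perform_the_op(args);          /* hypothetical body */
                if (args->op_stateowner) {
                        /* Caller gets a referenced owner for replay handling. */
                        nfs4_get_stateowner(args->op_stateowner);
                        *replay_owner = args->op_stateowner;
                }
                nfs4_unlock_state();
                return status;
        }

On the nfsd4_proc_compound() side this reduces each case to a single call that passes &replay_owner, as the nfs4proc.c hunks show.
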
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 49eafbdb15c..83f3322765c 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -29,7 +29,8 @@ ToDo/Notes:
The Windows boot will run chkdsk and then reboot. The user can then
immediately boot into Linux rather than having to do a full Windows
boot first before rebooting into Linux and we will recognize such a
- journal and empty it as it is clean by definition.
+ journal and empty it as it is clean by definition. Note, this only
+ works if chkdsk left the journal in an obviously clean state.
- Support journals ($LogFile) with only one restart page as well as
journals with two different restart pages. We sanity check both and
either use the only sane one or the more recent one of the two in the
@@ -92,6 +93,15 @@ ToDo/Notes:
an octal number to conform to how chmod(1) works, too. Thanks to
Giuseppe Bilotta and Horst von Brand for pointing out the errors of
my ways.
+ - Fix various bugs in the runlist merging code. (Based on libntfs
+ changes by Richard Russon.)
+ - Fix sparse warnings that have crept in over time.
+ - Change ntfs_cluster_free() to require a write locked runlist on entry
+ since we otherwise get into a lock reversal deadlock if a read locked
+ runlist is passed in. In the process also change it to take an ntfs
+ inode instead of a vfs inode as parameter.
+ - Fix the definition of the CHKD ntfs record magic. It had an off by
+ two error causing it to be CHKB instead of CHKD.
2.1.23 - Implement extension of resident files and make writing safe as well as
many bug fixes, cleanups, and enhancements...
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index b6cc8cf2462..5e80c07c6a4 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -59,39 +59,49 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
unsigned long flags;
struct buffer_head *first, *tmp;
struct page *page;
+ struct inode *vi;
ntfs_inode *ni;
int page_uptodate = 1;
page = bh->b_page;
- ni = NTFS_I(page->mapping->host);
+ vi = page->mapping->host;
+ ni = NTFS_I(vi);
if (likely(uptodate)) {
- s64 file_ofs, initialized_size;
+ loff_t i_size;
+ s64 file_ofs, init_size;
set_buffer_uptodate(bh);
file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
bh_offset(bh);
read_lock_irqsave(&ni->size_lock, flags);
- initialized_size = ni->initialized_size;
+ init_size = ni->initialized_size;
+ i_size = i_size_read(vi);
read_unlock_irqrestore(&ni->size_lock, flags);
+ if (unlikely(init_size > i_size)) {
+ /* Race with shrinking truncate. */
+ init_size = i_size;
+ }
/* Check for the current buffer head overflowing. */
- if (file_ofs + bh->b_size > initialized_size) {
- char *addr;
- int ofs = 0;
-
- if (file_ofs < initialized_size)
- ofs = initialized_size - file_ofs;
- addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
- memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
+ if (unlikely(file_ofs + bh->b_size > init_size)) {
+ u8 *kaddr;
+ int ofs;
+
+ ofs = 0;
+ if (file_ofs < init_size)
+ ofs = init_size - file_ofs;
+ kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+ memset(kaddr + bh_offset(bh) + ofs, 0,
+ bh->b_size - ofs);
+ kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
flush_dcache_page(page);
- kunmap_atomic(addr, KM_BIO_SRC_IRQ);
}
} else {
clear_buffer_uptodate(bh);
SetPageError(page);
- ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
- (unsigned long long)bh->b_blocknr);
+ ntfs_error(ni->vol->sb, "Buffer I/O error, logical block "
+ "0x%llx.", (unsigned long long)bh->b_blocknr);
}
first = page_buffers(page);
local_irq_save(flags);
@@ -124,7 +134,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
if (likely(page_uptodate && !PageError(page)))
SetPageUptodate(page);
} else {
- char *addr;
+ u8 *kaddr;
unsigned int i, recs;
u32 rec_size;
@@ -132,12 +142,12 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
recs = PAGE_CACHE_SIZE / rec_size;
/* Should have been verified before we got here... */
BUG_ON(!recs);
- addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+ kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
for (i = 0; i < recs; i++)
- post_read_mst_fixup((NTFS_RECORD*)(addr +
+ post_read_mst_fixup((NTFS_RECORD*)(kaddr +
i * rec_size), rec_size);
+ kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
flush_dcache_page(page);
- kunmap_atomic(addr, KM_BIO_SRC_IRQ);
if (likely(page_uptodate && !PageError(page)))
SetPageUptodate(page);
}
@@ -168,8 +178,11 @@ still_busy:
*/
static int ntfs_read_block(struct page *page)
{
+ loff_t i_size;
VCN vcn;
LCN lcn;
+ s64 init_size;
+ struct inode *vi;
ntfs_inode *ni;
ntfs_volume *vol;
runlist_element *rl;
@@ -180,7 +193,8 @@ static int ntfs_read_block(struct page *page)
int i, nr;
unsigned char blocksize_bits;
- ni = NTFS_I(page->mapping->host);
+ vi = page->mapping->host;
+ ni = NTFS_I(vi);
vol = ni->vol;
/* $MFT/$DATA must have its complete runlist in memory at all times. */
@@ -199,11 +213,28 @@ static int ntfs_read_block(struct page *page)
bh = head = page_buffers(page);
BUG_ON(!bh);
+ /*
+ * We may be racing with truncate. To avoid some of the problems we
+ * now take a snapshot of the various sizes and use those for the whole
+ * of the function. In case of an extending truncate it just means we
+ * may leave some buffers unmapped which are now allocated. This is
+ * not a problem since these buffers will just get mapped when a write
+ * occurs. In case of a shrinking truncate, we will detect this later
+ * on due to the runlist being incomplete and if the page is being
+ * fully truncated, truncate will throw it away as soon as we unlock
+ * it so no need to worry what we do with it.
+ */
iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
read_lock_irqsave(&ni->size_lock, flags);
lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
- zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
+ init_size = ni->initialized_size;
+ i_size = i_size_read(vi);
read_unlock_irqrestore(&ni->size_lock, flags);
+ if (unlikely(init_size > i_size)) {
+ /* Race with shrinking truncate. */
+ init_size = i_size;
+ }
+ zblock = (init_size + blocksize - 1) >> blocksize_bits;
/* Loop through all the buffers in the page. */
rl = NULL;
@@ -366,6 +397,8 @@ handle_zblock:
*/
static int ntfs_readpage(struct file *file, struct page *page)
{
+ loff_t i_size;
+ struct inode *vi;
ntfs_inode *ni, *base_ni;
u8 *kaddr;
ntfs_attr_search_ctx *ctx;
@@ -384,14 +417,17 @@ retry_readpage:
unlock_page(page);
return 0;
}
- ni = NTFS_I(page->mapping->host);
+ vi = page->mapping->host;
+ ni = NTFS_I(vi);
/*
* Only $DATA attributes can be encrypted and only unnamed $DATA
* attributes can be compressed. Index root can have the flags set but
* this means to create compressed/encrypted files, not that the
- * attribute is compressed/encrypted.
+ * attribute is compressed/encrypted. Note we need to check for
+ * AT_INDEX_ALLOCATION since this is the type of both directory and
+ * index inodes.
*/
- if (ni->type != AT_INDEX_ROOT) {
+ if (ni->type != AT_INDEX_ALLOCATION) {
/* If attribute is encrypted, deny access, just like NT4. */
if (NInoEncrypted(ni)) {
BUG_ON(ni->type != AT_DATA);
@@ -456,7 +492,12 @@ retry_readpage:
read_lock_irqsave(&ni->size_lock, flags);
if (unlikely(attr_len > ni->initialized_size))
attr_len = ni->initialized_size;
+ i_size = i_size_read(vi);
read_unlock_irqrestore(&ni->size_lock, flags);
+ if (unlikely(attr_len > i_size)) {
+ /* Race with shrinking truncate. */
+ attr_len = i_size;
+ }
kaddr = kmap_atomic(page, KM_USER0);
/* Copy the data to the page. */
memcpy(kaddr, (u8*)ctx->attr +
@@ -1341,9 +1382,11 @@ retry_writepage:
* Only $DATA attributes can be encrypted and only unnamed $DATA
* attributes can be compressed. Index root can have the flags set but
* this means to create compressed/encrypted files, not that the
- * attribute is compressed/encrypted.
+ * attribute is compressed/encrypted. Note we need to check for
+ * AT_INDEX_ALLOCATION since this is the type of both directory and
+ * index inodes.
*/
- if (ni->type != AT_INDEX_ROOT) {
+ if (ni->type != AT_INDEX_ALLOCATION) {
/* If file is encrypted, deny access, just like NT4. */
if (NInoEncrypted(ni)) {
unlock_page(page);
@@ -1379,8 +1422,8 @@ retry_writepage:
unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
- flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
+ flush_dcache_page(page);
}
/* Handle mst protected attributes. */
if (NInoMstProtected(ni))
@@ -1443,34 +1486,33 @@ retry_writepage:
BUG_ON(PageWriteback(page));
set_page_writeback(page);
unlock_page(page);
- /*
- * Here, we do not need to zero the out of bounds area everytime
- * because the below memcpy() already takes care of the
- * mmap-at-end-of-file requirements. If the file is converted to a
- * non-resident one, then the code path use is switched to the
- * non-resident one where the zeroing happens on each ntfs_writepage()
- * invocation.
- */
attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
i_size = i_size_read(vi);
if (unlikely(attr_len > i_size)) {
+ /* Race with shrinking truncate or a failed truncate. */
attr_len = i_size;
- ctx->attr->data.resident.value_length = cpu_to_le32(attr_len);
+ /*
+ * If the truncate failed, fix it up now. If a concurrent
+ * truncate, we do its job, so it does not have to do anything.
+ */
+ err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr,
+ attr_len);
+ /* Shrinking cannot fail. */
+ BUG_ON(err);
}
kaddr = kmap_atomic(page, KM_USER0);
/* Copy the data from the page to the mft record. */
memcpy((u8*)ctx->attr +
le16_to_cpu(ctx->attr->data.resident.value_offset),
kaddr, attr_len);
- flush_dcache_mft_record_page(ctx->ntfs_ino);
/* Zero out of bounds area in the page cache page. */
memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
- flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
-
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ flush_dcache_page(page);
+ /* We are done with the page. */
end_page_writeback(page);
-
- /* Mark the mft record dirty, so it gets written back. */
+ /* Finally, mark the mft record dirty, so it gets written back. */
mark_mft_record_dirty(ctx->ntfs_ino);
ntfs_attr_put_search_ctx(ctx);
unmap_mft_record(base_ni);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index dc4bbe3acf5..7ec04513180 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1166,6 +1166,8 @@ err_out:
*
* Return 0 on success and -errno on error. In the error case, the inode will
* have had make_bad_inode() executed on it.
+ *
+ * Note this cannot be called for AT_INDEX_ALLOCATION.
*/
static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
{
@@ -1242,8 +1244,8 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
}
}
/*
- * The encryption flag set in an index root just means to
- * compress all files.
+ * The compressed/sparse flag set in an index root just means
+ * to compress all files.
*/
if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
ntfs_error(vi->i_sb, "Found mst protected attribute "
@@ -1319,8 +1321,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
"the mapping pairs array.");
goto unm_err_out;
}
- if ((NInoCompressed(ni) || NInoSparse(ni)) &&
- ni->type != AT_INDEX_ROOT) {
+ if (NInoCompressed(ni) || NInoSparse(ni)) {
if (a->data.non_resident.compression_unit != 4) {
ntfs_error(vi->i_sb, "Found nonstandard "
"compression unit (%u instead "
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 609ad1728ce..01f2dfa39ce 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -123,7 +123,7 @@ enum {
magic_RCRD = const_cpu_to_le32(0x44524352), /* Log record page. */
/* Found in $LogFile/$DATA. (May be found in $MFT/$DATA, also?) */
- magic_CHKD = const_cpu_to_le32(0x424b4843), /* Modified by chkdsk. */
+ magic_CHKD = const_cpu_to_le32(0x444b4843), /* Modified by chkdsk. */
/* Found in all ntfs record containing records. */
magic_BAAD = const_cpu_to_le32(0x44414142), /* Failed multi sector
@@ -308,10 +308,8 @@ typedef le16 MFT_RECORD_FLAGS;
* The _LE versions are to be applied on little endian MFT_REFs.
* Note: The _LE versions will return a CPU endian formatted value!
*/
-typedef enum {
- MFT_REF_MASK_CPU = 0x0000ffffffffffffULL,
- MFT_REF_MASK_LE = const_cpu_to_le64(0x0000ffffffffffffULL),
-} MFT_REF_CONSTS;
+#define MFT_REF_MASK_CPU 0x0000ffffffffffffULL
+#define MFT_REF_MASK_LE const_cpu_to_le64(0x0000ffffffffffffULL)
typedef u64 MFT_REF;
typedef le64 leMFT_REF;
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 7b593429068..5af3bf0b7ee 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -779,14 +779,13 @@ out:
/**
* __ntfs_cluster_free - free clusters on an ntfs volume
- * @vi: vfs inode whose runlist describes the clusters to free
- * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters
+ * @ni: ntfs inode whose runlist describes the clusters to free
+ * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters
* @count: number of clusters to free or -1 for all clusters
- * @write_locked: true if the runlist is locked for writing
* @is_rollback: true if this is a rollback operation
*
* Free @count clusters starting at the cluster @start_vcn in the runlist
- * described by the vfs inode @vi.
+ * described by the ntfs inode @ni.
*
* If @count is -1, all clusters from @start_vcn to the end of the runlist are
* deallocated. Thus, to completely free all clusters in a runlist, use
@@ -801,31 +800,28 @@ out:
* Return the number of deallocated clusters (not counting sparse ones) on
* success and -errno on error.
*
- * Locking: - The runlist described by @vi must be locked on entry and is
- * locked on return. Note if the runlist is locked for reading the
- * lock may be dropped and reacquired. Note the runlist may be
- * modified when needed runlist fragments need to be mapped.
+ * Locking: - The runlist described by @ni must be locked for writing on entry
+ * and is locked on return. Note the runlist may be modified when
+ * needed runlist fragments need to be mapped.
* - The volume lcn bitmap must be unlocked on entry and is unlocked
* on return.
* - This function takes the volume lcn bitmap lock for writing and
* modifies the bitmap contents.
*/
-s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
- const BOOL write_locked, const BOOL is_rollback)
+s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count,
+ const BOOL is_rollback)
{
s64 delta, to_free, total_freed, real_freed;
- ntfs_inode *ni;
ntfs_volume *vol;
struct inode *lcnbmp_vi;
runlist_element *rl;
int err;
- BUG_ON(!vi);
+ BUG_ON(!ni);
ntfs_debug("Entering for i_ino 0x%lx, start_vcn 0x%llx, count "
- "0x%llx.%s", vi->i_ino, (unsigned long long)start_vcn,
+ "0x%llx.%s", ni->mft_no, (unsigned long long)start_vcn,
(unsigned long long)count,
is_rollback ? " (rollback)" : "");
- ni = NTFS_I(vi);
vol = ni->vol;
lcnbmp_vi = vol->lcnbmp_ino;
BUG_ON(!lcnbmp_vi);
@@ -843,7 +839,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
total_freed = real_freed = 0;
- rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, write_locked);
+ rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, TRUE);
if (IS_ERR(rl)) {
if (!is_rollback)
ntfs_error(vol->sb, "Failed to find first runlist "
@@ -897,7 +893,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
/* Attempt to map runlist. */
vcn = rl->vcn;
- rl = ntfs_attr_find_vcn_nolock(ni, vcn, write_locked);
+ rl = ntfs_attr_find_vcn_nolock(ni, vcn, TRUE);
if (IS_ERR(rl)) {
err = PTR_ERR(rl);
if (!is_rollback)
@@ -965,8 +961,7 @@ err_out:
* If rollback fails, set the volume errors flag, emit an error
* message, and return the error code.
*/
- delta = __ntfs_cluster_free(vi, start_vcn, total_freed, write_locked,
- TRUE);
+ delta = __ntfs_cluster_free(ni, start_vcn, total_freed, TRUE);
if (delta < 0) {
ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving "
"inconsistent metadata! Unmount and run "
diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h
index e4d7fb98d68..a6a8827882e 100644
--- a/fs/ntfs/lcnalloc.h
+++ b/fs/ntfs/lcnalloc.h
@@ -2,7 +2,7 @@
* lcnalloc.h - Exports for NTFS kernel cluster (de)allocation. Part of the
* Linux-NTFS project.
*
- * Copyright (c) 2004 Anton Altaparmakov
+ * Copyright (c) 2004-2005 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -28,6 +28,7 @@
#include <linux/fs.h>
#include "types.h"
+#include "inode.h"
#include "runlist.h"
#include "volume.h"
@@ -42,18 +43,17 @@ extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol,
const VCN start_vcn, const s64 count, const LCN start_lcn,
const NTFS_CLUSTER_ALLOCATION_ZONES zone);
-extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
- s64 count, const BOOL write_locked, const BOOL is_rollback);
+extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
+ s64 count, const BOOL is_rollback);
/**
* ntfs_cluster_free - free clusters on an ntfs volume
- * @vi: vfs inode whose runlist describes the clusters to free
- * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters
+ * @ni: ntfs inode whose runlist describes the clusters to free
+ * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters
* @count: number of clusters to free or -1 for all clusters
- * @write_locked: true if the runlist is locked for writing
*
* Free @count clusters starting at the cluster @start_vcn in the runlist
- * described by the vfs inode @vi.
+ * described by the ntfs inode @ni.
*
* If @count is -1, all clusters from @start_vcn to the end of the runlist are
* deallocated. Thus, to completely free all clusters in a runlist, use
@@ -65,19 +65,18 @@ extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
* Return the number of deallocated clusters (not counting sparse ones) on
* success and -errno on error.
*
- * Locking: - The runlist described by @vi must be locked on entry and is
- * locked on return. Note if the runlist is locked for reading the
- * lock may be dropped and reacquired. Note the runlist may be
- * modified when needed runlist fragments need to be mapped.
+ * Locking: - The runlist described by @ni must be locked for writing on entry
+ * and is locked on return. Note the runlist may be modified when
+ * needed runlist fragments need to be mapped.
* - The volume lcn bitmap must be unlocked on entry and is unlocked
* on return.
* - This function takes the volume lcn bitmap lock for writing and
* modifies the bitmap contents.
*/
-static inline s64 ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
- s64 count, const BOOL write_locked)
+static inline s64 ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
+ s64 count)
{
- return __ntfs_cluster_free(vi, start_vcn, count, write_locked, FALSE);
+ return __ntfs_cluster_free(ni, start_vcn, count, FALSE);
}
extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
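
Under the new lcnalloc interface, callers pass an ntfs inode and must already hold its runlist lock for writing (the read-lock path, and with it the @write_locked flag, is gone). A hedged sketch of a call site under the new contract, assuming the usual ntfs headers and that ni->runlist describes the clusters in question:

        /* Hypothetical helper: free all clusters of @ni from @start_vcn to the end. */
        static s64 free_tail_clusters(ntfs_inode *ni, VCN start_vcn)
        {
                s64 nr_freed;

                down_write(&ni->runlist.lock);  /* write lock is now mandatory */
                nr_freed = ntfs_cluster_free(ni, start_vcn, -1);
                up_write(&ni->runlist.lock);
                return nr_freed;                /* >= 0: clusters freed, < 0: -errno */
        }

The mft.c hunk further down is the in-tree example: it already runs with the mft runlist write-locked, so it simply drops the old TRUE argument.
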
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 0173e95500d..0fd70295cca 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -51,7 +51,8 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
RESTART_PAGE_HEADER *rp, s64 pos)
{
u32 logfile_system_page_size, logfile_log_page_size;
- u16 usa_count, usa_ofs, usa_end, ra_ofs;
+ u16 ra_ofs, usa_count, usa_ofs, usa_end = 0;
+ BOOL have_usa = TRUE;
ntfs_debug("Entering.");
/*
@@ -86,6 +87,14 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
(int)sle16_to_cpu(rp->minor_ver));
return FALSE;
}
+ /*
+ * If chkdsk has been run the restart page may not be protected by an
+ * update sequence array.
+ */
+ if (ntfs_is_chkd_record(rp->magic) && !le16_to_cpu(rp->usa_count)) {
+ have_usa = FALSE;
+ goto skip_usa_checks;
+ }
/* Verify the size of the update sequence array. */
usa_count = 1 + (logfile_system_page_size >> NTFS_BLOCK_SIZE_BITS);
if (usa_count != le16_to_cpu(rp->usa_count)) {
@@ -102,6 +111,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
"inconsistent update sequence array offset.");
return FALSE;
}
+skip_usa_checks:
/*
* Verify the position of the restart area. It must be:
* - aligned to 8-byte boundary,
@@ -109,7 +119,8 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
* - within the system page size.
*/
ra_ofs = le16_to_cpu(rp->restart_area_offset);
- if (ra_ofs & 7 || ra_ofs < usa_end ||
+ if (ra_ofs & 7 || (have_usa ? ra_ofs < usa_end :
+ ra_ofs < sizeof(RESTART_PAGE_HEADER)) ||
ra_ofs > logfile_system_page_size) {
ntfs_error(vi->i_sb, "$LogFile restart page specifies "
"inconsistent restart area offset.");
@@ -402,8 +413,12 @@ static int ntfs_check_and_load_restart_page(struct inode *vi,
idx++;
} while (to_read > 0);
}
- /* Perform the multi sector transfer deprotection on the buffer. */
- if (post_read_mst_fixup((NTFS_RECORD*)trp,
+ /*
+ * Perform the multi sector transfer deprotection on the buffer if the
+ * restart page is protected.
+ */
+ if ((!ntfs_is_chkd_record(trp->magic) || le16_to_cpu(trp->usa_count))
+ && post_read_mst_fixup((NTFS_RECORD*)trp,
le32_to_cpu(rp->system_page_size))) {
/*
* A multi sector transfer error was detected. We only need to
@@ -615,11 +630,16 @@ is_empty:
* Otherwise just throw it away.
*/
if (rstr2_lsn > rstr1_lsn) {
+ ntfs_debug("Using second restart page as it is more "
+ "recent.");
ntfs_free(rstr1_ph);
rstr1_ph = rstr2_ph;
/* rstr1_lsn = rstr2_lsn; */
- } else
+ } else {
+ ntfs_debug("Using first restart page as it is more "
+ "recent.");
ntfs_free(rstr2_ph);
+ }
rstr2_ph = NULL;
}
/* All consistency checks passed. */
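
The logfile.c changes above all serve one case: a restart page that chkdsk wrote with the CHKD magic and a zero usa_count is not protected by an update sequence array, so the usa sanity checks and the multi sector transfer deprotection have to be skipped for it. As a small standalone restatement of that test (sketch only; the real checks are inline in the hunks above):

        /* Does this $LogFile restart page carry an update sequence array? */
        static inline BOOL rp_is_protected(const RESTART_PAGE_HEADER *rp)
        {
                /* chkdsk may leave a CHKD page with no protection at all. */
                if (ntfs_is_chkd_record(rp->magic) && !le16_to_cpu(rp->usa_count))
                        return FALSE;
                return TRUE;
        }
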
diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h
index 42388f95ea6..a51f3dd0e9e 100644
--- a/fs/ntfs/logfile.h
+++ b/fs/ntfs/logfile.h
@@ -113,7 +113,7 @@ typedef struct {
*/
enum {
RESTART_VOLUME_IS_CLEAN = const_cpu_to_le16(0x0002),
- RESTART_SPACE_FILLER = 0xffff, /* gcc: Force enum bit width to 16. */
+ RESTART_SPACE_FILLER = const_cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16. */
} __attribute__ ((__packed__));
typedef le16 RESTART_AREA_FLAGS;
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index 3288bcc2c4a..006946efca8 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -1,7 +1,7 @@
/*
* malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 2c32b84385a..247586d1d5d 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1953,7 +1953,7 @@ restore_undo_alloc:
a = ctx->attr;
a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1);
undo_alloc:
- if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1, TRUE) < 0) {
+ if (ntfs_cluster_free(mft_ni, old_last_vcn, -1) < 0) {
ntfs_error(vol->sb, "Failed to free clusters from mft data "
"attribute.%s", es);
NVolSetErrors(vol);
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index f5b2ac92908..061b5ff6b73 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -2,7 +2,7 @@
* runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project.
*
* Copyright (c) 2001-2005 Anton Altaparmakov
- * Copyright (c) 2002 Richard Russon
+ * Copyright (c) 2002-2005 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -158,17 +158,21 @@ static inline BOOL ntfs_are_rl_mergeable(runlist_element *dst,
BUG_ON(!dst);
BUG_ON(!src);
- if ((dst->lcn < 0) || (src->lcn < 0)) { /* Are we merging holes? */
- if (dst->lcn == LCN_HOLE && src->lcn == LCN_HOLE)
- return TRUE;
+ /* We can merge unmapped regions even if they are misaligned. */
+ if ((dst->lcn == LCN_RL_NOT_MAPPED) && (src->lcn == LCN_RL_NOT_MAPPED))
+ return TRUE;
+ /* If the runs are misaligned, we cannot merge them. */
+ if ((dst->vcn + dst->length) != src->vcn)
return FALSE;
- }
- if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */
- return FALSE;
- if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */
- return FALSE;
-
- return TRUE;
+ /* If both runs are non-sparse and contiguous, we can merge them. */
+ if ((dst->lcn >= 0) && (src->lcn >= 0) &&
+ ((dst->lcn + dst->length) == src->lcn))
+ return TRUE;
+ /* If we are merging two holes, we can merge them. */
+ if ((dst->lcn == LCN_HOLE) && (src->lcn == LCN_HOLE))
+ return TRUE;
+ /* Cannot merge. */
+ return FALSE;
}
/**
@@ -214,14 +218,15 @@ static inline void __ntfs_rl_merge(runlist_element *dst, runlist_element *src)
static inline runlist_element *ntfs_rl_append(runlist_element *dst,
int dsize, runlist_element *src, int ssize, int loc)
{
- BOOL right;
- int magic;
+ BOOL right = FALSE; /* Right end of @src needs merging. */
+ int marker; /* End of the inserted runs. */
BUG_ON(!dst);
BUG_ON(!src);
/* First, check if the right hand end needs merging. */
- right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
+ if ((loc + 1) < dsize)
+ right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
/* Space required: @dst size + @src size, less one if we merged. */
dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - right);
@@ -236,18 +241,19 @@ static inline runlist_element *ntfs_rl_append(runlist_element *dst,
if (right)
__ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
- magic = loc + ssize;
+ /* First run after the @src runs that have been inserted. */
+ marker = loc + ssize + 1;
/* Move the tail of @dst out of the way, then copy in @src. */
- ntfs_rl_mm(dst, magic + 1, loc + 1 + right, dsize - loc - 1 - right);
+ ntfs_rl_mm(dst, marker, loc + 1 + right, dsize - (loc + 1 + right));
ntfs_rl_mc(dst, loc + 1, src, 0, ssize);
/* Adjust the size of the preceding hole. */
dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
/* We may have changed the length of the file, so fix the end marker */
- if (dst[magic + 1].lcn == LCN_ENOENT)
- dst[magic + 1].vcn = dst[magic].vcn + dst[magic].length;
+ if (dst[marker].lcn == LCN_ENOENT)
+ dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
return dst;
}
@@ -279,18 +285,17 @@ static inline runlist_element *ntfs_rl_append(runlist_element *dst,
static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
int dsize, runlist_element *src, int ssize, int loc)
{
- BOOL left = FALSE;
- BOOL disc = FALSE; /* Discontinuity */
- BOOL hole = FALSE; /* Following a hole */
- int magic;
+ BOOL left = FALSE; /* Left end of @src needs merging. */
+ BOOL disc = FALSE; /* Discontinuity between @dst and @src. */
+ int marker; /* End of the inserted runs. */
BUG_ON(!dst);
BUG_ON(!src);
- /* disc => Discontinuity between the end of @dst and the start of @src.
- * This means we might need to insert a hole.
- * hole => @dst ends with a hole or an unmapped region which we can
- * extend to match the discontinuity. */
+ /*
+ * disc => Discontinuity between the end of @dst and the start of @src.
+ * This means we might need to insert a "not mapped" run.
+ */
if (loc == 0)
disc = (src[0].vcn > 0);
else {
@@ -303,58 +308,49 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
merged_length += src->length;
disc = (src[0].vcn > dst[loc - 1].vcn + merged_length);
- if (disc)
- hole = (dst[loc - 1].lcn == LCN_HOLE);
}
-
- /* Space required: @dst size + @src size, less one if we merged, plus
- * one if there was a discontinuity, less one for a trailing hole. */
- dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc - hole);
+ /*
+ * Space required: @dst size + @src size, less one if we merged, plus
+ * one if there was a discontinuity.
+ */
+ dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc);
if (IS_ERR(dst))
return dst;
/*
* We are guaranteed to succeed from here so can start modifying the
* original runlist.
*/
-
if (left)
__ntfs_rl_merge(dst + loc - 1, src);
-
- magic = loc + ssize - left + disc - hole;
+ /*
+ * First run after the @src runs that have been inserted.
+ * Nominally, @marker equals @loc + @ssize, i.e. location + number of
+ * runs in @src. However, if @left, then the first run in @src has
+ * been merged with one in @dst. And if @disc, then @dst and @src do
+ * not meet and we need an extra run to fill the gap.
+ */
+ marker = loc + ssize - left + disc;
/* Move the tail of @dst out of the way, then copy in @src. */
- ntfs_rl_mm(dst, magic, loc, dsize - loc);
- ntfs_rl_mc(dst, loc + disc - hole, src, left, ssize - left);
+ ntfs_rl_mm(dst, marker, loc, dsize - loc);
+ ntfs_rl_mc(dst, loc + disc, src, left, ssize - left);
- /* Adjust the VCN of the last run ... */
- if (dst[magic].lcn <= LCN_HOLE)
- dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length;
+ /* Adjust the VCN of the first run after the insertion... */
+ dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
/* ... and the length. */
- if (dst[magic].lcn == LCN_HOLE || dst[magic].lcn == LCN_RL_NOT_MAPPED)
- dst[magic].length = dst[magic + 1].vcn - dst[magic].vcn;
+ if (dst[marker].lcn == LCN_HOLE || dst[marker].lcn == LCN_RL_NOT_MAPPED)
+ dst[marker].length = dst[marker + 1].vcn - dst[marker].vcn;
- /* Writing beyond the end of the file and there's a discontinuity. */
+ /* Writing beyond the end of the file and there is a discontinuity. */
if (disc) {
- if (hole)
- dst[loc - 1].length = dst[loc].vcn - dst[loc - 1].vcn;
- else {
- if (loc > 0) {
- dst[loc].vcn = dst[loc - 1].vcn +
- dst[loc - 1].length;
- dst[loc].length = dst[loc + 1].vcn -
- dst[loc].vcn;
- } else {
- dst[loc].vcn = 0;
- dst[loc].length = dst[loc + 1].vcn;
- }
- dst[loc].lcn = LCN_RL_NOT_MAPPED;
+ if (loc > 0) {
+ dst[loc].vcn = dst[loc - 1].vcn + dst[loc - 1].length;
+ dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
+ } else {
+ dst[loc].vcn = 0;
+ dst[loc].length = dst[loc + 1].vcn;
}
-
- magic += hole;
-
- if (dst[magic].lcn == LCN_ENOENT)
- dst[magic].vcn = dst[magic - 1].vcn +
- dst[magic - 1].length;
+ dst[loc].lcn = LCN_RL_NOT_MAPPED;
}
return dst;
}
@@ -385,20 +381,23 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
static inline runlist_element *ntfs_rl_replace(runlist_element *dst,
int dsize, runlist_element *src, int ssize, int loc)
{
- BOOL left = FALSE;
- BOOL right;
- int magic;
+ BOOL left = FALSE; /* Left end of @src needs merging. */
+ BOOL right = FALSE; /* Right end of @src needs merging. */
+ int tail; /* Start of tail of @dst. */
+ int marker; /* End of the inserted runs. */
BUG_ON(!dst);
BUG_ON(!src);
- /* First, merge the left and right ends, if necessary. */
- right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
+ /* First, see if the left and right ends need merging. */
+ if ((loc + 1) < dsize)
+ right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
if (loc > 0)
left = ntfs_are_rl_mergeable(dst + loc - 1, src);
-
- /* Allocate some space. We'll need less if the left, right, or both
- * ends were merged. */
+ /*
+ * Allocate some space. We will need less if the left, right, or both
+ * ends get merged.
+ */
dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left - right);
if (IS_ERR(dst))
return dst;
@@ -406,21 +405,37 @@ static inline runlist_element *ntfs_rl_replace(runlist_element *dst,
* We are guaranteed to succeed from here so can start modifying the
* original runlists.
*/
+
+ /* First, merge the left and right ends, if necessary. */
if (right)
__ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
if (left)
__ntfs_rl_merge(dst + loc - 1, src);
-
- /* FIXME: What does this mean? (AIA) */
- magic = loc + ssize - left;
+ /*
+ * Offset of the tail of @dst. This needs to be moved out of the way
+ * to make space for the runs to be copied from @src, i.e. the first
+ * run of the tail of @dst.
+ * Nominally, @tail equals @loc + 1, i.e. location, skipping the
+ * replaced run. However, if @right, then one of @dst's runs is
+ * already merged into @src.
+ */
+ tail = loc + right + 1;
+ /*
+ * First run after the @src runs that have been inserted, i.e. where
+ * the tail of @dst needs to be moved to.
+ * Nominally, @marker equals @loc + @ssize, i.e. location + number of
+ * runs in @src. However, if @left, then the first run in @src has
+ * been merged with one in @dst.
+ */
+ marker = loc + ssize - left;
/* Move the tail of @dst out of the way, then copy in @src. */
- ntfs_rl_mm(dst, magic, loc + right + 1, dsize - loc - right - 1);
+ ntfs_rl_mm(dst, marker, tail, dsize - tail);
ntfs_rl_mc(dst, loc, src, left, ssize - left);
- /* We may have changed the length of the file, so fix the end marker */
- if (dst[magic].lcn == LCN_ENOENT)
- dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length;
+ /* We may have changed the length of the file, so fix the end marker. */
+ if (dsize - tail > 0 && dst[marker].lcn == LCN_ENOENT)
+ dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
return dst;
}
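
The rewritten ntfs_are_rl_mergeable() near the top of the runlist.c diff now encodes three distinct cases rather than the old two-step check. Stated as a simplified standalone predicate with the same logic (dst is the run that would precede src):

        /* Sketch: can two adjacent runlist elements be merged into one? */
        static BOOL rl_mergeable(const runlist_element *dst,
                        const runlist_element *src)
        {
                /* Unmapped regions merge even if they are misaligned. */
                if (dst->lcn == LCN_RL_NOT_MAPPED && src->lcn == LCN_RL_NOT_MAPPED)
                        return TRUE;
                /* Otherwise the runs must be contiguous in VCN space. */
                if (dst->vcn + dst->length != src->vcn)
                        return FALSE;
                /* Two real runs merge only if they are also LCN-contiguous. */
                if (dst->lcn >= 0 && src->lcn >= 0 &&
                                dst->lcn + dst->length == src->lcn)
                        return TRUE;
                /* Two holes always merge. */
                return dst->lcn == LCN_HOLE && src->lcn == LCN_HOLE;
        }

The append/insert/replace helpers then only have to get the @marker and @tail bookkeeping right, which is what the renamed variables and the new boundary checks against @dsize are about.
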
diff --git a/fs/open.c b/fs/open.c
index 2fac58c5191..f0d90cf0495 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -738,52 +738,15 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
return error;
}
-/*
- * Note that while the flag value (low two bits) for sys_open means:
- * 00 - read-only
- * 01 - write-only
- * 10 - read-write
- * 11 - special
- * it is changed into
- * 00 - no permissions needed
- * 01 - read-permission
- * 10 - write-permission
- * 11 - read-write
- * for the internal routines (ie open_namei()/follow_link() etc). 00 is
- * used by symlinks.
- */
-struct file *filp_open(const char * filename, int flags, int mode)
-{
- int namei_flags, error;
- struct nameidata nd;
-
- namei_flags = flags;
- if ((namei_flags+1) & O_ACCMODE)
- namei_flags++;
- if (namei_flags & O_TRUNC)
- namei_flags |= 2;
-
- error = open_namei(filename, namei_flags, mode, &nd);
- if (!error)
- return dentry_open(nd.dentry, nd.mnt, flags);
-
- return ERR_PTR(error);
-}
-
-EXPORT_SYMBOL(filp_open);
-
-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
+static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
+ int flags, struct file *f)
{
- struct file * f;
struct inode *inode;
int error;
- error = -ENFILE;
- f = get_empty_filp();
- if (!f)
- goto cleanup_dentry;
f->f_flags = flags;
- f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
+ f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |
+ FMODE_PREAD | FMODE_PWRITE;
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = get_write_access(inode);
@@ -828,12 +791,63 @@ cleanup_all:
f->f_vfsmnt = NULL;
cleanup_file:
put_filp(f);
-cleanup_dentry:
dput(dentry);
mntput(mnt);
return ERR_PTR(error);
}
+/*
+ * Note that while the flag value (low two bits) for sys_open means:
+ * 00 - read-only
+ * 01 - write-only
+ * 10 - read-write
+ * 11 - special
+ * it is changed into
+ * 00 - no permissions needed
+ * 01 - read-permission
+ * 10 - write-permission
+ * 11 - read-write
+ * for the internal routines (ie open_namei()/follow_link() etc). 00 is
+ * used by symlinks.
+ */
+struct file *filp_open(const char * filename, int flags, int mode)
+{
+ int namei_flags, error;
+ struct nameidata nd;
+ struct file *f;
+
+ namei_flags = flags;
+ if ((namei_flags+1) & O_ACCMODE)
+ namei_flags++;
+ if (namei_flags & O_TRUNC)
+ namei_flags |= 2;
+
+ error = -ENFILE;
+ f = get_empty_filp();
+ if (f == NULL)
+ return ERR_PTR(error);
+
+ error = open_namei(filename, namei_flags, mode, &nd);
+ if (!error)
+ return __dentry_open(nd.dentry, nd.mnt, flags, f);
+
+ put_filp(f);
+ return ERR_PTR(error);
+}
+EXPORT_SYMBOL(filp_open);
+
+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
+{
+ int error;
+ struct file *f;
+
+ error = -ENFILE;
+ f = get_empty_filp();
+ if (f == NULL)
+ return ERR_PTR(error);
+
+ return __dentry_open(dentry, mnt, flags, f);
+}
EXPORT_SYMBOL(dentry_open);
/*
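
The open.c refactor moves the struct file allocation out of dentry_open() and into the callers' common prologue, so filp_open() now grabs the file structure before doing the path lookup; the external contract is unchanged, and both functions still return either a valid struct file * or an ERR_PTR-encoded errno. A small usage sketch of that contract from an in-kernel caller (path, flags and the helper itself are illustrative only):

        #include <linux/fs.h>
        #include <linux/err.h>

        static int open_log_file(struct file **out)
        {
                struct file *filp;

                filp = filp_open("/var/log/example", O_WRONLY | O_CREAT | O_APPEND, 0600);
                if (IS_ERR(filp))
                        return PTR_ERR(filp);   /* e.g. -ENFILE, now detected before the lookup */
                *out = filp;
                return 0;
        }
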
diff --git a/fs/proc/array.c b/fs/proc/array.c
index d88d518d30f..d84eecacbea 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -74,6 +74,7 @@
#include <linux/file.h>
#include <linux/times.h>
#include <linux/cpuset.h>
+#include <linux/rcupdate.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -180,12 +181,14 @@ static inline char * task_state(struct task_struct *p, char *buffer)
p->gid, p->egid, p->sgid, p->fsgid);
read_unlock(&tasklist_lock);
task_lock(p);
+ rcu_read_lock();
if (p->files)
fdt = files_fdtable(p->files);
buffer += sprintf(buffer,
"FDSize:\t%d\n"
"Groups:\t",
fdt ? fdt->max_fds : 0);
+ rcu_read_unlock();
group_info = p->group_info;
get_group_info(group_info);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 23db452ab42..3b33f94020d 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -340,6 +340,54 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
return result;
}
+
+/* Same as proc_root_link, but this additionally tries to get fs from other
+ * threads in the group */
+static int proc_task_root_link(struct inode *inode, struct dentry **dentry,
+ struct vfsmount **mnt)
+{
+ struct fs_struct *fs;
+ int result = -ENOENT;
+ struct task_struct *leader = proc_task(inode);
+
+ task_lock(leader);
+ fs = leader->fs;
+ if (fs) {
+ atomic_inc(&fs->count);
+ task_unlock(leader);
+ } else {
+ /* Try to get fs from other threads */
+ task_unlock(leader);
+ read_lock(&tasklist_lock);
+ if (pid_alive(leader)) {
+ struct task_struct *task = leader;
+
+ while ((task = next_thread(task)) != leader) {
+ task_lock(task);
+ fs = task->fs;
+ if (fs) {
+ atomic_inc(&fs->count);
+ task_unlock(task);
+ break;
+ }
+ task_unlock(task);
+ }
+ }
+ read_unlock(&tasklist_lock);
+ }
+
+ if (fs) {
+ read_lock(&fs->lock);
+ *mnt = mntget(fs->rootmnt);
+ *dentry = dget(fs->root);
+ read_unlock(&fs->lock);
+ result = 0;
+ put_fs_struct(fs);
+ }
+ return result;
+}
+
+
#define MAY_PTRACE(task) \
(task == current || \
(task->parent == current && \
@@ -471,14 +519,14 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
/* permission checks */
-static int proc_check_root(struct inode *inode)
+/* If the process being read is separated by chroot from the reading process,
+ * don't let the reader access the threads.
+ */
+static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt)
{
- struct dentry *de, *base, *root;
- struct vfsmount *our_vfsmnt, *vfsmnt, *mnt;
+ struct dentry *de, *base;
+ struct vfsmount *our_vfsmnt, *mnt;
int res = 0;
-
- if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
- return -ENOENT;
read_lock(&current->fs->lock);
our_vfsmnt = mntget(current->fs->rootmnt);
base = dget(current->fs->root);
@@ -511,6 +559,16 @@ out:
goto exit;
}
+static int proc_check_root(struct inode *inode)
+{
+ struct dentry *root;
+ struct vfsmount *vfsmnt;
+
+ if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
+ return -ENOENT;
+ return proc_check_chroot(root, vfsmnt);
+}
+
static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
{
if (generic_permission(inode, mask, NULL) != 0)
@@ -518,6 +576,20 @@ static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
return proc_check_root(inode);
}
+static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+ struct dentry *root;
+ struct vfsmount *vfsmnt;
+
+ if (generic_permission(inode, mask, NULL) != 0)
+ return -EACCES;
+
+ if (proc_task_root_link(inode, &root, &vfsmnt))
+ return -ENOENT;
+
+ return proc_check_chroot(root, vfsmnt);
+}
+
extern struct seq_operations proc_pid_maps_op;
static int maps_open(struct inode *inode, struct file *file)
{
@@ -1419,7 +1491,7 @@ static struct inode_operations proc_fd_inode_operations = {
static struct inode_operations proc_task_inode_operations = {
.lookup = proc_task_lookup,
- .permission = proc_permission,
+ .permission = proc_task_permission,
};
#ifdef CONFIG_SECURITY
diff --git a/fs/read_write.c b/fs/read_write.c
index b60324aaa2b..a091ee4f430 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -499,6 +499,9 @@ static ssize_t do_readv_writev(int type, struct file *file,
ret = rw_verify_area(type, file, pos, tot_len);
if (ret)
goto out;
+ ret = security_file_permission(file, type == READ ? MAY_READ : MAY_WRITE);
+ if (ret)
+ goto out;
fnv = NULL;
if (type == READ) {
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index c9f178fb494..c20babd6216 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -667,7 +667,7 @@ static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handl
if (th->t_trans_id) {
int err;
// update any changes we made to blk count
- reiserfs_update_sd(th, inode);
+ mark_inode_dirty(inode);
err =
journal_end(th, inode->i_sb,
JOURNAL_PER_BALANCE_CNT * 3 + 1 +
@@ -855,17 +855,18 @@ static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_han
if (th->t_trans_id) {
reiserfs_write_lock(inode->i_sb);
- reiserfs_update_sd(th, inode); // And update on-disk metadata
+ // this sets the proper flags for O_SYNC to trigger a commit
+ mark_inode_dirty(inode);
reiserfs_write_unlock(inode->i_sb);
} else
- inode->i_sb->s_op->dirty_inode(inode);
+ mark_inode_dirty(inode);
sd_update = 1;
}
if (th->t_trans_id) {
reiserfs_write_lock(inode->i_sb);
if (!sd_update)
- reiserfs_update_sd(th, inode);
+ mark_inode_dirty(inode);
status = journal_end(th, th->t_super, th->t_blocks_allocated);
if (status)
retval = status;
@@ -1320,7 +1321,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
return err;
}
reiserfs_update_inode_transaction(inode);
- reiserfs_update_sd(&th, inode);
+ mark_inode_dirty(inode);
err = journal_end(&th, inode->i_sb, 1);
if (err) {
reiserfs_write_unlock(inode->i_sb);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 1a8a1bf2154..d76ee6c4f9b 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2639,6 +2639,12 @@ static int reiserfs_commit_write(struct file *f, struct page *page,
}
reiserfs_update_inode_transaction(inode);
inode->i_size = pos;
+ /*
+ * this will just nest into our transaction. It's important
+ * to use mark_inode_dirty so the inode gets pushed around on the
+ * dirty lists, and so that O_SYNC works as expected
+ */
+ mark_inode_dirty(inode);
reiserfs_update_sd(&myth, inode);
update_sd = 1;
ret = journal_end(&myth, inode->i_sb, 1);
@@ -2649,21 +2655,13 @@ static int reiserfs_commit_write(struct file *f, struct page *page,
if (th) {
reiserfs_write_lock(inode->i_sb);
if (!update_sd)
- reiserfs_update_sd(th, inode);
+ mark_inode_dirty(inode);
ret = reiserfs_end_persistent_transaction(th);
reiserfs_write_unlock(inode->i_sb);
if (ret)
goto out;
}
- /* we test for O_SYNC here so we can commit the transaction
- ** for any packed tails the file might have had
- */
- if (f && (f->f_flags & O_SYNC)) {
- reiserfs_write_lock(inode->i_sb);
- ret = reiserfs_commit_for_inode(inode);
- reiserfs_write_unlock(inode->i_sb);
- }
out:
return ret;
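
The reiserfs hunks consistently replace direct reiserfs_update_sd() calls with mark_inode_dirty(). As the new comment in reiserfs_commit_write() explains, mark_inode_dirty() nests into the running transaction and additionally puts the inode on the VFS dirty lists, which is what lets O_SYNC and fsync() commit the change without the old explicit reiserfs_commit_for_inode() call. A minimal sketch of the idea under those assumptions (hypothetical helper; the real call sites are the write paths above):

        /* Update i_size inside an already running reiserfs transaction. */
        static int commit_new_size(struct reiserfs_transaction_handle *th,
                        struct inode *inode, loff_t new_size)
        {
                inode->i_size = new_size;
                /* Nests into @th and flags the inode for VFS writeback/O_SYNC. */
                mark_inode_dirty(inode);
                return journal_end(th, inode->i_sb, 1);
        }
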