From 31ec35d6c81175016a6372571eab23b6bd40b406 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 16 Nov 2006 20:54:20 +0000
Subject: [CIFS] Incorrect hardlink count when original file is cached
 (oplocked)

Fixes Samba bug 2823

In this case hardlink count is stale for one of the two inodes (ie the
original file) until it is closed - since revalidate does not go to
server while file is cached locally.

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/link.c | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 0bee8b7e521..8e259969354 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -69,17 +69,30 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
 			rc = -EOPNOTSUPP;  
 	}
 
-/* if (!rc)     */
-	{
-		/*   renew_parental_timestamps(old_file);
-		   inode->i_nlink++;
-		   mark_inode_dirty(inode);
-		   d_instantiate(direntry, inode); */
-		/* BB add call to either mark inode dirty or refresh its data and timestamp to current time */
+	d_drop(direntry);	/* force new lookup from server of target */
+
+	/* if source file is cached (oplocked) revalidate will not go to server
+	   until the file is closed or oplock broken so update nlinks locally */
+	if(old_file->d_inode) {
+		cifsInode = CIFS_I(old_file->d_inode);
+		if(rc == 0) {
+			old_file->d_inode->i_nlink++;
+			old_file->d_inode->i_ctime = CURRENT_TIME;
+			/* parent dir timestamps will update from srv
+			within a second, would it really be worth it
+			to set the parent dir cifs inode time to zero
+			to force revalidate (faster) for it too? */
+		}
+		/* if not oplocked will force revalidate to get info 
+		   on source file from srv */
+		cifsInode->time = 0;
+
+                /* Will update parent dir timestamps from srv within a second.
+		   Would it really be worth it to set the parent dir (cifs
+		   inode) time field to zero to force revalidate on parent
+		   directory faster ie 
+			CIFS_I(inode)->time = 0;  */
 	}
-	d_drop(direntry);	/* force new lookup from server */
-	cifsInode = CIFS_I(old_file->d_inode);
-	cifsInode->time = 0;	/* will force revalidate to go get info when needed */
 
 cifs_hl_exit:
 	kfree(fromName);
-- 
cgit v1.2.3


From 8d6286fdfd290589f8446ec1503702227263dcfd Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 16 Nov 2006 22:48:25 +0000
Subject: [CIFS] Fix timezone handling on stat to os/2

We were adjusting for timezone on readdir but not on stat

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/inode.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 1ad8c9fcc74..c4fa91b8b62 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -318,6 +318,7 @@ int cifs_get_inode_info(struct inode **pinode,
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 	char *tmp_path;
 	char *buf = NULL;
+	int adjustTZ = FALSE;
 
 	pTcon = cifs_sb->tcon;
 	cFYI(1,("Getting info on %s", search_path));
@@ -348,6 +349,7 @@ int cifs_get_inode_info(struct inode **pinode,
 					pfindData, cifs_sb->local_nls, 
 					cifs_sb->mnt_cifs_flags &
 					  CIFS_MOUNT_MAP_SPECIAL_CHR);
+			adjustTZ = TRUE;
 		}
 		
 	}
@@ -444,6 +446,10 @@ int cifs_get_inode_info(struct inode **pinode,
 		inode->i_ctime =
 		    cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
 		cFYI(0, ("Attributes came in as 0x%x", attr));
+		if(adjustTZ && (pTcon->ses) && (pTcon->ses->server)) {
+			inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj;
+	                inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj;
+		}
 
 		/* set default mode. will override for dirs below */
 		if (atomic_read(&cifsInfo->inUse) == 0)
-- 
cgit v1.2.3


From 52bad64d95bd89e08c49ec5a071fa6dcbe5a1a9c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 22 Nov 2006 14:54:01 +0000
Subject: WorkStruct: Separate delayable and non-delayable events.

Separate delayable work items from non-delayable work items be splitting them
into a separate structure (delayed_work), which incorporates a work_struct and
the timer_list removed from work_struct.

The work_struct struct is huge, and this limits it's usefulness.  On a 64-bit
architecture it's nearly 100 bytes in size.  This reduces that by half for the
non-delayable type of event.

Signed-Off-By: David Howells <dhowells@redhat.com>
---
 fs/aio.c           | 4 ++--
 fs/nfs/client.c    | 2 +-
 fs/nfs/namespace.c | 3 ++-
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 94766599db0..11a1a7100ad 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -227,7 +227,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	INIT_LIST_HEAD(&ctx->active_reqs);
 	INIT_LIST_HEAD(&ctx->run_list);
-	INIT_WORK(&ctx->wq, aio_kick_handler, ctx);
+	INIT_DELAYED_WORK(&ctx->wq, aio_kick_handler, ctx);
 
 	if (aio_setup_ring(ctx) < 0)
 		goto out_freectx;
@@ -876,7 +876,7 @@ static void aio_kick_handler(void *data)
 	 * we're in a worker thread already, don't use queue_delayed_work,
 	 */
 	if (requeue)
-		queue_work(aio_wq, &ctx->wq);
+		queue_delayed_work(aio_wq, &ctx->wq, 0);
 }
 
 
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 5fea638743e..6f0487d6f44 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -143,7 +143,7 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 	INIT_LIST_HEAD(&clp->cl_state_owners);
 	INIT_LIST_HEAD(&clp->cl_unused);
 	spin_lock_init(&clp->cl_lock);
-	INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
+	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
 	clp->cl_boot_time = CURRENT_TIME;
 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index ec1114b33d8..5ed798bc1cf 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -21,7 +21,8 @@
 static void nfs_expire_automounts(void *list);
 
 LIST_HEAD(nfs_automount_list);
-static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list);
+static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts,
+			    &nfs_automount_list);
 int nfs_mountpoint_expiry_timeout = 500 * HZ;
 
 static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
-- 
cgit v1.2.3


From 65f27f38446e1976cc98fd3004b110fedcddd189 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 22 Nov 2006 14:55:48 +0000
Subject: WorkStruct: Pass the work_struct pointer instead of context data

Pass the work_struct pointer to the work function rather than context data.
The work function can use container_of() to work out the data.

For the cases where the container of the work_struct may go away the moment the
pending bit is cleared, it is made possible to defer the release of the
structure by deferring the clearing of the pending bit.

To make this work, an extra flag is introduced into the management side of the
work_struct.  This governs auto-release of the structure upon execution.

Ordinarily, the work queue executor would release the work_struct for further
scheduling or deallocation by clearing the pending bit prior to jumping to the
work function.  This means that, unless the driver makes some guarantee itself
that the work_struct won't go away, the work function may not access anything
else in the work_struct or its container lest they be deallocated..  This is a
problem if the auxiliary data is taken away (as done by the last patch).

However, if the pending bit is *not* cleared before jumping to the work
function, then the work function *may* access the work_struct and its container
with no problems.  But then the work function must itself release the
work_struct by calling work_release().

In most cases, automatic release is fine, so this is the default.  Special
initiators exist for the non-auto-release case (ending in _NAR).


Signed-Off-By: David Howells <dhowells@redhat.com>
---
 fs/aio.c            | 14 +++++++-------
 fs/bio.c            |  6 +++---
 fs/file.c           |  6 ++++--
 fs/nfs/client.c     |  2 +-
 fs/nfs/namespace.c  |  9 ++++-----
 fs/nfs/nfs4_fs.h    |  2 +-
 fs/nfs/nfs4renewd.c |  5 +++--
 7 files changed, 23 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 11a1a7100ad..ca1c5180a17 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -53,13 +53,13 @@ static kmem_cache_t	*kioctx_cachep;
 static struct workqueue_struct *aio_wq;
 
 /* Used for rare fput completion. */
-static void aio_fput_routine(void *);
-static DECLARE_WORK(fput_work, aio_fput_routine, NULL);
+static void aio_fput_routine(struct work_struct *);
+static DECLARE_WORK(fput_work, aio_fput_routine);
 
 static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
-static void aio_kick_handler(void *);
+static void aio_kick_handler(struct work_struct *);
 static void aio_queue_work(struct kioctx *);
 
 /* aio_setup
@@ -227,7 +227,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	INIT_LIST_HEAD(&ctx->active_reqs);
 	INIT_LIST_HEAD(&ctx->run_list);
-	INIT_DELAYED_WORK(&ctx->wq, aio_kick_handler, ctx);
+	INIT_DELAYED_WORK(&ctx->wq, aio_kick_handler);
 
 	if (aio_setup_ring(ctx) < 0)
 		goto out_freectx;
@@ -470,7 +470,7 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
 		wake_up(&ctx->wait);
 }
 
-static void aio_fput_routine(void *data)
+static void aio_fput_routine(struct work_struct *data)
 {
 	spin_lock_irq(&fput_lock);
 	while (likely(!list_empty(&fput_head))) {
@@ -859,9 +859,9 @@ static inline void aio_run_all_iocbs(struct kioctx *ctx)
  *      space.
  * Run on aiod's context.
  */
-static void aio_kick_handler(void *data)
+static void aio_kick_handler(struct work_struct *work)
 {
-	struct kioctx *ctx = data;
+	struct kioctx *ctx = container_of(work, struct kioctx, wq.work);
 	mm_segment_t oldfs = get_fs();
 	int requeue;
 
diff --git a/fs/bio.c b/fs/bio.c
index f95c8749499..c6c07ca5b5a 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -955,16 +955,16 @@ static void bio_release_pages(struct bio *bio)
  * run one bio_put() against the BIO.
  */
 
-static void bio_dirty_fn(void *data);
+static void bio_dirty_fn(struct work_struct *work);
 
-static DECLARE_WORK(bio_dirty_work, bio_dirty_fn, NULL);
+static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
 static DEFINE_SPINLOCK(bio_dirty_lock);
 static struct bio *bio_dirty_list;
 
 /*
  * This runs in process context
  */
-static void bio_dirty_fn(void *data)
+static void bio_dirty_fn(struct work_struct *work)
 {
 	unsigned long flags;
 	struct bio *bio;
diff --git a/fs/file.c b/fs/file.c
index 8e81775c5dc..3787e82f54c 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -91,8 +91,10 @@ out:
 	spin_unlock(&fddef->lock);
 }
 
-static void free_fdtable_work(struct fdtable_defer *f)
+static void free_fdtable_work(struct work_struct *work)
 {
+	struct fdtable_defer *f =
+		container_of(work, struct fdtable_defer, wq);
 	struct fdtable *fdt;
 
 	spin_lock_bh(&f->lock);
@@ -351,7 +353,7 @@ static void __devinit fdtable_defer_list_init(int cpu)
 {
 	struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
 	spin_lock_init(&fddef->lock);
-	INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef);
+	INIT_WORK(&fddef->wq, free_fdtable_work);
 	init_timer(&fddef->timer);
 	fddef->timer.data = (unsigned long)fddef;
 	fddef->timer.function = fdtable_timer;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 6f0487d6f44..23ab145daa2 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -143,7 +143,7 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 	INIT_LIST_HEAD(&clp->cl_state_owners);
 	INIT_LIST_HEAD(&clp->cl_unused);
 	spin_lock_init(&clp->cl_lock);
-	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
+	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
 	clp->cl_boot_time = CURRENT_TIME;
 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 5ed798bc1cf..371b804e7cc 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -18,11 +18,10 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
-static void nfs_expire_automounts(void *list);
+static void nfs_expire_automounts(struct work_struct *work);
 
 LIST_HEAD(nfs_automount_list);
-static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts,
-			    &nfs_automount_list);
+static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
 int nfs_mountpoint_expiry_timeout = 500 * HZ;
 
 static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
@@ -165,9 +164,9 @@ struct inode_operations nfs_referral_inode_operations = {
 	.follow_link	= nfs_follow_mountpoint,
 };
 
-static void nfs_expire_automounts(void *data)
+static void nfs_expire_automounts(struct work_struct *work)
 {
-	struct list_head *list = (struct list_head *)data;
+	struct list_head *list = &nfs_automount_list;
 
 	mark_mounts_for_expiry(list);
 	if (!list_empty(list))
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 6f346677332..c26cd978c7c 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -185,7 +185,7 @@ extern const u32 nfs4_fs_locations_bitmap[2];
 extern void nfs4_schedule_state_renewal(struct nfs_client *);
 extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
 extern void nfs4_kill_renewd(struct nfs_client *);
-extern void nfs4_renew_state(void *);
+extern void nfs4_renew_state(struct work_struct *);
 
 /* nfs4state.c */
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 7b6df1852e7..823298561c0 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -59,9 +59,10 @@
 #define NFSDBG_FACILITY	NFSDBG_PROC
 
 void
-nfs4_renew_state(void *data)
+nfs4_renew_state(struct work_struct *work)
 {
-	struct nfs_client *clp = (struct nfs_client *)data;
+	struct nfs_client *clp =
+		container_of(work, struct nfs_client, cl_renewd.work);
 	struct rpc_cred *cred;
 	long lease, timeout;
 	unsigned long last, now;
-- 
cgit v1.2.3


From c4028958b6ecad064b1a6303a6a5906d4fe48d73 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 22 Nov 2006 14:57:56 +0000
Subject: WorkStruct: make allyesconfig

Fix up for make allyesconfig.

Signed-Off-By: David Howells <dhowells@redhat.com>
---
 fs/9p/mux.c                     | 16 ++++-----
 fs/gfs2/glock.c                 |  8 ++---
 fs/ncpfs/inode.c                |  8 ++---
 fs/ncpfs/sock.c                 | 20 ++++++-----
 fs/nfsd/nfs4state.c             |  7 ++--
 fs/ocfs2/alloc.c                |  9 +++--
 fs/ocfs2/cluster/heartbeat.c    | 10 +++---
 fs/ocfs2/cluster/quorum.c       |  4 +--
 fs/ocfs2/cluster/tcp.c          | 78 +++++++++++++++++++++++------------------
 fs/ocfs2/cluster/tcp_internal.h |  8 ++---
 fs/ocfs2/dlm/dlmcommon.h        |  2 +-
 fs/ocfs2/dlm/dlmdomain.c        |  2 +-
 fs/ocfs2/dlm/dlmrecovery.c      |  5 +--
 fs/ocfs2/dlm/userdlm.c          | 10 +++---
 fs/ocfs2/journal.c              |  7 ++--
 fs/ocfs2/journal.h              |  2 +-
 fs/ocfs2/ocfs2.h                |  2 +-
 fs/ocfs2/super.c                |  2 +-
 fs/reiserfs/journal.c           | 12 ++++---
 fs/xfs/linux-2.6/xfs_aops.c     | 21 ++++++-----
 fs/xfs/linux-2.6/xfs_buf.c      |  9 ++---
 21 files changed, 134 insertions(+), 108 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index 90a79c78454..944273c3dbf 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -110,8 +110,8 @@ struct v9fs_mux_rpc {
 };
 
 static int v9fs_poll_proc(void *);
-static void v9fs_read_work(void *);
-static void v9fs_write_work(void *);
+static void v9fs_read_work(struct work_struct *work);
+static void v9fs_write_work(struct work_struct *work);
 static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
 			  poll_table * p);
 static u16 v9fs_mux_get_tag(struct v9fs_mux_data *);
@@ -297,8 +297,8 @@ struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
 	m->rbuf = NULL;
 	m->wpos = m->wsize = 0;
 	m->wbuf = NULL;
-	INIT_WORK(&m->rq, v9fs_read_work, m);
-	INIT_WORK(&m->wq, v9fs_write_work, m);
+	INIT_WORK(&m->rq, v9fs_read_work);
+	INIT_WORK(&m->wq, v9fs_write_work);
 	m->wsched = 0;
 	memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
 	m->poll_task = NULL;
@@ -458,13 +458,13 @@ static int v9fs_poll_proc(void *a)
 /**
  * v9fs_write_work - called when a transport can send some data
  */
-static void v9fs_write_work(void *a)
+static void v9fs_write_work(struct work_struct *work)
 {
 	int n, err;
 	struct v9fs_mux_data *m;
 	struct v9fs_req *req;
 
-	m = a;
+	m = container_of(work, struct v9fs_mux_data, wq);
 
 	if (m->err < 0) {
 		clear_bit(Wworksched, &m->wsched);
@@ -564,7 +564,7 @@ static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
 /**
  * v9fs_read_work - called when there is some data to be read from a transport
  */
-static void v9fs_read_work(void *a)
+static void v9fs_read_work(struct work_struct *work)
 {
 	int n, err;
 	struct v9fs_mux_data *m;
@@ -572,7 +572,7 @@ static void v9fs_read_work(void *a)
 	struct v9fs_fcall *rcall;
 	char *rbuf;
 
-	m = a;
+	m = container_of(work, struct v9fs_mux_data, rq);
 
 	if (m->err < 0)
 		return;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 78fe0fae23f..55f5333dae9 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -35,7 +35,7 @@
 
 struct greedy {
 	struct gfs2_holder gr_gh;
-	struct work_struct gr_work;
+	struct delayed_work gr_work;
 };
 
 struct gfs2_gl_hash_bucket {
@@ -1368,9 +1368,9 @@ static void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state,
 	glops->go_xmote_th(gl, state, flags);
 }
 
-static void greedy_work(void *data)
+static void greedy_work(struct work_struct *work)
 {
-	struct greedy *gr = data;
+	struct greedy *gr = container_of(work, struct greedy, gr_work.work);
 	struct gfs2_holder *gh = &gr->gr_gh;
 	struct gfs2_glock *gl = gh->gh_gl;
 	const struct gfs2_glock_operations *glops = gl->gl_ops;
@@ -1422,7 +1422,7 @@ int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time)
 
 	gfs2_holder_init(gl, 0, 0, gh);
 	set_bit(HIF_GREEDY, &gh->gh_iflags);
-	INIT_WORK(&gr->gr_work, greedy_work, gr);
+	INIT_DELAYED_WORK(&gr->gr_work, greedy_work);
 
 	set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
 	schedule_delayed_work(&gr->gr_work, time);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 42e3bef270c..72dad552aa0 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -577,12 +577,12 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 		server->rcv.ptr = (unsigned char*)&server->rcv.buf;
 		server->rcv.len = 10;
 		server->rcv.state = 0;
-		INIT_WORK(&server->rcv.tq, ncp_tcp_rcv_proc, server);
-		INIT_WORK(&server->tx.tq, ncp_tcp_tx_proc, server);
+		INIT_WORK(&server->rcv.tq, ncp_tcp_rcv_proc);
+		INIT_WORK(&server->tx.tq, ncp_tcp_tx_proc);
 		sock->sk->sk_write_space = ncp_tcp_write_space;
 	} else {
-		INIT_WORK(&server->rcv.tq, ncpdgram_rcv_proc, server);
-		INIT_WORK(&server->timeout_tq, ncpdgram_timeout_proc, server);
+		INIT_WORK(&server->rcv.tq, ncpdgram_rcv_proc);
+		INIT_WORK(&server->timeout_tq, ncpdgram_timeout_proc);
 		server->timeout_tm.data = (unsigned long)server;
 		server->timeout_tm.function = ncpdgram_timeout_call;
 	}
diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c
index 11c2b252ebe..e496d8b65e9 100644
--- a/fs/ncpfs/sock.c
+++ b/fs/ncpfs/sock.c
@@ -350,9 +350,10 @@ static void info_server(struct ncp_server *server, unsigned int id, const void *
 	}
 }
 
-void ncpdgram_rcv_proc(void *s)
+void ncpdgram_rcv_proc(struct work_struct *work)
 {
-	struct ncp_server *server = s;
+	struct ncp_server *server =
+		container_of(work, struct ncp_server, rcv.tq);
 	struct socket* sock;
 	
 	sock = server->ncp_sock;
@@ -468,9 +469,10 @@ static void __ncpdgram_timeout_proc(struct ncp_server *server)
 	}
 }
 
-void ncpdgram_timeout_proc(void *s)
+void ncpdgram_timeout_proc(struct work_struct *work)
 {
-	struct ncp_server *server = s;
+	struct ncp_server *server =
+		container_of(work, struct ncp_server, timeout_tq);
 	mutex_lock(&server->rcv.creq_mutex);
 	__ncpdgram_timeout_proc(server);
 	mutex_unlock(&server->rcv.creq_mutex);
@@ -652,18 +654,20 @@ skipdata:;
 	}
 }
 
-void ncp_tcp_rcv_proc(void *s)
+void ncp_tcp_rcv_proc(struct work_struct *work)
 {
-	struct ncp_server *server = s;
+	struct ncp_server *server =
+		container_of(work, struct ncp_server, rcv.tq);
 
 	mutex_lock(&server->rcv.creq_mutex);
 	__ncptcp_rcv_proc(server);
 	mutex_unlock(&server->rcv.creq_mutex);
 }
 
-void ncp_tcp_tx_proc(void *s)
+void ncp_tcp_tx_proc(struct work_struct *work)
 {
-	struct ncp_server *server = s;
+	struct ncp_server *server =
+		container_of(work, struct ncp_server, tx.tq);
 	
 	mutex_lock(&server->rcv.creq_mutex);
 	__ncptcp_try_send(server);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 293b6495829..e431e93ab50 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1829,9 +1829,8 @@ out:
 }
 
 static struct workqueue_struct *laundry_wq;
-static struct work_struct laundromat_work;
-static void laundromat_main(void *);
-static DECLARE_WORK(laundromat_work, laundromat_main, NULL);
+static void laundromat_main(struct work_struct *);
+static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main);
 
 __be32
 nfsd4_renew(clientid_t *clid)
@@ -1940,7 +1939,7 @@ nfs4_laundromat(void)
 }
 
 void
-laundromat_main(void *not_used)
+laundromat_main(struct work_struct *not_used)
 {
 	time_t t;
 
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index f43bc5f18a3..0b2ad163005 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1205,10 +1205,12 @@ int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 	return status;
 }
 
-static void ocfs2_truncate_log_worker(void *data)
+static void ocfs2_truncate_log_worker(struct work_struct *work)
 {
 	int status;
-	struct ocfs2_super *osb = data;
+	struct ocfs2_super *osb =
+		container_of(work, struct ocfs2_super,
+			     osb_truncate_log_wq.work);
 
 	mlog_entry_void();
 
@@ -1441,7 +1443,8 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
 	/* ocfs2_truncate_log_shutdown keys on the existence of
 	 * osb->osb_tl_inode so we don't set any of the osb variables
 	 * until we're sure all is well. */
-	INIT_WORK(&osb->osb_truncate_log_wq, ocfs2_truncate_log_worker, osb);
+	INIT_DELAYED_WORK(&osb->osb_truncate_log_wq,
+			  ocfs2_truncate_log_worker);
 	osb->osb_tl_bh    = tl_bh;
 	osb->osb_tl_inode = tl_inode;
 
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 305cba3681f..4cd9a958045 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -141,7 +141,7 @@ struct o2hb_region {
 	 * recognizes a node going up and down in one iteration */
 	u64			hr_generation;
 
-	struct work_struct	hr_write_timeout_work;
+	struct delayed_work	hr_write_timeout_work;
 	unsigned long		hr_last_timeout_start;
 
 	/* Used during o2hb_check_slot to hold a copy of the block
@@ -156,9 +156,11 @@ struct o2hb_bio_wait_ctxt {
 	int               wc_error;
 };
 
-static void o2hb_write_timeout(void *arg)
+static void o2hb_write_timeout(struct work_struct *work)
 {
-	struct o2hb_region *reg = arg;
+	struct o2hb_region *reg =
+		container_of(work, struct o2hb_region,
+			     hr_write_timeout_work.work);
 
 	mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u "
 	     "milliseconds\n", reg->hr_dev_name,
@@ -1404,7 +1406,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 		goto out;
 	}
 
-	INIT_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout, reg);
+	INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout);
 
 	/*
 	 * A node is considered live after it has beat LIVE_THRESHOLD
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 7bba98fbfc1..4705d659fe5 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -88,7 +88,7 @@ void o2quo_disk_timeout(void)
 	o2quo_fence_self();
 }
 
-static void o2quo_make_decision(void *arg)
+static void o2quo_make_decision(struct work_struct *work)
 {
 	int quorum;
 	int lowest_hb, lowest_reachable = 0, fence = 0;
@@ -306,7 +306,7 @@ void o2quo_init(void)
 	struct o2quo_state *qs = &o2quo_state;
 
 	spin_lock_init(&qs->qs_lock);
-	INIT_WORK(&qs->qs_work, o2quo_make_decision, NULL);
+	INIT_WORK(&qs->qs_work, o2quo_make_decision);
 }
 
 void o2quo_exit(void)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index b650efa8c8b..9b3209dc0b1 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -140,11 +140,11 @@ static int o2net_sys_err_translations[O2NET_ERR_MAX] =
 		 [O2NET_ERR_DIED]	= -EHOSTDOWN,};
 
 /* can't quite avoid *all* internal declarations :/ */
-static void o2net_sc_connect_completed(void *arg);
-static void o2net_rx_until_empty(void *arg);
-static void o2net_shutdown_sc(void *arg);
+static void o2net_sc_connect_completed(struct work_struct *work);
+static void o2net_rx_until_empty(struct work_struct *work);
+static void o2net_shutdown_sc(struct work_struct *work);
 static void o2net_listen_data_ready(struct sock *sk, int bytes);
-static void o2net_sc_send_keep_req(void *arg);
+static void o2net_sc_send_keep_req(struct work_struct *work);
 static void o2net_idle_timer(unsigned long data);
 static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
 
@@ -308,10 +308,10 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
 	o2nm_node_get(node);
 	sc->sc_node = node;
 
-	INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed, sc);
-	INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty, sc);
-	INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc, sc);
-	INIT_WORK(&sc->sc_keepalive_work, o2net_sc_send_keep_req, sc);
+	INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed);
+	INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty);
+	INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc);
+	INIT_DELAYED_WORK(&sc->sc_keepalive_work, o2net_sc_send_keep_req);
 
 	init_timer(&sc->sc_idle_timeout);
 	sc->sc_idle_timeout.function = o2net_idle_timer;
@@ -342,7 +342,7 @@ static void o2net_sc_queue_work(struct o2net_sock_container *sc,
 		sc_put(sc);
 }
 static void o2net_sc_queue_delayed_work(struct o2net_sock_container *sc,
-					struct work_struct *work,
+					struct delayed_work *work,
 					int delay)
 {
 	sc_get(sc);
@@ -350,7 +350,7 @@ static void o2net_sc_queue_delayed_work(struct o2net_sock_container *sc,
 		sc_put(sc);
 }
 static void o2net_sc_cancel_delayed_work(struct o2net_sock_container *sc,
-					 struct work_struct *work)
+					 struct delayed_work *work)
 {
 	if (cancel_delayed_work(work))
 		sc_put(sc);
@@ -564,9 +564,11 @@ static void o2net_ensure_shutdown(struct o2net_node *nn,
  * ourselves as state_change couldn't get the nn_lock and call set_nn_state
  * itself.
  */
-static void o2net_shutdown_sc(void *arg)
+static void o2net_shutdown_sc(struct work_struct *work)
 {
-	struct o2net_sock_container *sc = arg;
+	struct o2net_sock_container *sc =
+		container_of(work, struct o2net_sock_container,
+			     sc_shutdown_work);
 	struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
 
 	sclog(sc, "shutting down\n");
@@ -1201,9 +1203,10 @@ out:
 /* this work func is triggerd by data ready.  it reads until it can read no
  * more.  it interprets 0, eof, as fatal.  if data_ready hits while we're doing
  * our work the work struct will be marked and we'll be called again. */
-static void o2net_rx_until_empty(void *arg)
+static void o2net_rx_until_empty(struct work_struct *work)
 {
-	struct o2net_sock_container *sc = arg;
+	struct o2net_sock_container *sc =
+		container_of(work, struct o2net_sock_container, sc_rx_work);
 	int ret;
 
 	do {
@@ -1249,9 +1252,11 @@ static int o2net_set_nodelay(struct socket *sock)
 
 /* called when a connect completes and after a sock is accepted.  the
  * rx path will see the response and mark the sc valid */
-static void o2net_sc_connect_completed(void *arg)
+static void o2net_sc_connect_completed(struct work_struct *work)
 {
-	struct o2net_sock_container *sc = arg;
+	struct o2net_sock_container *sc =
+		container_of(work, struct o2net_sock_container,
+			     sc_connect_work);
 
 	mlog(ML_MSG, "sc sending handshake with ver %llu id %llx\n",
               (unsigned long long)O2NET_PROTOCOL_VERSION,
@@ -1262,9 +1267,11 @@ static void o2net_sc_connect_completed(void *arg)
 }
 
 /* this is called as a work_struct func. */
-static void o2net_sc_send_keep_req(void *arg)
+static void o2net_sc_send_keep_req(struct work_struct *work)
 {
-	struct o2net_sock_container *sc = arg;
+	struct o2net_sock_container *sc =
+		container_of(work, struct o2net_sock_container,
+			     sc_keepalive_work.work);
 
 	o2net_sendpage(sc, o2net_keep_req, sizeof(*o2net_keep_req));
 	sc_put(sc);
@@ -1314,14 +1321,15 @@ static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
  * having a connect attempt fail, etc. This centralizes the logic which decides
  * if a connect attempt should be made or if we should give up and all future
  * transmit attempts should fail */
-static void o2net_start_connect(void *arg)
+static void o2net_start_connect(struct work_struct *work)
 {
-	struct o2net_node *nn = arg;
+	struct o2net_node *nn =
+		container_of(work, struct o2net_node, nn_connect_work.work);
 	struct o2net_sock_container *sc = NULL;
 	struct o2nm_node *node = NULL, *mynode = NULL;
 	struct socket *sock = NULL;
 	struct sockaddr_in myaddr = {0, }, remoteaddr = {0, };
-	int ret = 0;
+	int ret = 0, stop;
 
 	/* if we're greater we initiate tx, otherwise we accept */
 	if (o2nm_this_node() <= o2net_num_from_nn(nn))
@@ -1342,10 +1350,9 @@ static void o2net_start_connect(void *arg)
 
 	spin_lock(&nn->nn_lock);
 	/* see if we already have one pending or have given up */
-	if (nn->nn_sc || nn->nn_persistent_error)
-		arg = NULL;
+	stop = (nn->nn_sc || nn->nn_persistent_error);
 	spin_unlock(&nn->nn_lock);
-	if (arg == NULL) /* *shrug*, needed some indicator */
+	if (stop)
 		goto out;
 
 	nn->nn_last_connect_attempt = jiffies;
@@ -1421,9 +1428,10 @@ out:
 	return;
 }
 
-static void o2net_connect_expired(void *arg)
+static void o2net_connect_expired(struct work_struct *work)
 {
-	struct o2net_node *nn = arg;
+	struct o2net_node *nn =
+		container_of(work, struct o2net_node, nn_connect_expired.work);
 
 	spin_lock(&nn->nn_lock);
 	if (!nn->nn_sc_valid) {
@@ -1436,9 +1444,10 @@ static void o2net_connect_expired(void *arg)
 	spin_unlock(&nn->nn_lock);
 }
 
-static void o2net_still_up(void *arg)
+static void o2net_still_up(struct work_struct *work)
 {
-	struct o2net_node *nn = arg;
+	struct o2net_node *nn =
+		container_of(work, struct o2net_node, nn_still_up.work);
 
 	o2quo_hb_still_up(o2net_num_from_nn(nn));
 }
@@ -1644,9 +1653,9 @@ out:
 	return ret;
 }
 
-static void o2net_accept_many(void *arg)
+static void o2net_accept_many(struct work_struct *work)
 {
-	struct socket *sock = arg;
+	struct socket *sock = o2net_listen_sock;
 	while (o2net_accept_one(sock) == 0)
 		cond_resched();
 }
@@ -1700,7 +1709,7 @@ static int o2net_open_listening_sock(__be16 port)
 	write_unlock_bh(&sock->sk->sk_callback_lock);
 
 	o2net_listen_sock = sock;
-	INIT_WORK(&o2net_listen_work, o2net_accept_many, sock);
+	INIT_WORK(&o2net_listen_work, o2net_accept_many);
 
 	sock->sk->sk_reuse = 1;
 	ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
@@ -1819,9 +1828,10 @@ int o2net_init(void)
 		struct o2net_node *nn = o2net_nn_from_num(i);
 
 		spin_lock_init(&nn->nn_lock);
-		INIT_WORK(&nn->nn_connect_work, o2net_start_connect, nn);
-		INIT_WORK(&nn->nn_connect_expired, o2net_connect_expired, nn);
-		INIT_WORK(&nn->nn_still_up, o2net_still_up, nn);
+		INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect);
+		INIT_DELAYED_WORK(&nn->nn_connect_expired,
+				  o2net_connect_expired);
+		INIT_DELAYED_WORK(&nn->nn_still_up, o2net_still_up);
 		/* until we see hb from a node we'll return einval */
 		nn->nn_persistent_error = -ENOTCONN;
 		init_waitqueue_head(&nn->nn_sc_wq);
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 4b46aac7d24..daebbd3a2c8 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -86,18 +86,18 @@ struct o2net_node {
 	 * connect attempt fails and so can be self-arming.  shutdown is
 	 * careful to first mark the nn such that no connects will be attempted
 	 * before canceling delayed connect work and flushing the queue. */
-	struct work_struct		nn_connect_work;
+	struct delayed_work		nn_connect_work;
 	unsigned long			nn_last_connect_attempt;
 
 	/* this is queued as nodes come up and is canceled when a connection is
 	 * established.  this expiring gives up on the node and errors out
 	 * transmits */
-	struct work_struct		nn_connect_expired;
+	struct delayed_work		nn_connect_expired;
 
 	/* after we give up on a socket we wait a while before deciding
 	 * that it is still heartbeating and that we should do some
 	 * quorum work */
-	struct work_struct		nn_still_up;
+	struct delayed_work		nn_still_up;
 };
 
 struct o2net_sock_container {
@@ -129,7 +129,7 @@ struct o2net_sock_container {
 	struct work_struct	sc_shutdown_work;
 
 	struct timer_list	sc_idle_timeout;
-	struct work_struct	sc_keepalive_work;
+	struct delayed_work	sc_keepalive_work;
 
 	unsigned		sc_handshake_ok:1;
 
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index fa968180b07..6b6ff76538c 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -153,7 +153,7 @@ static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned
  * called functions that cannot be directly called from the
  * net message handlers for some reason, usually because
  * they need to send net messages of their own. */
-void dlm_dispatch_work(void *data);
+void dlm_dispatch_work(struct work_struct *work);
 
 struct dlm_lock_resource;
 struct dlm_work_item;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 8d1065f8b3b..637646e6922 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1296,7 +1296,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 
 	spin_lock_init(&dlm->work_lock);
 	INIT_LIST_HEAD(&dlm->work_list);
-	INIT_WORK(&dlm->dispatched_work, dlm_dispatch_work, dlm);
+	INIT_WORK(&dlm->dispatched_work, dlm_dispatch_work);
 
 	kref_init(&dlm->dlm_refs);
 	dlm->dlm_state = DLM_CTXT_NEW;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 9d950d7cea3..fb3e2b0817f 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -153,9 +153,10 @@ static inline void dlm_reset_recovery(struct dlm_ctxt *dlm)
 }
 
 /* Worker function used during recovery. */
-void dlm_dispatch_work(void *data)
+void dlm_dispatch_work(struct work_struct *work)
 {
-	struct dlm_ctxt *dlm = (struct dlm_ctxt *)data;
+	struct dlm_ctxt *dlm =
+		container_of(work, struct dlm_ctxt, dispatched_work);
 	LIST_HEAD(tmp_list);
 	struct list_head *iter, *iter2;
 	struct dlm_work_item *item;
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index eead48bbfac..7d2f578b267 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -171,15 +171,14 @@ static inline void user_dlm_grab_inode_ref(struct user_lock_res *lockres)
 		BUG();
 }
 
-static void user_dlm_unblock_lock(void *opaque);
+static void user_dlm_unblock_lock(struct work_struct *work);
 
 static void __user_dlm_queue_lockres(struct user_lock_res *lockres)
 {
 	if (!(lockres->l_flags & USER_LOCK_QUEUED)) {
 		user_dlm_grab_inode_ref(lockres);
 
-		INIT_WORK(&lockres->l_work, user_dlm_unblock_lock,
-			  lockres);
+		INIT_WORK(&lockres->l_work, user_dlm_unblock_lock);
 
 		queue_work(user_dlm_worker, &lockres->l_work);
 		lockres->l_flags |= USER_LOCK_QUEUED;
@@ -279,10 +278,11 @@ static inline void user_dlm_drop_inode_ref(struct user_lock_res *lockres)
 	iput(inode);
 }
 
-static void user_dlm_unblock_lock(void *opaque)
+static void user_dlm_unblock_lock(struct work_struct *work)
 {
 	int new_level, status;
-	struct user_lock_res *lockres = (struct user_lock_res *) opaque;
+	struct user_lock_res *lockres =
+		container_of(work, struct user_lock_res, l_work);
 	struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
 
 	mlog(0, "processing lockres %.*s\n", lockres->l_namelen,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index fd9734def55..d95ee2720e6 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -911,11 +911,12 @@ struct ocfs2_la_recovery_item {
  * NOTE: This function can and will sleep on recovery of other nodes
  * during cluster locking, just like any other ocfs2 process.
  */
-void ocfs2_complete_recovery(void *data)
+void ocfs2_complete_recovery(struct work_struct *work)
 {
 	int ret;
-	struct ocfs2_super *osb = data;
-	struct ocfs2_journal *journal = osb->journal;
+	struct ocfs2_journal *journal =
+		container_of(work, struct ocfs2_journal, j_recovery_work);
+	struct ocfs2_super *osb = journal->j_osb;
 	struct ocfs2_dinode *la_dinode, *tl_dinode;
 	struct ocfs2_la_recovery_item *item;
 	struct list_head *p, *n;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 2f3a6acdac4..5be161a4ad9 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -172,7 +172,7 @@ static inline void ocfs2_handle_set_sync(struct ocfs2_journal_handle *handle, in
 }
 
 /* Exported only for the journal struct init code in super.c. Do not call. */
-void ocfs2_complete_recovery(void *data);
+void ocfs2_complete_recovery(struct work_struct *work);
 
 /*
  *  Journal Control:
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 0462a7f4e21..9b1bad1d48e 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -283,7 +283,7 @@ struct ocfs2_super
 	/* Truncate log info */
 	struct inode			*osb_tl_inode;
 	struct buffer_head		*osb_tl_bh;
-	struct work_struct		osb_truncate_log_wq;
+	struct delayed_work		osb_truncate_log_wq;
 
 	struct ocfs2_node_map		osb_recovering_orphan_dirs;
 	unsigned int			*osb_orphan_wipes;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 76b46ebbb10..9a8089030f5 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1365,7 +1365,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	spin_lock_init(&journal->j_lock);
 	journal->j_trans_id = (unsigned long) 1;
 	INIT_LIST_HEAD(&journal->j_la_cleanups);
-	INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery, osb);
+	INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
 	journal->j_state = OCFS2_JOURNAL_FREE;
 
 	/* get some pseudo constants for clustersize bits */
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 85ce2326830..cd1bb75ceb2 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -104,7 +104,7 @@ static int release_journal_dev(struct super_block *super,
 			       struct reiserfs_journal *journal);
 static int dirty_one_transaction(struct super_block *s,
 				 struct reiserfs_journal_list *jl);
-static void flush_async_commits(void *p);
+static void flush_async_commits(struct work_struct *work);
 static void queue_log_writer(struct super_block *s);
 
 /* values for join in do_journal_begin_r */
@@ -2836,7 +2836,8 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
 	if (reiserfs_mounted_fs_count <= 1)
 		commit_wq = create_workqueue("reiserfs");
 
-	INIT_WORK(&journal->j_work, flush_async_commits, p_s_sb);
+	INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
+	journal->j_work_sb = p_s_sb;
 	return 0;
       free_and_return:
 	free_journal_ram(p_s_sb);
@@ -3447,10 +3448,11 @@ int journal_end_sync(struct reiserfs_transaction_handle *th,
 /*
 ** writeback the pending async commits to disk
 */
-static void flush_async_commits(void *p)
+static void flush_async_commits(struct work_struct *work)
 {
-	struct super_block *p_s_sb = p;
-	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+	struct reiserfs_journal *journal =
+		container_of(work, struct reiserfs_journal, j_work.work);
+	struct super_block *p_s_sb = journal->j_work_sb;
 	struct reiserfs_journal_list *jl;
 	struct list_head *entry;
 
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 09360cf1e1f..8e6b56fc1ca 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -149,9 +149,10 @@ xfs_destroy_ioend(
  */
 STATIC void
 xfs_end_bio_delalloc(
-	void			*data)
+	struct work_struct	*work)
 {
-	xfs_ioend_t		*ioend = data;
+	xfs_ioend_t		*ioend =
+		container_of(work, xfs_ioend_t, io_work);
 
 	xfs_destroy_ioend(ioend);
 }
@@ -161,9 +162,10 @@ xfs_end_bio_delalloc(
  */
 STATIC void
 xfs_end_bio_written(
-	void			*data)
+	struct work_struct	*work)
 {
-	xfs_ioend_t		*ioend = data;
+	xfs_ioend_t		*ioend =
+		container_of(work, xfs_ioend_t, io_work);
 
 	xfs_destroy_ioend(ioend);
 }
@@ -176,9 +178,10 @@ xfs_end_bio_written(
  */
 STATIC void
 xfs_end_bio_unwritten(
-	void			*data)
+	struct work_struct	*work)
 {
-	xfs_ioend_t		*ioend = data;
+	xfs_ioend_t		*ioend =
+		container_of(work, xfs_ioend_t, io_work);
 	bhv_vnode_t		*vp = ioend->io_vnode;
 	xfs_off_t		offset = ioend->io_offset;
 	size_t			size = ioend->io_size;
@@ -220,11 +223,11 @@ xfs_alloc_ioend(
 	ioend->io_size = 0;
 
 	if (type == IOMAP_UNWRITTEN)
-		INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+		INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten);
 	else if (type == IOMAP_DELAY)
-		INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend);
+		INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc);
 	else
-		INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend);
+		INIT_WORK(&ioend->io_work, xfs_end_bio_written);
 
 	return ioend;
 }
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index d3382843698..eef4a0ba11e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -994,9 +994,10 @@ xfs_buf_wait_unpin(
 
 STATIC void
 xfs_buf_iodone_work(
-	void			*v)
+	struct work_struct	*work)
 {
-	xfs_buf_t		*bp = (xfs_buf_t *)v;
+	xfs_buf_t		*bp =
+		container_of(work, xfs_buf_t, b_iodone_work);
 
 	if (bp->b_iodone)
 		(*(bp->b_iodone))(bp);
@@ -1017,10 +1018,10 @@ xfs_buf_ioend(
 
 	if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
 		if (schedule) {
-			INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work, bp);
+			INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
 			queue_work(xfslogd_workqueue, &bp->b_iodone_work);
 		} else {
-			xfs_buf_iodone_work(bp);
+			xfs_buf_iodone_work(&bp->b_iodone_work);
 		}
 	} else {
 		up(&bp->b_iodonesema);
-- 
cgit v1.2.3


From 3cb2fccc5f48a4d6269dfd00b4db570fca2a04d5 Mon Sep 17 00:00:00 2001
From: Matt LaPlante <kernel1@cyberdogtech.com>
Date: Thu, 30 Nov 2006 05:22:59 +0100
Subject: Fix misc Kconfig typos

Fix various Kconfig typos.

Signed-off-by: Matt LaPlante <kernel1@cyberdogtech.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 7b1511d50b0..8bec76bbc0c 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1145,7 +1145,7 @@ config BEFS_FS
 	help
 	  The BeOS File System (BeFS) is the native file system of Be, Inc's
 	  BeOS. Notable features include support for arbitrary attributes
-	  on files and directories, and database-like indeces on selected
+	  on files and directories, and database-like indices on selected
 	  attributes. (Also note that this driver doesn't make those features
 	  available at this time). It is a 64 bit filesystem, so it supports
 	  extremely large volumes and files.
-- 
cgit v1.2.3


From 0779bf2d2ecc4d9b1e9437ae659f50e6776a7666 Mon Sep 17 00:00:00 2001
From: Matt LaPlante <kernel1@cyberdogtech.com>
Date: Thu, 30 Nov 2006 05:24:39 +0100
Subject: Fix misc .c/.h comment typos

Fix various .c/.h typos in comments (no code changes).

Signed-off-by: Matt LaPlante <kernel1@cyberdogtech.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/reiserfs/journal.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 85ce2326830..ac93174c963 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1464,7 +1464,7 @@ static int flush_journal_list(struct super_block *s,
 		}
 
 		/* if someone has this block in a newer transaction, just make
-		 ** sure they are commited, and don't try writing it to disk
+		 ** sure they are committed, and don't try writing it to disk
 		 */
 		if (pjl) {
 			if (atomic_read(&pjl->j_commit_left))
@@ -3384,7 +3384,7 @@ static int remove_from_transaction(struct super_block *p_s_sb,
 
 /*
 ** for any cnode in a journal list, it can only be dirtied of all the
-** transactions that include it are commited to disk.
+** transactions that include it are committed to disk.
 ** this checks through each transaction, and returns 1 if you are allowed to dirty,
 ** and 0 if you aren't
 **
@@ -3426,7 +3426,7 @@ static int can_dirty(struct reiserfs_journal_cnode *cn)
 }
 
 /* syncs the commit blocks, but does not force the real buffers to disk
-** will wait until the current transaction is done/commited before returning 
+** will wait until the current transaction is done/committed before returning 
 */
 int journal_end_sync(struct reiserfs_transaction_handle *th,
 		     struct super_block *p_s_sb, unsigned long nblocks)
-- 
cgit v1.2.3


From 93e06b4140cc018826bce4d97b0bf7c9ba05ae6e Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Thu, 30 Nov 2006 05:29:23 +0100
Subject: BUG_ON conversion for fs/aio.c

This patch converts a if () BUG(); construct to BUG_ON();
which occupies less space, uses unlikely and is safer when
BUG() is disabled.

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/aio.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 94766599db0..277a5f2d18a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -367,8 +367,7 @@ void fastcall __put_ioctx(struct kioctx *ctx)
 {
 	unsigned nr_events = ctx->max_reqs;
 
-	if (unlikely(ctx->reqs_active))
-		BUG();
+	BUG_ON(ctx->reqs_active);
 
 	cancel_delayed_work(&ctx->wq);
 	flush_workqueue(aio_wq);
@@ -505,8 +504,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 	assert_spin_locked(&ctx->ctx_lock);
 
 	req->ki_users --;
-	if (unlikely(req->ki_users < 0))
-		BUG();
+	BUG_ON(req->ki_users < 0);
 	if (likely(req->ki_users))
 		return 0;
 	list_del(&req->ki_list);		/* remove from active_reqs */
-- 
cgit v1.2.3


From 03a67a46af8647b2c7825107045ecae641e103d3 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Thu, 30 Nov 2006 05:32:19 +0100
Subject: Fix typos in doc and comments

Changes persistant -> persistent. www.dictionary.com does not know
persistant (with an A), but should it be one of those things you can
spell in more than one correct way, let me know.

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/Kconfig          | 2 +-
 fs/jfs/jfs_filsys.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 8bec76bbc0c..b3b5aa0edff 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -972,7 +972,7 @@ config SYSFS
 
 	Some system agents rely on the information in sysfs to operate.
 	/sbin/hotplug uses device and object attributes in sysfs to assist in
-	delegating policy decisions, like persistantly naming devices.
+	delegating policy decisions, like persistently naming devices.
 
 	sysfs is currently used by the block subsystem to mount the root
 	partition.  If sysfs is disabled you must specify the boot device on
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index 9901928668c..eb550b339bb 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -81,7 +81,7 @@
 #define	JFS_SWAP_BYTES		0x00100000	/* running on big endian computer */
 
 /* Directory index */
-#define JFS_DIR_INDEX		0x00200000	/* Persistant index for */
+#define JFS_DIR_INDEX		0x00200000	/* Persistent index for */
 						/* directory entries    */
 
 
-- 
cgit v1.2.3


From 3ca68df6ee61e1a2034f3307b9edb9b3d87e5ca1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 20:11:25 -0400
Subject: [GFS2] split gfs2_dinode into on-disk and host variants

The latter is used as part of gfs2-private part of struct inode.
It actually stores a lot of fields differently; for now the
declaration is just cloned, inode field is swtiched and changes
propagated.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c  | 4 ++--
 fs/gfs2/ondisk.c | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index d470e5286ec..191a3df93d6 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -48,7 +48,7 @@
 void gfs2_inode_attr_in(struct gfs2_inode *ip)
 {
 	struct inode *inode = &ip->i_inode;
-	struct gfs2_dinode *di = &ip->i_di;
+	struct gfs2_dinode_host *di = &ip->i_di;
 
 	inode->i_ino = ip->i_num.no_addr;
 
@@ -98,7 +98,7 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 void gfs2_inode_attr_out(struct gfs2_inode *ip)
 {
 	struct inode *inode = &ip->i_inode;
-	struct gfs2_dinode *di = &ip->i_di;
+	struct gfs2_dinode_host *di = &ip->i_di;
 	gfs2_assert_withdraw(GFS2_SB(inode),
 		(di->di_mode & S_IFMT) == (inode->i_mode & S_IFMT));
 	di->di_mode = inode->i_mode;
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 1025960b0e6..52cb9a2dc05 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -153,7 +153,7 @@ void gfs2_quota_in(struct gfs2_quota *qu, const void *buf)
 	qu->qu_value = be64_to_cpu(str->qu_value);
 }
 
-void gfs2_dinode_in(struct gfs2_dinode *di, const void *buf)
+void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf)
 {
 	const struct gfs2_dinode *str = buf;
 
@@ -187,7 +187,7 @@ void gfs2_dinode_in(struct gfs2_dinode *di, const void *buf)
 
 }
 
-void gfs2_dinode_out(const struct gfs2_dinode *di, void *buf)
+void gfs2_dinode_out(const struct gfs2_dinode_host *di, void *buf)
 {
 	struct gfs2_dinode *str = buf;
 
@@ -221,7 +221,7 @@ void gfs2_dinode_out(const struct gfs2_dinode *di, void *buf)
 
 }
 
-void gfs2_dinode_print(const struct gfs2_dinode *di)
+void gfs2_dinode_print(const struct gfs2_dinode_host *di)
 {
 	gfs2_meta_header_print(&di->di_header);
 	gfs2_inum_print(&di->di_num);
-- 
cgit v1.2.3


From 5c6edb576f3800723bb65dbfaff82517089e32d0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 20:33:01 -0400
Subject: [GFS2] gfs2_dinode_host fields are host-endian

Annotated scalar fields, dropped unused ones.  Note that
it's not at all obvious that we want to convert all of them
to host-endian...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/incore.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 118dc693d11..1c876e0fb44 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -229,7 +229,7 @@ struct gfs2_inode {
 	unsigned long i_flags;		/* GIF_... */
 
 	u64 i_vn;
-	struct gfs2_dinode i_di; /* To be replaced by ref to block */
+	struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
 
 	struct gfs2_glock *i_gl; /* Move into i_gh? */
 	struct gfs2_holder i_iopen_gh;
-- 
cgit v1.2.3


From f50dfaf78c01df3cc2d8819f07d6661915567bae Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 20:45:02 -0400
Subject: [GFS2] split gfs2_sb

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/incore.h | 2 +-
 fs/gfs2/ondisk.c | 2 +-
 fs/gfs2/super.c  | 2 +-
 fs/gfs2/super.h  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 1c876e0fb44..bd596ba74e5 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -450,7 +450,7 @@ struct gfs2_sbd {
 	struct super_block *sd_vfs_meta;
 	struct kobject sd_kobj;
 	unsigned long sd_flags;	/* SDF_... */
-	struct gfs2_sb sd_sb;
+	struct gfs2_sb_host sd_sb;
 
 	/* Constants computed on mount */
 
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 52cb9a2dc05..5b32f1a3579 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -79,7 +79,7 @@ static void gfs2_meta_header_print(const struct gfs2_meta_header *mh)
 	pv(mh, mh_format, "%u");
 }
 
-void gfs2_sb_in(struct gfs2_sb *sb, const void *buf)
+void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
 {
 	const struct gfs2_sb *str = buf;
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 6a78b1b32e2..52aa3221fc9 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -97,7 +97,7 @@ void gfs2_tune_init(struct gfs2_tune *gt)
  * changed.
  */
 
-int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent)
+int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
 {
 	unsigned int x;
 
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 5bb443ae0f5..ac95064c1e5 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -14,7 +14,7 @@
 
 void gfs2_tune_init(struct gfs2_tune *gt);
 
-int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent);
+int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
 int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
 struct page *gfs2_read_super(struct super_block *sb, sector_t sector);
 
-- 
cgit v1.2.3


From 68826664d12827d7a732192e2f00ba46fb899414 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 21:07:22 -0400
Subject: [GFS2] split and annotate gfs2_rgrp

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/incore.h | 2 +-
 fs/gfs2/ondisk.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index bd596ba74e5..8ca7a7f3506 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -68,7 +68,7 @@ struct gfs2_rgrpd {
 	struct list_head rd_recent;	/* Recently used rgrps */
 	struct gfs2_glock *rd_gl;	/* Glock for this rgrp */
 	struct gfs2_rindex rd_ri;
-	struct gfs2_rgrp rd_rg;
+	struct gfs2_rgrp_host rd_rg;
 	u64 rd_rg_vn;
 	struct gfs2_bitmap *rd_bits;
 	unsigned int rd_bh_count;
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 5b32f1a3579..3b156a15cd8 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -120,7 +120,7 @@ void gfs2_rindex_print(const struct gfs2_rindex *ri)
 	pv(ri, ri_bitbytes, "%u");
 }
 
-void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf)
+void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
 {
 	const struct gfs2_rgrp *str = buf;
 
@@ -131,7 +131,7 @@ void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf)
 	rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
 }
 
-void gfs2_rgrp_out(const struct gfs2_rgrp *rg, void *buf)
+void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
 {
 	struct gfs2_rgrp *str = buf;
 
-- 
cgit v1.2.3


From e697264709c86040271cdd7abee781d7adbb7f91 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 21:29:46 -0400
Subject: [GFS2] split and annotate gfs2_inum_range

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c  | 4 ++--
 fs/gfs2/ondisk.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 191a3df93d6..7eb6b440da6 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -436,7 +436,7 @@ static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
 {
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
 	struct buffer_head *bh;
-	struct gfs2_inum_range ir;
+	struct gfs2_inum_range_host ir;
 	int error;
 
 	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
@@ -479,7 +479,7 @@ static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino)
 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode);
 	struct gfs2_holder gh;
 	struct buffer_head *bh;
-	struct gfs2_inum_range ir;
+	struct gfs2_inum_range_host ir;
 	int error;
 
 	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 3b156a15cd8..64f5f0c604a 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -263,7 +263,7 @@ void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf)
 	lh->lh_hash = be32_to_cpu(str->lh_hash);
 }
 
-void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf)
+void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
 {
 	const struct gfs2_inum_range *str = buf;
 
@@ -271,7 +271,7 @@ void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf)
 	ir->ir_length = be64_to_cpu(str->ir_length);
 }
 
-void gfs2_inum_range_out(const struct gfs2_inum_range *ir, void *buf)
+void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
 {
 	struct gfs2_inum_range *str = buf;
 
-- 
cgit v1.2.3


From 551676226163379c217e8ec54bd287eab9b8521e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 21:47:13 -0400
Subject: [GFS2] split and annotate gfs2_log_head

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glops.c    |  2 +-
 fs/gfs2/incore.h   |  2 +-
 fs/gfs2/lops.c     |  4 ++--
 fs/gfs2/lops.h     |  2 +-
 fs/gfs2/ondisk.c   |  2 +-
 fs/gfs2/recovery.c | 22 +++++++++++-----------
 fs/gfs2/recovery.h |  2 +-
 fs/gfs2/super.c    |  4 ++--
 8 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 41a6b6818a5..5406b193227 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -491,7 +491,7 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl)
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
 	struct gfs2_glock *j_gl = ip->i_gl;
-	struct gfs2_log_header head;
+	struct gfs2_log_header_host head;
 	int error;
 
 	if (gl->gl_state != LM_ST_UNLOCKED &&
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 8ca7a7f3506..e69f3394a2c 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -41,7 +41,7 @@ struct gfs2_log_operations {
 	void (*lo_before_commit) (struct gfs2_sbd *sdp);
 	void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
 	void (*lo_before_scan) (struct gfs2_jdesc *jd,
-				struct gfs2_log_header *head, int pass);
+				struct gfs2_log_header_host *head, int pass);
 	int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
 				 struct gfs2_log_descriptor *ld, __be64 *ptr,
 				 int pass);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index ab6d1115f95..8a654cd253d 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -182,7 +182,7 @@ static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
 }
 
 static void buf_lo_before_scan(struct gfs2_jdesc *jd,
-			       struct gfs2_log_header *head, int pass)
+			       struct gfs2_log_header_host *head, int pass)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 
@@ -328,7 +328,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
 }
 
 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
-				  struct gfs2_log_header *head, int pass)
+				  struct gfs2_log_header_host *head, int pass)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 5839c05ae6b..965bc65c7c6 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -60,7 +60,7 @@ static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
 }
 
 static inline void lops_before_scan(struct gfs2_jdesc *jd,
-				    struct gfs2_log_header *head,
+				    struct gfs2_log_header_host *head,
 				    unsigned int pass)
 {
 	int x;
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 64f5f0c604a..84b1ebc7569 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -251,7 +251,7 @@ void gfs2_dinode_print(const struct gfs2_dinode_host *di)
 	printk(KERN_INFO "  di_eattr = %llu\n", (unsigned long long)di->di_eattr);
 }
 
-void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf)
+void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
 {
 	const struct gfs2_log_header *str = buf;
 
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 62cd223819b..44781624162 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -132,10 +132,10 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp)
  */
 
 static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
-			  struct gfs2_log_header *head)
+			  struct gfs2_log_header_host *head)
 {
 	struct buffer_head *bh;
-	struct gfs2_log_header lh;
+	struct gfs2_log_header_host lh;
 	u32 hash;
 	int error;
 
@@ -143,7 +143,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 	if (error)
 		return error;
 
-	memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));
+	memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));	/* XXX */
 	lh.lh_hash = 0;
 	hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header));
 	gfs2_log_header_in(&lh, bh->b_data);
@@ -174,7 +174,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
  */
 
 static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
-			struct gfs2_log_header *head)
+			struct gfs2_log_header_host *head)
 {
 	unsigned int orig_blk = *blk;
 	int error;
@@ -205,10 +205,10 @@ static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
  * Returns: errno
  */
 
-static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
+static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 {
 	unsigned int blk = head->lh_blkno;
-	struct gfs2_log_header lh;
+	struct gfs2_log_header_host lh;
 	int error;
 
 	for (;;) {
@@ -245,9 +245,9 @@ static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
  * Returns: errno
  */
 
-int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
+int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 {
-	struct gfs2_log_header lh_1, lh_m;
+	struct gfs2_log_header_host lh_1, lh_m;
 	u32 blk_1, blk_2, blk_m;
 	int error;
 
@@ -320,7 +320,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
 		length = be32_to_cpu(ld->ld_length);
 
 		if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
-			struct gfs2_log_header lh;
+			struct gfs2_log_header_host lh;
 			error = get_log_header(jd, start, &lh);
 			if (!error) {
 				gfs2_replay_incr_blk(sdp, &start);
@@ -363,7 +363,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
  * Returns: errno
  */
 
-static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
+static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 {
 	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
@@ -425,7 +425,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
 {
 	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
-	struct gfs2_log_header head;
+	struct gfs2_log_header_host head;
 	struct gfs2_holder j_gh, ji_gh, t_gh;
 	unsigned long t;
 	int ro = 0;
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index 961feedf4d8..f7235e61c72 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -26,7 +26,7 @@ int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
 void gfs2_revoke_clean(struct gfs2_sbd *sdp);
 
 int gfs2_find_jhead(struct gfs2_jdesc *jd,
-		    struct gfs2_log_header *head);
+		    struct gfs2_log_header_host *head);
 int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd);
 void gfs2_check_journals(struct gfs2_sbd *sdp);
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 52aa3221fc9..0faf563c83a 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -508,7 +508,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
 	struct gfs2_glock *j_gl = ip->i_gl;
 	struct gfs2_holder t_gh;
-	struct gfs2_log_header head;
+	struct gfs2_log_header_host head;
 	int error;
 
 	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
@@ -873,7 +873,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
 	struct gfs2_jdesc *jd;
 	struct lfcc *lfcc;
 	LIST_HEAD(list);
-	struct gfs2_log_header lh;
+	struct gfs2_log_header_host lh;
 	int error;
 
 	error = gfs2_jindex_hold(sdp, &ji_gh);
-- 
cgit v1.2.3


From 2a2c98247b822db8df037a56c27201f9d716ac66 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 31 Oct 2006 14:44:50 -0500
Subject: [GFS2] Fix crc32 calculation in recovery.c

Commit "[GFS2] split and annotate gfs2_log_head" resulted in an incorrect
checksum calculation for log headers. This patch corrects the
problem without resorting to copying the whole log header as
the previous code used to.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/recovery.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 44781624162..4acf238e1db 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -136,6 +136,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 {
 	struct buffer_head *bh;
 	struct gfs2_log_header_host lh;
+static const u32 nothing = 0;
 	u32 hash;
 	int error;
 
@@ -143,11 +144,11 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 	if (error)
 		return error;
 
-	memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));	/* XXX */
-	lh.lh_hash = 0;
-	hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header));
+	hash = crc32_le((u32)~0, bh->b_data, sizeof(struct gfs2_log_header) -
+					     sizeof(u32));
+	hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
+	hash ^= (u32)~0;
 	gfs2_log_header_in(&lh, bh->b_data);
-
 	brelse(bh);
 
 	if (lh.lh_header.mh_magic != GFS2_MAGIC ||
-- 
cgit v1.2.3


From e928a76f959e89884f6186bb6f846c533847d5df Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 21:57:23 -0400
Subject: [GFS2] split and annotate gfs2_meta_header

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ondisk.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 84b1ebc7569..b5aa7ab97f2 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -54,7 +54,7 @@ static void gfs2_inum_print(const struct gfs2_inum *no)
 	printk(KERN_INFO "  no_addr = %llu\n", (unsigned long long)no->no_addr);
 }
 
-static void gfs2_meta_header_in(struct gfs2_meta_header *mh, const void *buf)
+static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
 {
 	const struct gfs2_meta_header *str = buf;
 
@@ -63,7 +63,7 @@ static void gfs2_meta_header_in(struct gfs2_meta_header *mh, const void *buf)
 	mh->mh_format = be32_to_cpu(str->mh_format);
 }
 
-static void gfs2_meta_header_out(const struct gfs2_meta_header *mh, void *buf)
+static void gfs2_meta_header_out(const struct gfs2_meta_header_host *mh, void *buf)
 {
 	struct gfs2_meta_header *str = buf;
 
@@ -72,7 +72,7 @@ static void gfs2_meta_header_out(const struct gfs2_meta_header *mh, void *buf)
 	str->mh_format = cpu_to_be32(mh->mh_format);
 }
 
-static void gfs2_meta_header_print(const struct gfs2_meta_header *mh)
+static void gfs2_meta_header_print(const struct gfs2_meta_header_host *mh)
 {
 	pv(mh, mh_magic, "0x%.8X");
 	pv(mh, mh_type, "%u");
-- 
cgit v1.2.3


From 1e81c4c3e0f55c95b6278a827262b80debd0dc7e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 22:51:24 -0400
Subject: [GFS2] split and annotate gfs_rindex

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/incore.h | 2 +-
 fs/gfs2/ondisk.c | 4 ++--
 fs/gfs2/rgrp.c   | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index e69f3394a2c..e4afc2c4d60 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -67,7 +67,7 @@ struct gfs2_rgrpd {
 	struct list_head rd_list_mru;
 	struct list_head rd_recent;	/* Recently used rgrps */
 	struct gfs2_glock *rd_gl;	/* Glock for this rgrp */
-	struct gfs2_rindex rd_ri;
+	struct gfs2_rindex_host rd_ri;
 	struct gfs2_rgrp_host rd_rg;
 	u64 rd_rg_vn;
 	struct gfs2_bitmap *rd_bits;
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index b5aa7ab97f2..c4e099d582f 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -97,7 +97,7 @@ void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
 	memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
 }
 
-void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf)
+void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf)
 {
 	const struct gfs2_rindex *str = buf;
 
@@ -109,7 +109,7 @@ void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf)
 
 }
 
-void gfs2_rindex_print(const struct gfs2_rindex *ri)
+void gfs2_rindex_print(const struct gfs2_rindex_host *ri)
 {
 	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)ri->ri_addr);
 	pv(ri, ri_length, "%u");
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index b261385c006..07dfd630505 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -253,7 +253,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 
 }
 
-static inline int rgrp_contains_block(struct gfs2_rindex *ri, u64 block)
+static inline int rgrp_contains_block(struct gfs2_rindex_host *ri, u64 block)
 {
 	u64 first = ri->ri_data0;
 	u64 last = first + ri->ri_data;
-- 
cgit v1.2.3


From 629a21e7ecedf779c68dcaa9a186069f57a7c652 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 22:51:24 -0400
Subject: [GFS2] split and annotate gfs2_inum

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/dir.c        |  8 ++++----
 fs/gfs2/dir.h        |  8 ++++----
 fs/gfs2/incore.h     |  2 +-
 fs/gfs2/inode.c      | 22 +++++++++++-----------
 fs/gfs2/inode.h      |  4 ++--
 fs/gfs2/ondisk.c     |  6 +++---
 fs/gfs2/ops_dentry.c |  2 +-
 fs/gfs2/ops_export.c |  8 ++++----
 fs/gfs2/ops_export.h |  2 +-
 fs/gfs2/ops_file.c   |  2 +-
 fs/gfs2/ops_fstype.c |  4 ++--
 11 files changed, 34 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index e24af28b1a1..d67a3760ca3 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1194,7 +1194,7 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
 			   int *copied)
 {
 	const struct gfs2_dirent *dent, *dent_next;
-	struct gfs2_inum inum;
+	struct gfs2_inum_host inum;
 	u64 off, off_next;
 	unsigned int x, y;
 	int run = 0;
@@ -1456,7 +1456,7 @@ out:
  */
 
 int gfs2_dir_search(struct inode *dir, const struct qstr *name,
-		    struct gfs2_inum *inum, unsigned int *type)
+		    struct gfs2_inum_host *inum, unsigned int *type)
 {
 	struct buffer_head *bh;
 	struct gfs2_dirent *dent;
@@ -1531,7 +1531,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
  */
 
 int gfs2_dir_add(struct inode *inode, const struct qstr *name,
-		 const struct gfs2_inum *inum, unsigned type)
+		 const struct gfs2_inum_host *inum, unsigned type)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct buffer_head *bh;
@@ -1666,7 +1666,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
  */
 
 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
-		   struct gfs2_inum *inum, unsigned int new_type)
+		   struct gfs2_inum_host *inum, unsigned int new_type)
 {
 	struct buffer_head *bh;
 	struct gfs2_dirent *dent;
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index 371233419b0..b21b33668a5 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -31,17 +31,17 @@ struct gfs2_inum;
 typedef int (*gfs2_filldir_t) (void *opaque,
 			      const char *name, unsigned int length,
 			      u64 offset,
-			      struct gfs2_inum *inum, unsigned int type);
+			      struct gfs2_inum_host *inum, unsigned int type);
 
 int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
-		    struct gfs2_inum *inum, unsigned int *type);
+		    struct gfs2_inum_host *inum, unsigned int *type);
 int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
-		 const struct gfs2_inum *inum, unsigned int type);
+		 const struct gfs2_inum_host *inum, unsigned int type);
 int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
 int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque,
 		  gfs2_filldir_t filldir);
 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
-		   struct gfs2_inum *new_inum, unsigned int new_type);
+		   struct gfs2_inum_host *new_inum, unsigned int new_type);
 
 int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
 
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index e4afc2c4d60..20c9b4f0e52 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -224,7 +224,7 @@ enum {
 
 struct gfs2_inode {
 	struct inode i_inode;
-	struct gfs2_inum i_num;
+	struct gfs2_inum_host i_num;
 
 	unsigned long i_flags;		/* GIF_... */
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7eb6b440da6..dadd1f35c86 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -112,7 +112,7 @@ void gfs2_inode_attr_out(struct gfs2_inode *ip)
 static int iget_test(struct inode *inode, void *opaque)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_inum *inum = opaque;
+	struct gfs2_inum_host *inum = opaque;
 
 	if (ip && ip->i_num.no_addr == inum->no_addr)
 		return 1;
@@ -123,19 +123,19 @@ static int iget_test(struct inode *inode, void *opaque)
 static int iget_set(struct inode *inode, void *opaque)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_inum *inum = opaque;
+	struct gfs2_inum_host *inum = opaque;
 
 	ip->i_num = *inum;
 	return 0;
 }
 
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum)
+struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum)
 {
 	return ilookup5(sb, (unsigned long)inum->no_formal_ino,
 			iget_test, inum);
 }
 
-static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum)
+static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum_host *inum)
 {
 	return iget5_locked(sb, (unsigned long)inum->no_formal_ino,
 		     iget_test, iget_set, inum);
@@ -150,7 +150,7 @@ static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum)
  * Returns: A VFS inode, or an error
  */
 
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned int type)
+struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned int type)
 {
 	struct inode *inode = gfs2_iget(sb, inum);
 	struct gfs2_inode *ip = GFS2_I(inode);
@@ -394,7 +394,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 	struct super_block *sb = dir->i_sb;
 	struct gfs2_inode *dip = GFS2_I(dir);
 	struct gfs2_holder d_gh;
-	struct gfs2_inum inum;
+	struct gfs2_inum_host inum;
 	unsigned int type;
 	int error = 0;
 	struct inode *inode = NULL;
@@ -610,7 +610,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
 		*gid = current->fsgid;
 }
 
-static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum *inum,
+static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
 			u64 *generation)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
@@ -650,7 +650,7 @@ out:
  */
 
 static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
-			const struct gfs2_inum *inum, unsigned int mode,
+			const struct gfs2_inum_host *inum, unsigned int mode,
 			unsigned int uid, unsigned int gid,
 			const u64 *generation)
 {
@@ -707,7 +707,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 }
 
 static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
-		       unsigned int mode, const struct gfs2_inum *inum,
+		       unsigned int mode, const struct gfs2_inum_host *inum,
 		       const u64 *generation)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
@@ -866,7 +866,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 	struct gfs2_inode *dip = ghs->gh_gl->gl_object;
 	struct inode *dir = &dip->i_inode;
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-	struct gfs2_inum inum;
+	struct gfs2_inum_host inum;
 	int error;
 	u64 generation;
 
@@ -1018,7 +1018,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 		   struct gfs2_inode *ip)
 {
-	struct gfs2_inum inum;
+	struct gfs2_inum_host inum;
 	unsigned int type;
 	int error;
 
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index f5d86176057..d699b920975 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -27,8 +27,8 @@ static inline int gfs2_is_dir(struct gfs2_inode *ip)
 
 void gfs2_inode_attr_in(struct gfs2_inode *ip);
 void gfs2_inode_attr_out(struct gfs2_inode *ip);
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned type);
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum);
+struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type);
+struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum);
 
 int gfs2_inode_refresh(struct gfs2_inode *ip);
 
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index c4e099d582f..32f592f4a3c 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -32,7 +32,7 @@
  * first arg: the cpu-order structure
  */
 
-void gfs2_inum_in(struct gfs2_inum *no, const void *buf)
+void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf)
 {
 	const struct gfs2_inum *str = buf;
 
@@ -40,7 +40,7 @@ void gfs2_inum_in(struct gfs2_inum *no, const void *buf)
 	no->no_addr = be64_to_cpu(str->no_addr);
 }
 
-void gfs2_inum_out(const struct gfs2_inum *no, void *buf)
+void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf)
 {
 	struct gfs2_inum *str = buf;
 
@@ -48,7 +48,7 @@ void gfs2_inum_out(const struct gfs2_inum *no, void *buf)
 	str->no_addr = cpu_to_be64(no->no_addr);
 }
 
-static void gfs2_inum_print(const struct gfs2_inum *no)
+static void gfs2_inum_print(const struct gfs2_inum_host *no)
 {
 	printk(KERN_INFO "  no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino);
 	printk(KERN_INFO "  no_addr = %llu\n", (unsigned long long)no->no_addr);
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index 00041b1b802..c36f9e342e6 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -43,7 +43,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 	struct inode *inode = dentry->d_inode;
 	struct gfs2_holder d_gh;
 	struct gfs2_inode *ip;
-	struct gfs2_inum inum;
+	struct gfs2_inum_host inum;
 	unsigned int type;
 	int error;
 
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 86127d93bd3..33423a5c329 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -35,7 +35,7 @@ static struct dentry *gfs2_decode_fh(struct super_block *sb,
 				     void *context)
 {
 	struct gfs2_fh_obj fh_obj;
-	struct gfs2_inum *this, parent;
+	struct gfs2_inum_host *this, parent;
 
 	if (fh_type != fh_len)
 		return NULL;
@@ -114,12 +114,12 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
 }
 
 struct get_name_filldir {
-	struct gfs2_inum inum;
+	struct gfs2_inum_host inum;
 	char *name;
 };
 
 static int get_name_filldir(void *opaque, const char *name, unsigned int length,
-			    u64 offset, struct gfs2_inum *inum,
+			    u64 offset, struct gfs2_inum_host *inum,
 			    unsigned int type)
 {
 	struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque;
@@ -202,7 +202,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj;
-	struct gfs2_inum *inum = &fh_obj->this;
+	struct gfs2_inum_host *inum = &fh_obj->this;
 	struct gfs2_holder i_gh, ri_gh, rgd_gh;
 	struct gfs2_rgrpd *rgd;
 	struct inode *inode;
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h
index 09aca5046fb..f925a955b3b 100644
--- a/fs/gfs2/ops_export.h
+++ b/fs/gfs2/ops_export.h
@@ -15,7 +15,7 @@
 
 extern struct export_operations gfs2_export_ops;
 struct gfs2_fh_obj {
-	struct gfs2_inum this;
+	struct gfs2_inum_host this;
 	__u32            imode;
 };
 
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 3064f133bf3..359965c368b 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -139,7 +139,7 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
  */
 
 static int filldir_func(void *opaque, const char *name, unsigned int length,
-			u64 offset, struct gfs2_inum *inum,
+			u64 offset, struct gfs2_inum_host *inum,
 			unsigned int type)
 {
 	struct filldir_reg *fdr = (struct filldir_reg *)opaque;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 882873a6bd6..d14e139d267 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -237,7 +237,7 @@ fail:
 }
 
 static struct inode *gfs2_lookup_root(struct super_block *sb,
-				      struct gfs2_inum *inum)
+				      struct gfs2_inum_host *inum)
 {
 	return gfs2_inode_lookup(sb, inum, DT_DIR);
 }
@@ -246,7 +246,7 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
 {
 	struct super_block *sb = sdp->sd_vfs;
 	struct gfs2_holder sb_gh;
-	struct gfs2_inum *inum;
+	struct gfs2_inum_host *inum;
 	struct inode *inode;
 	int error = 0;
 
-- 
cgit v1.2.3


From b5bc9e8b065dbcd4c675e8c158d6e524f221b8e1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 23:31:55 -0400
Subject: [GFS2] split and annotate gfs2_quota

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ondisk.c | 2 +-
 fs/gfs2/quota.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 32f592f4a3c..5db0737fcdb 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -144,7 +144,7 @@ void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
 	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
 }
 
-void gfs2_quota_in(struct gfs2_quota *qu, const void *buf)
+void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
 {
 	const struct gfs2_quota *str = buf;
 
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a3deae7416c..e3f5b8da484 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -743,7 +743,7 @@ static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
 	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
 	struct gfs2_holder i_gh;
-	struct gfs2_quota q;
+	struct gfs2_quota_host q;
 	char buf[sizeof(struct gfs2_quota)];
 	struct file_ra_state ra_state;
 	int error;
-- 
cgit v1.2.3


From bd209cc017f231e8536550bdab1bf5da93c32798 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 23:43:19 -0400
Subject: [GFS2] split and annotate gfs2_statfs_change

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/incore.h    |  4 ++--
 fs/gfs2/ondisk.c    |  4 ++--
 fs/gfs2/ops_super.c |  2 +-
 fs/gfs2/super.c     | 22 +++++++++++-----------
 fs/gfs2/super.h     |  4 ++--
 5 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 20c9b4f0e52..c0a8c3b6a85 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -503,8 +503,8 @@ struct gfs2_sbd {
 
 	spinlock_t sd_statfs_spin;
 	struct mutex sd_statfs_mutex;
-	struct gfs2_statfs_change sd_statfs_master;
-	struct gfs2_statfs_change sd_statfs_local;
+	struct gfs2_statfs_change_host sd_statfs_master;
+	struct gfs2_statfs_change_host sd_statfs_local;
 	unsigned long sd_statfs_sync_time;
 
 	/* Resource group stuff */
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 5db0737fcdb..84c59b165b6 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -279,7 +279,7 @@ void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
 	str->ir_length = cpu_to_be64(ir->ir_length);
 }
 
-void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf)
+void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
 {
 	const struct gfs2_statfs_change *str = buf;
 
@@ -288,7 +288,7 @@ void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf)
 	sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
 }
 
-void gfs2_statfs_change_out(const struct gfs2_statfs_change *sc, void *buf)
+void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
 {
 	struct gfs2_statfs_change *str = buf;
 
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index b47d9598c04..9c786d1e702 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -215,7 +215,7 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct super_block *sb = dentry->d_inode->i_sb;
 	struct gfs2_sbd *sdp = sb->s_fs_info;
-	struct gfs2_statfs_change sc;
+	struct gfs2_statfs_change_host sc;
 	int error;
 
 	if (gfs2_tune_get(sdp, gt_statfs_slow))
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 0faf563c83a..0ef83172708 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -587,9 +587,9 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 int gfs2_statfs_init(struct gfs2_sbd *sdp)
 {
 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
-	struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
+	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
 	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
-	struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
 	struct buffer_head *m_bh, *l_bh;
 	struct gfs2_holder gh;
 	int error;
@@ -634,7 +634,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
 			s64 dinodes)
 {
 	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
-	struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
 	struct buffer_head *l_bh;
 	int error;
 
@@ -660,8 +660,8 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp)
 {
 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
 	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
-	struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
-	struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
 	struct gfs2_holder gh;
 	struct buffer_head *m_bh, *l_bh;
 	int error;
@@ -727,10 +727,10 @@ out:
  * Returns: errno
  */
 
-int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
+int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
 {
-	struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
-	struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
 
 	spin_lock(&sdp->sd_statfs_spin);
 
@@ -760,7 +760,7 @@ int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
  */
 
 static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
-			    struct gfs2_statfs_change *sc)
+			    struct gfs2_statfs_change_host *sc)
 {
 	gfs2_rgrp_verify(rgd);
 	sc->sc_total += rgd->rd_ri.ri_data;
@@ -782,7 +782,7 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
  * Returns: errno
  */
 
-int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
+int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
 {
 	struct gfs2_holder ri_gh;
 	struct gfs2_rgrpd *rgd_next;
@@ -792,7 +792,7 @@ int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
 	int done;
 	int error = 0, err;
 
-	memset(sc, 0, sizeof(struct gfs2_statfs_change));
+	memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
 	gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
 	if (!gha)
 		return -ENOMEM;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index ac95064c1e5..e590b2df11d 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -45,8 +45,8 @@ int gfs2_statfs_init(struct gfs2_sbd *sdp);
 void gfs2_statfs_change(struct gfs2_sbd *sdp,
 			s64 total, s64 free, s64 dinodes);
 int gfs2_statfs_sync(struct gfs2_sbd *sdp);
-int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
-int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
+int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc);
+int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc);
 
 int gfs2_freeze_fs(struct gfs2_sbd *sdp);
 void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
-- 
cgit v1.2.3


From b62f963e1fdf838fed91faec21228d421a834f2d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 23:46:46 -0400
Subject: [GFS2] split and annotate gfs2_quota_change

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ondisk.c | 2 +-
 fs/gfs2/quota.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 84c59b165b6..2d1682ded4b 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -297,7 +297,7 @@ void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
 	str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
 }
 
-void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf)
+void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
 {
 	const struct gfs2_quota_change *str = buf;
 
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e3f5b8da484..009d86c0008 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1103,7 +1103,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
 
 		for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots;
 		     y++, slot++) {
-			struct gfs2_quota_change qc;
+			struct gfs2_quota_change_host qc;
 			struct gfs2_quota_data *qd;
 
 			gfs2_quota_change_in(&qc, bh->b_data +
-- 
cgit v1.2.3


From b44b84d765b02f813a67b96bf79e3b5d4d621631 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 14 Oct 2006 10:46:30 -0400
Subject: [GFS2] gfs2 misc endianness annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/bmap.c       | 32 ++++++++++++++++----------------
 fs/gfs2/dir.c        | 24 +++++++++++++-----------
 fs/gfs2/eattr.c      | 27 ++++++++++++++-------------
 fs/gfs2/eattr.h      |  6 +++---
 fs/gfs2/inode.c      |  9 +++++----
 fs/gfs2/ops_export.c | 30 ++++++++++++------------------
 fs/gfs2/quota.c      |  3 +--
 fs/gfs2/util.h       |  6 ++----
 8 files changed, 66 insertions(+), 71 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 06e9a8cb45e..51f6356bdcb 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -38,8 +38,8 @@ struct metapath {
 };
 
 typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
-			     struct buffer_head *bh, u64 *top,
-			     u64 *bottom, unsigned int height,
+			     struct buffer_head *bh, __be64 *top,
+			     __be64 *bottom, unsigned int height,
 			     void *data);
 
 struct strip_mine {
@@ -230,7 +230,7 @@ static int build_height(struct inode *inode, unsigned height)
 	struct buffer_head *blocks[GFS2_MAX_META_HEIGHT];
 	struct gfs2_dinode *di;
 	int error;
-	u64 *bp;
+	__be64 *bp;
 	u64 bn;
 	unsigned n;
 
@@ -255,7 +255,7 @@ static int build_height(struct inode *inode, unsigned height)
 					  GFS2_FORMAT_IN);
 			gfs2_buffer_clear_tail(blocks[n],
 					       sizeof(struct gfs2_meta_header));
-			bp = (u64 *)(blocks[n]->b_data +
+			bp = (__be64 *)(blocks[n]->b_data +
 				     sizeof(struct gfs2_meta_header));
 			*bp = cpu_to_be64(blocks[n+1]->b_blocknr);
 			brelse(blocks[n]);
@@ -360,15 +360,15 @@ static void find_metapath(struct gfs2_inode *ip, u64 block,
  * metadata tree.
  */
 
-static inline u64 *metapointer(struct buffer_head *bh, int *boundary,
+static inline __be64 *metapointer(struct buffer_head *bh, int *boundary,
 			       unsigned int height, const struct metapath *mp)
 {
 	unsigned int head_size = (height > 0) ?
 		sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
-	u64 *ptr;
+	__be64 *ptr;
 	*boundary = 0;
-	ptr = ((u64 *)(bh->b_data + head_size)) + mp->mp_list[height];
-	if (ptr + 1 == (u64 *)(bh->b_data + bh->b_size))
+	ptr = ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height];
+	if (ptr + 1 == (__be64 *)(bh->b_data + bh->b_size))
 		*boundary = 1;
 	return ptr;
 }
@@ -394,7 +394,7 @@ static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
 			int *new, u64 *block)
 {
 	int boundary;
-	u64 *ptr = metapointer(bh, &boundary, height, mp);
+	__be64 *ptr = metapointer(bh, &boundary, height, mp);
 
 	if (*ptr) {
 		*block = be64_to_cpu(*ptr);
@@ -600,7 +600,7 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct buffer_head *bh = NULL;
-	u64 *top, *bottom;
+	__be64 *top, *bottom;
 	u64 bn;
 	int error;
 	int mh_size = sizeof(struct gfs2_meta_header);
@@ -611,17 +611,17 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
 			return error;
 		dibh = bh;
 
-		top = (u64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
-		bottom = (u64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
+		top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
+		bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
 	} else {
 		error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
 		if (error)
 			return error;
 
-		top = (u64 *)(bh->b_data + mh_size) +
+		top = (__be64 *)(bh->b_data + mh_size) +
 				  (first ? mp->mp_list[height] : 0);
 
-		bottom = (u64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
+		bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
 	}
 
 	error = bc(ip, dibh, bh, top, bottom, height, data);
@@ -660,7 +660,7 @@ out:
  */
 
 static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
-		    struct buffer_head *bh, u64 *top, u64 *bottom,
+		    struct buffer_head *bh, __be64 *top, __be64 *bottom,
 		    unsigned int height, void *data)
 {
 	struct strip_mine *sm = data;
@@ -668,7 +668,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 	struct gfs2_rgrp_list rlist;
 	u64 bn, bstart;
 	u32 blen;
-	u64 *p;
+	__be64 *p;
 	unsigned int rg_blocks = 0;
 	int metadata;
 	unsigned int revokes = 0;
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index d67a3760ca3..59dc823c283 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -713,12 +713,12 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no,
 static int get_leaf_nr(struct gfs2_inode *dip, u32 index,
 		       u64 *leaf_out)
 {
-	u64 leaf_no;
+	__be64 leaf_no;
 	int error;
 
 	error = gfs2_dir_read_data(dip, (char *)&leaf_no,
-				    index * sizeof(u64),
-				    sizeof(u64), 0);
+				    index * sizeof(__be64),
+				    sizeof(__be64), 0);
 	if (error != sizeof(u64))
 		return (error < 0) ? error : -EIO;
 
@@ -837,7 +837,8 @@ static int dir_make_exhash(struct inode *inode)
 	struct gfs2_leaf *leaf;
 	int y;
 	u32 x;
-	u64 *lp, bn;
+	__be64 *lp;
+	u64 bn;
 	int error;
 
 	error = gfs2_meta_inode_buffer(dip, &dibh);
@@ -893,7 +894,7 @@ static int dir_make_exhash(struct inode *inode)
 	gfs2_trans_add_bh(dip->i_gl, dibh, 1);
 	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 
-	lp = (u64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
+	lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
 
 	for (x = sdp->sd_hash_ptrs; x--; lp++)
 		*lp = cpu_to_be64(bn);
@@ -929,7 +930,8 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 	struct gfs2_leaf *nleaf, *oleaf;
 	struct gfs2_dirent *dent = NULL, *prev = NULL, *next = NULL, *new;
 	u32 start, len, half_len, divider;
-	u64 bn, *lp, leaf_no;
+	u64 bn, leaf_no;
+	__be64 *lp;
 	u32 index;
 	int x, moved = 0;
 	int error;
@@ -974,7 +976,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 	/* Change the pointers.
 	   Don't bother distinguishing stuffed from non-stuffed.
 	   This code is complicated enough already. */
-	lp = kmalloc(half_len * sizeof(u64), GFP_NOFS | __GFP_NOFAIL);
+	lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS | __GFP_NOFAIL);
 	/*  Change the pointers  */
 	for (x = 0; x < half_len; x++)
 		lp[x] = cpu_to_be64(bn);
@@ -1341,7 +1343,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
 	u32 hsize, len = 0;
 	u32 ht_offset, lp_offset, ht_offset_cur = -1;
 	u32 hash, index;
-	u64 *lp;
+	__be64 *lp;
 	int copied = 0;
 	int error = 0;
 	unsigned depth = 0;
@@ -1365,7 +1367,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
 
 		if (ht_offset_cur != ht_offset) {
 			error = gfs2_dir_read_data(dip, (char *)lp,
-						ht_offset * sizeof(u64),
+						ht_offset * sizeof(__be64),
 						sdp->sd_hash_bsize, 1);
 			if (error != sdp->sd_hash_bsize) {
 				if (error >= 0)
@@ -1715,7 +1717,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
 	u32 hsize, len;
 	u32 ht_offset, lp_offset, ht_offset_cur = -1;
 	u32 index = 0;
-	u64 *lp;
+	__be64 *lp;
 	u64 leaf_no;
 	int error = 0;
 
@@ -1735,7 +1737,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
 
 		if (ht_offset_cur != ht_offset) {
 			error = gfs2_dir_read_data(dip, (char *)lp,
-						ht_offset * sizeof(u64),
+						ht_offset * sizeof(__be64),
 						sdp->sd_hash_bsize, 1);
 			if (error != sdp->sd_hash_bsize) {
 				if (error >= 0)
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index a65a4ccfd4d..518f0c0786c 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -112,7 +112,7 @@ fail:
 static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
 {
 	struct buffer_head *bh, *eabh;
-	u64 *eablk, *end;
+	__be64 *eablk, *end;
 	int error;
 
 	error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT, &bh);
@@ -129,7 +129,7 @@ static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
 		goto out;
 	}
 
-	eablk = (u64 *)(bh->b_data + sizeof(struct gfs2_meta_header));
+	eablk = (__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header));
 	end = eablk + GFS2_SB(&ip->i_inode)->sd_inptrs;
 
 	for (; eablk < end; eablk++) {
@@ -224,7 +224,8 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 	struct gfs2_rgrpd *rgd;
 	struct gfs2_holder rg_gh;
 	struct buffer_head *dibh;
-	u64 *dataptrs, bn = 0;
+	__be64 *dataptrs;
+	u64 bn = 0;
 	u64 bstart = 0;
 	unsigned int blen = 0;
 	unsigned int blks = 0;
@@ -444,7 +445,7 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
 	struct buffer_head **bh;
 	unsigned int amount = GFS2_EA_DATA_LEN(ea);
 	unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
-	u64 *dataptrs = GFS2_EA2DATAPTRS(ea);
+	__be64 *dataptrs = GFS2_EA2DATAPTRS(ea);
 	unsigned int x;
 	int error = 0;
 
@@ -629,7 +630,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
 		ea->ea_num_ptrs = 0;
 		memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len);
 	} else {
-		u64 *dataptr = GFS2_EA2DATAPTRS(ea);
+		__be64 *dataptr = GFS2_EA2DATAPTRS(ea);
 		const char *data = er->er_data;
 		unsigned int data_len = er->er_data_len;
 		unsigned int copy;
@@ -931,12 +932,12 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct buffer_head *indbh, *newbh;
-	u64 *eablk;
+	__be64 *eablk;
 	int error;
 	int mh_size = sizeof(struct gfs2_meta_header);
 
 	if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
-		u64 *end;
+		__be64 *end;
 
 		error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT,
 				       &indbh);
@@ -948,7 +949,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 			goto out;
 		}
 
-		eablk = (u64 *)(indbh->b_data + mh_size);
+		eablk = (__be64 *)(indbh->b_data + mh_size);
 		end = eablk + sdp->sd_inptrs;
 
 		for (; eablk < end; eablk++)
@@ -971,7 +972,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 		gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
 		gfs2_buffer_clear_tail(indbh, mh_size);
 
-		eablk = (u64 *)(indbh->b_data + mh_size);
+		eablk = (__be64 *)(indbh->b_data + mh_size);
 		*eablk = cpu_to_be64(ip->i_di.di_eattr);
 		ip->i_di.di_eattr = blk;
 		ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
@@ -1202,7 +1203,7 @@ static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
 	struct buffer_head **bh;
 	unsigned int amount = GFS2_EA_DATA_LEN(ea);
 	unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
-	u64 *dataptrs = GFS2_EA2DATAPTRS(ea);
+	__be64 *dataptrs = GFS2_EA2DATAPTRS(ea);
 	unsigned int x;
 	int error;
 
@@ -1300,7 +1301,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrp_list rlist;
 	struct buffer_head *indbh, *dibh;
-	u64 *eablk, *end;
+	__be64 *eablk, *end;
 	unsigned int rg_blocks = 0;
 	u64 bstart = 0;
 	unsigned int blen = 0;
@@ -1319,7 +1320,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
 		goto out;
 	}
 
-	eablk = (u64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
+	eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
 	end = eablk + sdp->sd_inptrs;
 
 	for (; eablk < end; eablk++) {
@@ -1363,7 +1364,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
 
 	gfs2_trans_add_bh(ip->i_gl, indbh, 1);
 
-	eablk = (u64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
+	eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
 	bstart = 0;
 	blen = 0;
 
diff --git a/fs/gfs2/eattr.h b/fs/gfs2/eattr.h
index ffa65947d68..c82dbe01d71 100644
--- a/fs/gfs2/eattr.h
+++ b/fs/gfs2/eattr.h
@@ -19,7 +19,7 @@ struct iattr;
 #define GFS2_EA_SIZE(ea) \
 ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
       ((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \
-                                  (sizeof(u64) * (ea)->ea_num_ptrs)), 8)
+                                  (sizeof(__be64) * (ea)->ea_num_ptrs)), 8)
 
 #define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs)
 #define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST)
@@ -29,13 +29,13 @@ ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8)
 
 #define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \
 ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
-      sizeof(u64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8)
+      sizeof(__be64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8)
 
 #define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1))
 #define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len)
 
 #define GFS2_EA2DATAPTRS(ea) \
-((u64 *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8)))
+((__be64 *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8)))
 
 #define GFS2_EA2NEXT(ea) \
 ((struct gfs2_ea_header *)((char *)(ea) + GFS2_EA_REC_LEN(ea)))
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index dadd1f35c86..fb969302f18 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -500,21 +500,22 @@ static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino)
 	if (!ir.ir_length) {
 		struct buffer_head *m_bh;
 		u64 x, y;
+		__be64 z;
 
 		error = gfs2_meta_inode_buffer(m_ip, &m_bh);
 		if (error)
 			goto out_brelse;
 
-		x = *(u64 *)(m_bh->b_data + sizeof(struct gfs2_dinode));
-		x = y = be64_to_cpu(x);
+		z = *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode));
+		x = y = be64_to_cpu(z);
 		ir.ir_start = x;
 		ir.ir_length = GFS2_INUM_QUANTUM;
 		x += GFS2_INUM_QUANTUM;
 		if (x < y)
 			gfs2_consist_inode(m_ip);
-		x = cpu_to_be64(x);
+		z = cpu_to_be64(x);
 		gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
-		*(u64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = x;
+		*(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = z;
 
 		brelse(m_bh);
 	}
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 33423a5c329..b4e7b877531 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -27,13 +27,14 @@
 #include "util.h"
 
 static struct dentry *gfs2_decode_fh(struct super_block *sb,
-				     __u32 *fh,
+				     __u32 *p,
 				     int fh_len,
 				     int fh_type,
 				     int (*acceptable)(void *context,
 						       struct dentry *dentry),
 				     void *context)
 {
+	__be32 *fh = (__force __be32 *)p;
 	struct gfs2_fh_obj fh_obj;
 	struct gfs2_inum_host *this, parent;
 
@@ -65,9 +66,10 @@ static struct dentry *gfs2_decode_fh(struct super_block *sb,
 						    acceptable, context);
 }
 
-static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
+static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
 			  int connectable)
 {
+	__be32 *fh = (__force __be32 *)p;
 	struct inode *inode = dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
 	struct gfs2_inode *ip = GFS2_I(inode);
@@ -76,14 +78,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
 	    (connectable && *len < GFS2_LARGE_FH_SIZE))
 		return 255;
 
-	fh[0] = ip->i_num.no_formal_ino >> 32;
-	fh[0] = cpu_to_be32(fh[0]);
-	fh[1] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
-	fh[1] = cpu_to_be32(fh[1]);
-	fh[2] = ip->i_num.no_addr >> 32;
-	fh[2] = cpu_to_be32(fh[2]);
-	fh[3] = ip->i_num.no_addr & 0xFFFFFFFF;
-	fh[3] = cpu_to_be32(fh[3]);
+	fh[0] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
+	fh[1] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
+	fh[2] = cpu_to_be32(ip->i_num.no_addr >> 32);
+	fh[3] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
 	*len = GFS2_SMALL_FH_SIZE;
 
 	if (!connectable || inode == sb->s_root->d_inode)
@@ -95,14 +93,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
 	igrab(inode);
 	spin_unlock(&dentry->d_lock);
 
-	fh[4] = ip->i_num.no_formal_ino >> 32;
-	fh[4] = cpu_to_be32(fh[4]);
-	fh[5] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
-	fh[5] = cpu_to_be32(fh[5]);
-	fh[6] = ip->i_num.no_addr >> 32;
-	fh[6] = cpu_to_be32(fh[6]);
-	fh[7] = ip->i_num.no_addr & 0xFFFFFFFF;
-	fh[7] = cpu_to_be32(fh[7]);
+	fh[4] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
+	fh[5] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
+	fh[6] = cpu_to_be32(ip->i_num.no_addr >> 32);
+	fh[7] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
 
 	fh[8]  = cpu_to_be32(inode->i_mode);
 	fh[9]  = 0;	/* pad to double word */
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 009d86c0008..5d00e9b2097 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -539,8 +539,7 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
 		qc->qc_id = cpu_to_be32(qd->qd_id);
 	}
 
-	x = qc->qc_change;
-	x = be64_to_cpu(x) + change;
+	x = be64_to_cpu(qc->qc_change) + change;
 	qc->qc_change = cpu_to_be64(x);
 
 	spin_lock(&sdp->sd_quota_spin);
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 76a50899fe9..ca8667c3ed0 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -83,8 +83,7 @@ static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
 				    char *file, unsigned int line)
 {
 	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
-	u32 magic = mh->mh_magic;
-	magic = be32_to_cpu(magic);
+	u32 magic = be32_to_cpu(mh->mh_magic);
 	if (unlikely(magic != GFS2_MAGIC))
 		return gfs2_meta_check_ii(sdp, bh, "magic number", function,
 					  file, line);
@@ -107,9 +106,8 @@ static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
 					char *file, unsigned int line)
 {
 	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
-	u32 magic = mh->mh_magic;
+	u32 magic = be32_to_cpu(mh->mh_magic);
 	u16 t = be32_to_cpu(mh->mh_type);
-	magic = be32_to_cpu(magic);
 	if (unlikely(magic != GFS2_MAGIC))
 		return gfs2_meta_check_ii(sdp, bh, "magic number", function,
 					  file, line);
-- 
cgit v1.2.3


From 9c9ab3d5414653bfe5e5b9f4dfdaab0c6ab17196 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 23:49:23 -0400
Subject: [GFS2] gfs2 __user misannotation fix

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 359965c368b..2fc8868e065 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -71,7 +71,7 @@ static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
 		size = count;
 
 	kaddr = kmap(page);
-	memcpy(desc->arg.buf, kaddr + offset, size);
+	memcpy(desc->arg.data, kaddr + offset, size);
 	kunmap(page);
 
 	desc->count = count - size;
@@ -86,7 +86,7 @@ int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
 	struct inode *inode = &ip->i_inode;
 	read_descriptor_t desc;
 	desc.written = 0;
-	desc.arg.buf = buf;
+	desc.arg.data = buf;
 	desc.count = size;
 	desc.error = 0;
 	do_generic_mapping_read(inode->i_mapping, ra_state,
-- 
cgit v1.2.3


From 539e5d6b7ae8612c0393fe940d2da5b591318d3d Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 31 Oct 2006 15:07:05 -0500
Subject: [GFS2] Change argument of gfs2_dinode_out

Everywhere this was called, a struct gfs2_inode was available,
but despite that, it was always called with a struct gfs2_dinode
as an argument. By making this change it paves the way to start
eliminating fields duplicated between the kernel's struct inode
and the struct gfs2_dinode.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/acl.c       |  2 +-
 fs/gfs2/bmap.c      | 12 ++++++------
 fs/gfs2/dir.c       | 20 ++++++++++----------
 fs/gfs2/eattr.c     | 14 +++++++-------
 fs/gfs2/inode.c     |  6 +++---
 fs/gfs2/ondisk.c    |  5 ++++-
 fs/gfs2/ops_file.c  |  2 +-
 fs/gfs2/ops_inode.c | 10 +++++-----
 8 files changed, 37 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 5f959b8ce40..906e403b054 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -201,7 +201,7 @@ static int munge_mode(struct gfs2_inode *ip, mode_t mode)
 				(ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT));
 		ip->i_di.di_mode = mode;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 51f6356bdcb..8c092ab2b4b 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -498,7 +498,7 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
 			error = gfs2_meta_inode_buffer(ip, &dibh);
 			if (!error) {
 				gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-				gfs2_dinode_out(&ip->i_di, dibh->b_data);
+				gfs2_dinode_out(ip, dibh->b_data);
 				brelse(dibh);
 			}
 			set_buffer_new(bh_map);
@@ -780,7 +780,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 
 	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
 
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 
 	up_write(&ip->i_rw_mutex);
 
@@ -860,7 +860,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
 		goto out_end_trans;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 
 out_end_trans:
@@ -970,7 +970,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 		ip->i_di.di_size = size;
 		ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
 		error = 1;
 
@@ -983,7 +983,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 			ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
 			ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
 			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-			gfs2_dinode_out(&ip->i_di, dibh->b_data);
+			gfs2_dinode_out(ip, dibh->b_data);
 		}
 	}
 
@@ -1057,7 +1057,7 @@ static int trunc_end(struct gfs2_inode *ip)
 	ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 
 out:
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 59dc823c283..0742761e1e0 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -132,7 +132,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
 	if (ip->i_di.di_size < offset + size)
 		ip->i_di.di_size = offset + size;
 	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 
 	brelse(dibh);
 
@@ -232,7 +232,7 @@ out:
 	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 
 	return copied;
@@ -907,7 +907,7 @@ static int dir_make_exhash(struct inode *inode)
 	for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
 	dip->i_di.di_depth = y;
 
-	gfs2_dinode_out(&dip->i_di, dibh->b_data);
+	gfs2_dinode_out(dip, dibh->b_data);
 
 	brelse(dibh);
 
@@ -1039,7 +1039,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 	error = gfs2_meta_inode_buffer(dip, &dibh);
 	if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
 		dip->i_di.di_blocks++;
-		gfs2_dinode_out(&dip->i_di, dibh->b_data);
+		gfs2_dinode_out(dip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -1119,7 +1119,7 @@ static int dir_double_exhash(struct gfs2_inode *dip)
 	error = gfs2_meta_inode_buffer(dip, &dibh);
 	if (!gfs2_assert_withdraw(sdp, !error)) {
 		dip->i_di.di_depth++;
-		gfs2_dinode_out(&dip->i_di, dibh->b_data);
+		gfs2_dinode_out(dip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -1517,7 +1517,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
 		return error;
 	gfs2_trans_add_bh(ip->i_gl, bh, 1);
 	ip->i_di.di_blocks++;
-	gfs2_dinode_out(&ip->i_di, bh->b_data);
+	gfs2_dinode_out(ip, bh->b_data);
 	brelse(bh);
 	return 0;
 }
@@ -1561,7 +1561,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 			gfs2_trans_add_bh(ip->i_gl, bh, 1);
 			ip->i_di.di_entries++;
 			ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
-			gfs2_dinode_out(&ip->i_di, bh->b_data);
+			gfs2_dinode_out(ip, bh->b_data);
 			brelse(bh);
 			error = 0;
 			break;
@@ -1647,7 +1647,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
 	gfs2_trans_add_bh(dip->i_gl, bh, 1);
 	dip->i_di.di_entries--;
 	dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
-	gfs2_dinode_out(&dip->i_di, bh->b_data);
+	gfs2_dinode_out(dip, bh->b_data);
 	brelse(bh);
 	mark_inode_dirty(&dip->i_inode);
 
@@ -1695,7 +1695,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 	}
 
 	dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
-	gfs2_dinode_out(&dip->i_di, bh->b_data);
+	gfs2_dinode_out(dip, bh->b_data);
 	brelse(bh);
 	return 0;
 }
@@ -1875,7 +1875,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
 		goto out_end_trans;
 
 	gfs2_trans_add_bh(dip->i_gl, dibh, 1);
-	gfs2_dinode_out(&dip->i_di, dibh->b_data);
+	gfs2_dinode_out(dip, dibh->b_data);
 	brelse(dibh);
 
 out_end_trans:
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 518f0c0786c..9b7bb565b59 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -302,7 +302,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 	if (!error) {
 		ip->i_di.di_ctime = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -717,7 +717,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 		}
 		ip->i_di.di_ctime = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -852,7 +852,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
 	}
 	ip->i_di.di_ctime = get_seconds();
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 out:
 	gfs2_trans_end(GFS2_SB(&ip->i_inode));
@@ -1132,7 +1132,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
 	if (!error) {
 		ip->i_di.di_ctime = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -1287,7 +1287,7 @@ int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
 		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
 		gfs2_inode_attr_out(ip);
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -1397,7 +1397,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -1446,7 +1446,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index fb969302f18..b861ddba868 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -338,7 +338,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
 	ip->i_inode.i_nlink = nlink;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 	mark_inode_dirty(&ip->i_inode);
 
@@ -792,7 +792,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 		goto fail_end_trans;
 	ip->i_di.di_nlink = 1;
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 	return 0;
 
@@ -1349,7 +1349,7 @@ __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 		gfs2_inode_attr_out(ip);
 
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 	return error;
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 2d1682ded4b..2c50fa0261f 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -15,6 +15,8 @@
 
 #include "gfs2.h"
 #include <linux/gfs2_ondisk.h>
+#include <linux/lm_interface.h>
+#include "incore.h"
 
 #define pv(struct, member, fmt) printk(KERN_INFO "  "#member" = "fmt"\n", \
 				       struct->member);
@@ -187,8 +189,9 @@ void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf)
 
 }
 
-void gfs2_dinode_out(const struct gfs2_dinode_host *di, void *buf)
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 {
+	const struct gfs2_dinode_host *di = &ip->i_di;
 	struct gfs2_dinode *str = buf;
 
 	gfs2_meta_header_out(&di->di_header, buf);
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 2fc8868e065..7ea41757390 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -336,7 +336,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
 		goto out_trans_end;
 	gfs2_trans_add_bh(ip->i_gl, bh, 1);
 	ip->i_di.di_flags = new_flags;
-	gfs2_dinode_out(&ip->i_di, bh->b_data);
+	gfs2_dinode_out(ip, bh->b_data);
 	brelse(bh);
 out_trans_end:
 	gfs2_trans_end(sdp);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index ef6e5ed70e9..bd268852c67 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -339,7 +339,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 
 	if (!gfs2_assert_withdraw(sdp, !error)) {
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
 		       size);
 		brelse(dibh);
@@ -414,7 +414,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		gfs2_inum_out(&dip->i_num, &dent->de_inum);
 		dent->de_type = cpu_to_be16(DT_DIR);
 
-		gfs2_dinode_out(&ip->i_di, di);
+		gfs2_dinode_out(ip, di);
 
 		brelse(dibh);
 	}
@@ -541,7 +541,7 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 
 	if (!gfs2_assert_withdraw(sdp, !error)) {
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -762,7 +762,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 			goto out_end_trans;
 		ip->i_di.di_ctime = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -949,7 +949,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 	gfs2_inode_attr_out(ip);
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 
 	if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
-- 
cgit v1.2.3


From 891ea14712da68e282de8583e5fa14f0d3f3731e Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 31 Oct 2006 15:22:10 -0500
Subject: [GFS2] Change argument to gfs2_dinode_in

This is a preliminary patch to enable the removal of fields
in gfs2_dinode_host which are duplicated in struct inode.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c  | 2 +-
 fs/gfs2/ondisk.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b861ddba868..9875e9356cd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -229,7 +229,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
 		return -EIO;
 	}
 
-	gfs2_dinode_in(&ip->i_di, dibh->b_data);
+	gfs2_dinode_in(ip, dibh->b_data);
 
 	brelse(dibh);
 
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 2c50fa0261f..edf87567bb2 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -155,8 +155,9 @@ void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
 	qu->qu_value = be64_to_cpu(str->qu_value);
 }
 
-void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf)
+void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 {
+	struct gfs2_dinode_host *di = &ip->i_di;
 	const struct gfs2_dinode *str = buf;
 
 	gfs2_meta_header_in(&di->di_header, buf);
@@ -186,7 +187,6 @@ void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf)
 	di->di_entries = be32_to_cpu(str->di_entries);
 
 	di->di_eattr = be64_to_cpu(str->di_eattr);
-
 }
 
 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
-- 
cgit v1.2.3


From ea744d01c6a5acf1f6171b4c6e1658a742063613 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 31 Oct 2006 15:28:00 -0500
Subject: [GFS2] Move gfs2_dinode_in to inode.c

gfs2_dinode_in() is only ever called from one place, so move it
to that place (in inode.c) and make it static.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c  | 34 ++++++++++++++++++++++++++++++++++
 fs/gfs2/ondisk.c | 36 +-----------------------------------
 2 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 9875e9356cd..b39cfcfe927 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -208,6 +208,40 @@ fail:
 	return ERR_PTR(error);
 }
 
+static void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
+{
+	struct gfs2_dinode_host *di = &ip->i_di;
+	const struct gfs2_dinode *str = buf;
+
+	gfs2_meta_header_in(&di->di_header, buf);
+	gfs2_inum_in(&di->di_num, &str->di_num);
+
+	di->di_mode = be32_to_cpu(str->di_mode);
+	di->di_uid = be32_to_cpu(str->di_uid);
+	di->di_gid = be32_to_cpu(str->di_gid);
+	di->di_nlink = be32_to_cpu(str->di_nlink);
+	di->di_size = be64_to_cpu(str->di_size);
+	di->di_blocks = be64_to_cpu(str->di_blocks);
+	di->di_atime = be64_to_cpu(str->di_atime);
+	di->di_mtime = be64_to_cpu(str->di_mtime);
+	di->di_ctime = be64_to_cpu(str->di_ctime);
+	di->di_major = be32_to_cpu(str->di_major);
+	di->di_minor = be32_to_cpu(str->di_minor);
+
+	di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
+	di->di_goal_data = be64_to_cpu(str->di_goal_data);
+	di->di_generation = be64_to_cpu(str->di_generation);
+
+	di->di_flags = be32_to_cpu(str->di_flags);
+	di->di_payload_format = be32_to_cpu(str->di_payload_format);
+	di->di_height = be16_to_cpu(str->di_height);
+
+	di->di_depth = be16_to_cpu(str->di_depth);
+	di->di_entries = be32_to_cpu(str->di_entries);
+
+	di->di_eattr = be64_to_cpu(str->di_eattr);
+}
+
 /**
  * gfs2_inode_refresh - Refresh the incore copy of the dinode
  * @ip: The GFS2 inode
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index edf87567bb2..062a44f87fe 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -56,7 +56,7 @@ static void gfs2_inum_print(const struct gfs2_inum_host *no)
 	printk(KERN_INFO "  no_addr = %llu\n", (unsigned long long)no->no_addr);
 }
 
-static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
+void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
 {
 	const struct gfs2_meta_header *str = buf;
 
@@ -155,40 +155,6 @@ void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
 	qu->qu_value = be64_to_cpu(str->qu_value);
 }
 
-void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
-{
-	struct gfs2_dinode_host *di = &ip->i_di;
-	const struct gfs2_dinode *str = buf;
-
-	gfs2_meta_header_in(&di->di_header, buf);
-	gfs2_inum_in(&di->di_num, &str->di_num);
-
-	di->di_mode = be32_to_cpu(str->di_mode);
-	di->di_uid = be32_to_cpu(str->di_uid);
-	di->di_gid = be32_to_cpu(str->di_gid);
-	di->di_nlink = be32_to_cpu(str->di_nlink);
-	di->di_size = be64_to_cpu(str->di_size);
-	di->di_blocks = be64_to_cpu(str->di_blocks);
-	di->di_atime = be64_to_cpu(str->di_atime);
-	di->di_mtime = be64_to_cpu(str->di_mtime);
-	di->di_ctime = be64_to_cpu(str->di_ctime);
-	di->di_major = be32_to_cpu(str->di_major);
-	di->di_minor = be32_to_cpu(str->di_minor);
-
-	di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
-	di->di_goal_data = be64_to_cpu(str->di_goal_data);
-	di->di_generation = be64_to_cpu(str->di_generation);
-
-	di->di_flags = be32_to_cpu(str->di_flags);
-	di->di_payload_format = be32_to_cpu(str->di_payload_format);
-	di->di_height = be16_to_cpu(str->di_height);
-
-	di->di_depth = be16_to_cpu(str->di_depth);
-	di->di_entries = be32_to_cpu(str->di_entries);
-
-	di->di_eattr = be64_to_cpu(str->di_eattr);
-}
-
 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 {
 	const struct gfs2_dinode_host *di = &ip->i_di;
-- 
cgit v1.2.3


From 4cc14f0b88bf3e0b508143e091eb5a8dff3e3b9c Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 31 Oct 2006 19:00:24 -0500
Subject: [GFS2] Change argument to gfs2_dinode_print

Change argument for gfs2_dinode_print in order to prepare
for removal of duplicate fields between struct inode and
struct gfs2_dinode_host.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c     | 8 ++++----
 fs/gfs2/ondisk.c    | 4 +++-
 fs/gfs2/ops_inode.c | 4 ++--
 3 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b39cfcfe927..4c5d286fefd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -269,7 +269,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
 
 	if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
 		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(&ip->i_di);
+			gfs2_dinode_print(ip);
 		return -EIO;
 	}
 	if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino)
@@ -289,7 +289,7 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip)
 
 	if (ip->i_di.di_blocks != 1) {
 		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(&ip->i_di);
+			gfs2_dinode_print(ip);
 		return -EIO;
 	}
 
@@ -359,7 +359,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
 	   bigger than the old one, we must have underflowed. */
 	if (diff < 0 && nlink > ip->i_di.di_nlink) {
 		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(&ip->i_di);
+			gfs2_dinode_print(ip);
 		return -EIO;
 	}
 
@@ -1010,7 +1010,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 
 	if (ip->i_di.di_entries != 2) {
 		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(&ip->i_di);
+			gfs2_dinode_print(ip);
 		return -EIO;
 	}
 
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 062a44f87fe..77bed440833 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -190,8 +190,10 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 
 }
 
-void gfs2_dinode_print(const struct gfs2_dinode_host *di)
+void gfs2_dinode_print(const struct gfs2_inode *ip)
 {
+	const struct gfs2_dinode_host *di = &ip->i_di;
+
 	gfs2_meta_header_print(&di->di_header);
 	gfs2_inum_print(&di->di_num);
 
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index bd268852c67..b2c2fe613d7 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -467,7 +467,7 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
 
 	if (ip->i_di.di_entries < 2) {
 		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(&ip->i_di);
+			gfs2_dinode_print(ip);
 		error = -EIO;
 		goto out_gunlock;
 	}
@@ -640,7 +640,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		if (S_ISDIR(nip->i_di.di_mode)) {
 			if (nip->i_di.di_entries < 2) {
 				if (gfs2_consist_inode(nip))
-					gfs2_dinode_print(&nip->i_di);
+					gfs2_dinode_print(nip);
 				error = -EIO;
 				goto out_gunlock;
 			}
-- 
cgit v1.2.3


From af339c0241d0dd3b35f9097b4f4999bb22ffe502 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Nov 2006 10:34:15 -0500
Subject: [GFS2] Shrink gfs2_inode (1) - di_header/di_num

The metadata header doesn't need to be stored in the incore
struct gfs2_inode since its constant, and this patch removes it.
Also, there is already a field for the inode's number in the
struct gfs2_inode, so we don't need one in struct gfs2_dinode_host
as well.

This saves 28 bytes of space in the struct gfs2_inode.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c  | 25 +++++++++++--------------
 fs/gfs2/ondisk.c | 22 +++++++++-------------
 2 files changed, 20 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 4c5d286fefd..7ba05fc553a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -208,13 +208,18 @@ fail:
 	return ERR_PTR(error);
 }
 
-static void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
+static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 {
 	struct gfs2_dinode_host *di = &ip->i_di;
 	const struct gfs2_dinode *str = buf;
 
-	gfs2_meta_header_in(&di->di_header, buf);
-	gfs2_inum_in(&di->di_num, &str->di_num);
+	if (ip->i_num.no_addr != be64_to_cpu(str->di_num.no_addr)) {
+		if (gfs2_consist_inode(ip))
+			gfs2_dinode_print(ip);
+		return -EIO;
+	}
+	if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino))
+		return -ESTALE;
 
 	di->di_mode = be32_to_cpu(str->di_mode);
 	di->di_uid = be32_to_cpu(str->di_uid);
@@ -240,6 +245,7 @@ static void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	di->di_entries = be32_to_cpu(str->di_entries);
 
 	di->di_eattr = be64_to_cpu(str->di_eattr);
+	return 0;
 }
 
 /**
@@ -263,21 +269,12 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
 		return -EIO;
 	}
 
-	gfs2_dinode_in(ip, dibh->b_data);
+	error = gfs2_dinode_in(ip, dibh->b_data);
 
 	brelse(dibh);
-
-	if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
-		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(ip);
-		return -EIO;
-	}
-	if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino)
-		return -ESTALE;
-
 	ip->i_vn = ip->i_gl->gl_vn;
 
-	return 0;
+	return error;
 }
 
 int gfs2_dinode_dealloc(struct gfs2_inode *ip)
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 77bed440833..4bc590edb60 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -56,7 +56,7 @@ static void gfs2_inum_print(const struct gfs2_inum_host *no)
 	printk(KERN_INFO "  no_addr = %llu\n", (unsigned long long)no->no_addr);
 }
 
-void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
+static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
 {
 	const struct gfs2_meta_header *str = buf;
 
@@ -74,13 +74,6 @@ static void gfs2_meta_header_out(const struct gfs2_meta_header_host *mh, void *b
 	str->mh_format = cpu_to_be32(mh->mh_format);
 }
 
-static void gfs2_meta_header_print(const struct gfs2_meta_header_host *mh)
-{
-	pv(mh, mh_magic, "0x%.8X");
-	pv(mh, mh_type, "%u");
-	pv(mh, mh_format, "%u");
-}
-
 void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
 {
 	const struct gfs2_sb *str = buf;
@@ -160,8 +153,13 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	const struct gfs2_dinode_host *di = &ip->i_di;
 	struct gfs2_dinode *str = buf;
 
-	gfs2_meta_header_out(&di->di_header, buf);
-	gfs2_inum_out(&di->di_num, (char *)&str->di_num);
+	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+	str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
+	str->di_header.__pad0 = 0;
+	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
+	str->di_header.__pad1 = 0;
+
+	gfs2_inum_out(&ip->i_num, &str->di_num);
 
 	str->di_mode = cpu_to_be32(di->di_mode);
 	str->di_uid = cpu_to_be32(di->di_uid);
@@ -187,15 +185,13 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_entries = cpu_to_be32(di->di_entries);
 
 	str->di_eattr = cpu_to_be64(di->di_eattr);
-
 }
 
 void gfs2_dinode_print(const struct gfs2_inode *ip)
 {
 	const struct gfs2_dinode_host *di = &ip->i_di;
 
-	gfs2_meta_header_print(&di->di_header);
-	gfs2_inum_print(&di->di_num);
+	gfs2_inum_print(&ip->i_num);
 
 	pv(di, di_mode, "0%o");
 	pv(di, di_uid, "%u");
-- 
cgit v1.2.3


From e7f14f4d094ea1a9ce1953375f5bc1500c760c79 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 31 Oct 2006 21:45:08 -0500
Subject: [GFS2] Shrink gfs2_inode (2) - di_major/di_minor

This removes the device numbers from this structure by using
inode->i_rdev instead. It also cleans up the code in gfs2_mknod.
It results in shrinking the gfs2_inode by 8 bytes.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c     | 36 ++++++++++++++++--------------------
 fs/gfs2/inode.h     |  2 +-
 fs/gfs2/ondisk.c    |  4 ----
 fs/gfs2/ops_inode.c | 38 +++++---------------------------------
 4 files changed, 22 insertions(+), 58 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7ba05fc553a..a9959195654 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -51,17 +51,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 	struct gfs2_dinode_host *di = &ip->i_di;
 
 	inode->i_ino = ip->i_num.no_addr;
-
-	switch (di->di_mode & S_IFMT) {
-	case S_IFBLK:
-	case S_IFCHR:
-		inode->i_rdev = MKDEV(di->di_major, di->di_minor);
-		break;
-	default:
-		inode->i_rdev = 0;
-		break;
-	};
-
 	inode->i_mode = di->di_mode;
 	inode->i_nlink = di->di_nlink;
 	inode->i_uid = di->di_uid;
@@ -222,6 +211,15 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 		return -ESTALE;
 
 	di->di_mode = be32_to_cpu(str->di_mode);
+	ip->i_inode.i_rdev = 0;
+	switch (di->di_mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
+					   be32_to_cpu(str->di_minor));
+		break;
+	};
+
 	di->di_uid = be32_to_cpu(str->di_uid);
 	di->di_gid = be32_to_cpu(str->di_gid);
 	di->di_nlink = be32_to_cpu(str->di_nlink);
@@ -230,8 +228,6 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	di->di_atime = be64_to_cpu(str->di_atime);
 	di->di_mtime = be64_to_cpu(str->di_mtime);
 	di->di_ctime = be64_to_cpu(str->di_ctime);
-	di->di_major = be32_to_cpu(str->di_major);
-	di->di_minor = be32_to_cpu(str->di_minor);
 
 	di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
 	di->di_goal_data = be64_to_cpu(str->di_goal_data);
@@ -270,7 +266,6 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
 	}
 
 	error = gfs2_dinode_in(ip, dibh->b_data);
-
 	brelse(dibh);
 	ip->i_vn = ip->i_gl->gl_vn;
 
@@ -684,7 +679,7 @@ out:
 static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 			const struct gfs2_inum_host *inum, unsigned int mode,
 			unsigned int uid, unsigned int gid,
-			const u64 *generation)
+			const u64 *generation, dev_t dev)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct gfs2_dinode *di;
@@ -705,7 +700,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	di->di_size = cpu_to_be64(0);
 	di->di_blocks = cpu_to_be64(1);
 	di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
-	di->di_major = di->di_minor = cpu_to_be32(0);
+	di->di_major = cpu_to_be32(MAJOR(dev));
+	di->di_minor = cpu_to_be32(MINOR(dev));
 	di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
 	di->di_generation = cpu_to_be64(*generation);
 	di->di_flags = cpu_to_be32(0);
@@ -740,7 +736,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 
 static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 		       unsigned int mode, const struct gfs2_inum_host *inum,
-		       const u64 *generation)
+		       const u64 *generation, dev_t dev)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	unsigned int uid, gid;
@@ -761,7 +757,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	if (error)
 		goto out_quota;
 
-	init_dinode(dip, gl, inum, mode, uid, gid, generation);
+	init_dinode(dip, gl, inum, mode, uid, gid, generation, dev);
 	gfs2_quota_change(dip, +1, uid, gid);
 	gfs2_trans_end(sdp);
 
@@ -892,7 +888,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
  */
 
 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
-			   unsigned int mode)
+			   unsigned int mode, dev_t dev)
 {
 	struct inode *inode;
 	struct gfs2_inode *dip = ghs->gh_gl->gl_object;
@@ -950,7 +946,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 			goto fail_gunlock;
 	}
 
-	error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation);
+	error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev);
 	if (error)
 		goto fail_gunlock2;
 
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index d699b920975..33c9ea68f7e 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -37,7 +37,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
 struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 			   int is_root, struct nameidata *nd);
 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
-			   unsigned int mode);
+			   unsigned int mode, dev_t dev);
 int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 		struct gfs2_inode *ip);
 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 4bc590edb60..60dd943761a 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -170,8 +170,6 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_atime = cpu_to_be64(di->di_atime);
 	str->di_mtime = cpu_to_be64(di->di_mtime);
 	str->di_ctime = cpu_to_be64(di->di_ctime);
-	str->di_major = cpu_to_be32(di->di_major);
-	str->di_minor = cpu_to_be32(di->di_minor);
 
 	str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
 	str->di_goal_data = cpu_to_be64(di->di_goal_data);
@@ -202,8 +200,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 	printk(KERN_INFO "  di_atime = %lld\n", (long long)di->di_atime);
 	printk(KERN_INFO "  di_mtime = %lld\n", (long long)di->di_mtime);
 	printk(KERN_INFO "  di_ctime = %lld\n", (long long)di->di_ctime);
-	pv(di, di_major, "%u");
-	pv(di, di_minor, "%u");
 
 	printk(KERN_INFO "  di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
 	printk(KERN_INFO "  di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index b2c2fe613d7..c10b914bf8c 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -59,7 +59,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
 	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
 
 	for (;;) {
-		inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode);
+		inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0);
 		if (!IS_ERR(inode)) {
 			gfs2_trans_end(sdp);
 			if (dip->i_alloc.al_rgd)
@@ -326,7 +326,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
 
 	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
 
-	inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO);
+	inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0);
 	if (IS_ERR(inode)) {
 		gfs2_holder_uninit(ghs);
 		return PTR_ERR(inode);
@@ -379,7 +379,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
 
-	inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode);
+	inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0);
 	if (IS_ERR(inode)) {
 		gfs2_holder_uninit(ghs);
 		return PTR_ERR(inode);
@@ -504,47 +504,19 @@ out:
 static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
 		      dev_t dev)
 {
-	struct gfs2_inode *dip = GFS2_I(dir), *ip;
+	struct gfs2_inode *dip = GFS2_I(dir);
 	struct gfs2_sbd *sdp = GFS2_SB(dir);
 	struct gfs2_holder ghs[2];
 	struct inode *inode;
-	struct buffer_head *dibh;
-	u32 major = 0, minor = 0;
-	int error;
-
-	switch (mode & S_IFMT) {
-	case S_IFBLK:
-	case S_IFCHR:
-		major = MAJOR(dev);
-		minor = MINOR(dev);
-		break;
-	case S_IFIFO:
-	case S_IFSOCK:
-		break;
-	default:
-		return -EOPNOTSUPP;
-	};
 
 	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
 
-	inode = gfs2_createi(ghs, &dentry->d_name, mode);
+	inode = gfs2_createi(ghs, &dentry->d_name, mode, dev);
 	if (IS_ERR(inode)) {
 		gfs2_holder_uninit(ghs);
 		return PTR_ERR(inode);
 	}
 
-	ip = ghs[1].gh_gl->gl_object;
-
-	ip->i_di.di_major = major;
-	ip->i_di.di_minor = minor;
-
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-
-	if (!gfs2_assert_withdraw(sdp, !error)) {
-		gfs2_dinode_out(ip, dibh->b_data);
-		brelse(dibh);
-	}
-
 	gfs2_trans_end(sdp);
 	if (dip->i_alloc.al_rgd)
 		gfs2_inplace_release(dip);
-- 
cgit v1.2.3


From b60623c238b6a819bd04090139704e2cb57a751f Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Nov 2006 12:22:46 -0500
Subject: [GFS2] Shrink gfs2_inode (3) - di_mode

This removes the duplicate di_mode field in favour of using the
inode->i_mode field. This saves 4 bytes.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/acl.c         | 16 ++++++++--------
 fs/gfs2/bmap.c        |  2 +-
 fs/gfs2/eaops.c       |  2 +-
 fs/gfs2/eattr.c       |  8 ++++----
 fs/gfs2/glock.c       |  2 +-
 fs/gfs2/glops.c       |  8 ++++----
 fs/gfs2/inode.c       | 18 +++++++-----------
 fs/gfs2/inode.h       |  2 +-
 fs/gfs2/ondisk.c      |  3 +--
 fs/gfs2/ops_address.c |  1 -
 fs/gfs2/ops_dentry.c  |  2 +-
 fs/gfs2/ops_file.c    |  6 +++---
 fs/gfs2/ops_inode.c   | 17 ++++++++---------
 fs/gfs2/ops_super.c   |  2 +-
 14 files changed, 41 insertions(+), 48 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 906e403b054..87f6304f2f5 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -76,9 +76,9 @@ int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
 		return -EOPNOTSUPP;
 	if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER))
 		return -EPERM;
-	if (S_ISLNK(ip->i_di.di_mode))
+	if (S_ISLNK(ip->i_inode.i_mode))
 		return -EOPNOTSUPP;
-	if (!access && !S_ISDIR(ip->i_di.di_mode))
+	if (!access && !S_ISDIR(ip->i_inode.i_mode))
 		return -EACCES;
 
 	return 0;
@@ -198,8 +198,8 @@ static int munge_mode(struct gfs2_inode *ip, mode_t mode)
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
 		gfs2_assert_withdraw(sdp,
-				(ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT));
-		ip->i_di.di_mode = mode;
+				(ip->i_inode.i_mode & S_IFMT) == (mode & S_IFMT));
+		ip->i_inode.i_mode = mode;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@@ -215,12 +215,12 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct posix_acl *acl = NULL, *clone;
 	struct gfs2_ea_request er;
-	mode_t mode = ip->i_di.di_mode;
+	mode_t mode = ip->i_inode.i_mode;
 	int error;
 
 	if (!sdp->sd_args.ar_posix_acl)
 		return 0;
-	if (S_ISLNK(ip->i_di.di_mode))
+	if (S_ISLNK(ip->i_inode.i_mode))
 		return 0;
 
 	memset(&er, 0, sizeof(struct gfs2_ea_request));
@@ -232,7 +232,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
 		return error;
 	if (!acl) {
 		mode &= ~current->fs->umask;
-		if (mode != ip->i_di.di_mode)
+		if (mode != ip->i_inode.i_mode)
 			error = munge_mode(ip, mode);
 		return error;
 	}
@@ -244,7 +244,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
 	posix_acl_release(acl);
 	acl = clone;
 
-	if (S_ISDIR(ip->i_di.di_mode)) {
+	if (S_ISDIR(ip->i_inode.i_mode)) {
 		er.er_name = GFS2_POSIX_ACL_DEFAULT;
 		er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
 		error = gfs2_system_eaops.eo_set(ip, &er);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 8c092ab2b4b..481a0688254 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1109,7 +1109,7 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
 {
 	int error;
 
-	if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_di.di_mode)))
+	if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_inode.i_mode)))
 		return -EINVAL;
 
 	if (size > ip->i_di.di_size)
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
index 92c54e9b0dc..cd747c00f67 100644
--- a/fs/gfs2/eaops.c
+++ b/fs/gfs2/eaops.c
@@ -120,7 +120,7 @@ static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
 
 	if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
 		if (!(er->er_flags & GFS2_ERF_MODE)) {
-			er->er_mode = ip->i_di.di_mode;
+			er->er_mode = ip->i_inode.i_mode;
 			er->er_flags |= GFS2_ERF_MODE;
 		}
 		error = gfs2_acl_validate_set(ip, 1, er,
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 9b7bb565b59..5208fa94aad 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -711,9 +711,9 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 	if (!error) {
 		if (er->er_flags & GFS2_ERF_MODE) {
 			gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
-					    (ip->i_di.di_mode & S_IFMT) ==
+					    (ip->i_inode.i_mode & S_IFMT) ==
 					    (er->er_mode & S_IFMT));
-			ip->i_di.di_mode = er->er_mode;
+			ip->i_inode.i_mode = er->er_mode;
 		}
 		ip->i_di.di_ctime = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
@@ -847,8 +847,8 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
 
 	if (er->er_flags & GFS2_ERF_MODE) {
 		gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
-			(ip->i_di.di_mode & S_IFMT) == (er->er_mode & S_IFMT));
-		ip->i_di.di_mode = er->er_mode;
+			(ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
+		ip->i_inode.i_mode = er->er_mode;
 	}
 	ip->i_di.di_ctime = get_seconds();
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 78fe0fae23f..44633c46717 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -2078,7 +2078,7 @@ static int dump_inode(struct gfs2_inode *ip)
 	printk(KERN_INFO "    num = %llu %llu\n",
 		    (unsigned long long)ip->i_num.no_formal_ino,
 		    (unsigned long long)ip->i_num.no_addr);
-	printk(KERN_INFO "    type = %u\n", IF2DT(ip->i_di.di_mode));
+	printk(KERN_INFO "    type = %u\n", IF2DT(ip->i_inode.i_mode));
 	printk(KERN_INFO "    i_flags =");
 	for (x = 0; x < 32; x++)
 		if (test_bit(x, &ip->i_flags))
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 5406b193227..aad45b7a927 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -92,7 +92,7 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
 
 	ip = gl->gl_object;
 	inode = &ip->i_inode;
-	if (!ip || !S_ISREG(ip->i_di.di_mode))
+	if (!ip || !S_ISREG(inode->i_mode))
 		return;
 
 	if (!test_bit(GIF_PAGED, &ip->i_flags))
@@ -119,7 +119,7 @@ static void gfs2_page_inval(struct gfs2_glock *gl)
 
 	ip = gl->gl_object;
 	inode = &ip->i_inode;
-	if (!ip || !S_ISREG(ip->i_di.di_mode))
+	if (!ip || !S_ISREG(inode->i_mode))
 		return;
 
 	truncate_inode_pages(inode->i_mapping, 0);
@@ -142,7 +142,7 @@ static void gfs2_page_wait(struct gfs2_glock *gl)
 	struct address_space *mapping = inode->i_mapping;
 	int error;
 
-	if (!S_ISREG(ip->i_di.di_mode))
+	if (!S_ISREG(inode->i_mode))
 		return;
 
 	error = filemap_fdatawait(mapping);
@@ -164,7 +164,7 @@ static void gfs2_page_writeback(struct gfs2_glock *gl)
 	struct inode *inode = &ip->i_inode;
 	struct address_space *mapping = inode->i_mapping;
 
-	if (!S_ISREG(ip->i_di.di_mode))
+	if (!S_ISREG(inode->i_mode))
 		return;
 
 	filemap_fdatawrite(mapping);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index a9959195654..de466043c91 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -51,7 +51,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 	struct gfs2_dinode_host *di = &ip->i_di;
 
 	inode->i_ino = ip->i_num.no_addr;
-	inode->i_mode = di->di_mode;
 	inode->i_nlink = di->di_nlink;
 	inode->i_uid = di->di_uid;
 	inode->i_gid = di->di_gid;
@@ -88,9 +87,6 @@ void gfs2_inode_attr_out(struct gfs2_inode *ip)
 {
 	struct inode *inode = &ip->i_inode;
 	struct gfs2_dinode_host *di = &ip->i_di;
-	gfs2_assert_withdraw(GFS2_SB(inode),
-		(di->di_mode & S_IFMT) == (inode->i_mode & S_IFMT));
-	di->di_mode = inode->i_mode;
 	di->di_uid = inode->i_uid;
 	di->di_gid = inode->i_gid;
 	di->di_atime = inode->i_atime.tv_sec;
@@ -210,9 +206,9 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino))
 		return -ESTALE;
 
-	di->di_mode = be32_to_cpu(str->di_mode);
+	ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
 	ip->i_inode.i_rdev = 0;
-	switch (di->di_mode & S_IFMT) {
+	switch (ip->i_inode.i_mode & S_IFMT) {
 	case S_IFBLK:
 	case S_IFCHR:
 		ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
@@ -620,7 +616,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
 			       unsigned int *uid, unsigned int *gid)
 {
 	if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
-	    (dip->i_di.di_mode & S_ISUID) && dip->i_di.di_uid) {
+	    (dip->i_inode.i_mode & S_ISUID) && dip->i_di.di_uid) {
 		if (S_ISDIR(*mode))
 			*mode |= S_ISUID;
 		else if (dip->i_di.di_uid != current->fsuid)
@@ -629,7 +625,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
 	} else
 		*uid = current->fsuid;
 
-	if (dip->i_di.di_mode & S_ISGID) {
+	if (dip->i_inode.i_mode & S_ISGID) {
 		if (S_ISDIR(*mode))
 			*mode |= S_ISGID;
 		*gid = dip->i_di.di_gid;
@@ -810,7 +806,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 			goto fail_quota_locks;
 	}
 
-	error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_di.di_mode));
+	error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_inode.i_mode));
 	if (error)
 		goto fail_end_trans;
 
@@ -1053,7 +1049,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 	if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
 		return -EPERM;
 
-	if ((dip->i_di.di_mode & S_ISVTX) &&
+	if ((dip->i_inode.i_mode & S_ISVTX) &&
 	    dip->i_di.di_uid != current->fsuid &&
 	    ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER))
 		return -EPERM;
@@ -1072,7 +1068,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 	if (!gfs2_inum_equal(&inum, &ip->i_num))
 		return -ENOENT;
 
-	if (IF2DT(ip->i_di.di_mode) != type) {
+	if (IF2DT(ip->i_inode.i_mode) != type) {
 		gfs2_consist_inode(dip);
 		return -EIO;
 	}
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 33c9ea68f7e..69cbf98509a 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -22,7 +22,7 @@ static inline int gfs2_is_jdata(struct gfs2_inode *ip)
 
 static inline int gfs2_is_dir(struct gfs2_inode *ip)
 {
-	return S_ISDIR(ip->i_di.di_mode);
+	return S_ISDIR(ip->i_inode.i_mode);
 }
 
 void gfs2_inode_attr_in(struct gfs2_inode *ip);
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 60dd943761a..6b50a5731a5 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -161,7 +161,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 
 	gfs2_inum_out(&ip->i_num, &str->di_num);
 
-	str->di_mode = cpu_to_be32(di->di_mode);
+	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
 	str->di_uid = cpu_to_be32(di->di_uid);
 	str->di_gid = cpu_to_be32(di->di_gid);
 	str->di_nlink = cpu_to_be32(di->di_nlink);
@@ -191,7 +191,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 
 	gfs2_inum_print(&ip->i_num);
 
-	pv(di, di_mode, "0%o");
 	pv(di, di_uid, "%u");
 	pv(di, di_gid, "%u");
 	pv(di, di_nlink, "%u");
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 015640b3f12..45a3d85b1d6 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -498,7 +498,6 @@ static int gfs2_commit_write(struct file *file, struct page *page,
 		di->di_size = cpu_to_be64(inode->i_size);
 	}
 
-	di->di_mode = cpu_to_be32(inode->i_mode);
 	di->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
 	di->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
 	di->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index c36f9e342e6..d355899585d 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -76,7 +76,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 	if (!gfs2_inum_equal(&ip->i_num, &inum))
 		goto invalid_gunlock;
 
-	if (IF2DT(ip->i_di.di_mode) != type) {
+	if (IF2DT(ip->i_inode.i_mode) != type) {
 		gfs2_consist_inode(dip);
 		goto fail_gunlock;
 	}
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 7ea41757390..b52b9db1a2b 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -425,7 +425,7 @@ static int gfs2_open(struct inode *inode, struct file *file)
 	gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
 	file->private_data = fp;
 
-	if (S_ISREG(ip->i_di.di_mode)) {
+	if (S_ISREG(ip->i_inode.i_mode)) {
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
 					   &i_gh);
 		if (error)
@@ -515,7 +515,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
 
 	if (!(fl->fl_flags & FL_POSIX))
 		return -ENOLCK;
-	if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+	if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
 		return -ENOLCK;
 
 	if (sdp->sd_args.ar_localflocks) {
@@ -617,7 +617,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
 
 	if (!(fl->fl_flags & FL_FLOCK))
 		return -ENOLCK;
-	if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+	if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
 		return -ENOLCK;
 
 	if (sdp->sd_args.ar_localflocks)
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index c10b914bf8c..cf7a5bae395 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -144,7 +144,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	int alloc_required;
 	int error;
 
-	if (S_ISDIR(ip->i_di.di_mode))
+	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
 
 	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
@@ -220,7 +220,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	}
 
 	error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num,
-			     IF2DT(ip->i_di.di_mode));
+			     IF2DT(inode->i_mode));
 	if (error)
 		goto out_end_trans;
 
@@ -564,11 +564,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 
 	/* Make sure we aren't trying to move a dirctory into it's subdir */
 
-	if (S_ISDIR(ip->i_di.di_mode) && odip != ndip) {
+	if (S_ISDIR(ip->i_inode.i_mode) && odip != ndip) {
 		dir_rename = 1;
 
-		error = gfs2_glock_nq_init(sdp->sd_rename_gl,
-					   LM_ST_EXCLUSIVE, 0,
+		error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0,
 					   &r_gh);
 		if (error)
 			goto out;
@@ -609,7 +608,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		if (error)
 			goto out_gunlock;
 
-		if (S_ISDIR(nip->i_di.di_mode)) {
+		if (S_ISDIR(nip->i_inode.i_mode)) {
 			if (nip->i_di.di_entries < 2) {
 				if (gfs2_consist_inode(nip))
 					gfs2_dinode_print(nip);
@@ -646,7 +645,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 				error = -EFBIG;
 				goto out_gunlock;
 			}
-			if (S_ISDIR(ip->i_di.di_mode) &&
+			if (S_ISDIR(ip->i_inode.i_mode) &&
 			    ndip->i_di.di_nlink == (u32)-1) {
 				error = -EMLINK;
 				goto out_gunlock;
@@ -701,7 +700,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 	/* Remove the target file, if it exists */
 
 	if (nip) {
-		if (S_ISDIR(nip->i_di.di_mode))
+		if (S_ISDIR(nip->i_inode.i_mode))
 			error = gfs2_rmdiri(ndip, &ndentry->d_name, nip);
 		else {
 			error = gfs2_dir_del(ndip, &ndentry->d_name);
@@ -743,7 +742,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		goto out_end_trans;
 
 	error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num,
-			     IF2DT(ip->i_di.di_mode));
+			     IF2DT(ip->i_inode.i_mode));
 	if (error)
 		goto out_end_trans;
 
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 9c786d1e702..86351756922 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -407,7 +407,7 @@ static void gfs2_delete_inode(struct inode *inode)
 	if (error)
 		goto out_uninit;
 
-	if (S_ISDIR(ip->i_di.di_mode) &&
+	if (S_ISDIR(inode->i_mode) &&
 	    (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
 		error = gfs2_dir_exhash_dealloc(ip);
 		if (error)
-- 
cgit v1.2.3


From 2933f9254a6af33db25270778c998a42029da668 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Nov 2006 13:23:29 -0500
Subject: [GFS2] Shrink gfs2_inode (4) - di_uid/di_gid

Remove duplicate di_uid/di_gid fields in favour of using
inode->i_uid/inode->i_gid instead. This saves 8 bytes.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/acl.c         |  2 +-
 fs/gfs2/bmap.c        |  2 +-
 fs/gfs2/eattr.c       |  2 +-
 fs/gfs2/inode.c       | 23 +++++++++--------------
 fs/gfs2/ondisk.c      |  6 ++----
 fs/gfs2/ops_address.c |  2 +-
 fs/gfs2/ops_inode.c   | 10 ++++------
 fs/gfs2/ops_vm.c      |  2 +-
 fs/gfs2/quota.c       |  8 ++++----
 fs/gfs2/rgrp.c        | 11 +++++------
 10 files changed, 29 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 87f6304f2f5..3908992b27a 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -74,7 +74,7 @@ int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
 {
 	if (!GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl)
 		return -EOPNOTSUPP;
-	if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER))
+	if (current->fsuid != ip->i_inode.i_uid && !capable(CAP_FOWNER))
 		return -EPERM;
 	if (S_ISLNK(ip->i_inode.i_mode))
 		return -EOPNOTSUPP;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 481a0688254..0c913eecf88 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -819,7 +819,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
 	if (error)
 		goto out;
 
-	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+	error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	if (error)
 		goto out_gunlock_q;
 
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 5208fa94aad..935cc9a5716 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -687,7 +687,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 	if (error)
 		goto out;
 
-	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+	error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	if (error)
 		goto out_gunlock_q;
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index de466043c91..0de9b22f454 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -52,8 +52,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 
 	inode->i_ino = ip->i_num.no_addr;
 	inode->i_nlink = di->di_nlink;
-	inode->i_uid = di->di_uid;
-	inode->i_gid = di->di_gid;
 	i_size_write(inode, di->di_size);
 	inode->i_atime.tv_sec = di->di_atime;
 	inode->i_mtime.tv_sec = di->di_mtime;
@@ -87,8 +85,6 @@ void gfs2_inode_attr_out(struct gfs2_inode *ip)
 {
 	struct inode *inode = &ip->i_inode;
 	struct gfs2_dinode_host *di = &ip->i_di;
-	di->di_uid = inode->i_uid;
-	di->di_gid = inode->i_gid;
 	di->di_atime = inode->i_atime.tv_sec;
 	di->di_mtime = inode->i_mtime.tv_sec;
 	di->di_ctime = inode->i_ctime.tv_sec;
@@ -216,8 +212,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 		break;
 	};
 
-	di->di_uid = be32_to_cpu(str->di_uid);
-	di->di_gid = be32_to_cpu(str->di_gid);
+	ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
+	ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
 	di->di_nlink = be32_to_cpu(str->di_nlink);
 	di->di_size = be64_to_cpu(str->di_size);
 	di->di_blocks = be64_to_cpu(str->di_blocks);
@@ -616,19 +612,19 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
 			       unsigned int *uid, unsigned int *gid)
 {
 	if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
-	    (dip->i_inode.i_mode & S_ISUID) && dip->i_di.di_uid) {
+	    (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) {
 		if (S_ISDIR(*mode))
 			*mode |= S_ISUID;
-		else if (dip->i_di.di_uid != current->fsuid)
+		else if (dip->i_inode.i_uid != current->fsuid)
 			*mode &= ~07111;
-		*uid = dip->i_di.di_uid;
+		*uid = dip->i_inode.i_uid;
 	} else
 		*uid = current->fsuid;
 
 	if (dip->i_inode.i_mode & S_ISGID) {
 		if (S_ISDIR(*mode))
 			*mode |= S_ISGID;
-		*gid = dip->i_di.di_gid;
+		*gid = dip->i_inode.i_gid;
 	} else
 		*gid = current->fsgid;
 }
@@ -783,8 +779,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 	if (alloc_required < 0)
 		goto fail;
 	if (alloc_required) {
-		error = gfs2_quota_check(dip, dip->i_di.di_uid,
-					 dip->i_di.di_gid);
+		error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid);
 		if (error)
 			goto fail_quota_locks;
 
@@ -1050,8 +1045,8 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 		return -EPERM;
 
 	if ((dip->i_inode.i_mode & S_ISVTX) &&
-	    dip->i_di.di_uid != current->fsuid &&
-	    ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER))
+	    dip->i_inode.i_uid != current->fsuid &&
+	    ip->i_inode.i_uid != current->fsuid && !capable(CAP_FOWNER))
 		return -EPERM;
 
 	if (IS_APPEND(&dip->i_inode))
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 6b50a5731a5..e224f6a2264 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -162,8 +162,8 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	gfs2_inum_out(&ip->i_num, &str->di_num);
 
 	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
-	str->di_uid = cpu_to_be32(di->di_uid);
-	str->di_gid = cpu_to_be32(di->di_gid);
+	str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
+	str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
 	str->di_nlink = cpu_to_be32(di->di_nlink);
 	str->di_size = cpu_to_be64(di->di_size);
 	str->di_blocks = cpu_to_be64(di->di_blocks);
@@ -191,8 +191,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 
 	gfs2_inum_print(&ip->i_num);
 
-	pv(di, di_uid, "%u");
-	pv(di, di_gid, "%u");
 	pv(di, di_nlink, "%u");
 	printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
 	printk(KERN_INFO "  di_blocks = %llu\n", (unsigned long long)di->di_blocks);
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 45a3d85b1d6..38b702a1824 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -386,7 +386,7 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
 		if (error)
 			goto out_alloc_put;
 
-		error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+		error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
 		if (error)
 			goto out_qunlock;
 
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index cf7a5bae395..efbcec3311b 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -196,8 +196,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 		if (error)
 			goto out_alloc;
 
-		error = gfs2_quota_check(dip, dip->i_di.di_uid,
-					 dip->i_di.di_gid);
+		error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid);
 		if (error)
 			goto out_gunlock_q;
 
@@ -673,8 +672,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		if (error)
 			goto out_alloc;
 
-		error = gfs2_quota_check(ndip, ndip->i_di.di_uid,
-					 ndip->i_di.di_gid);
+		error = gfs2_quota_check(ndip, ndip->i_inode.i_uid, ndip->i_inode.i_gid);
 		if (error)
 			goto out_gunlock_q;
 
@@ -885,8 +883,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 	u32 ouid, ogid, nuid, ngid;
 	int error;
 
-	ouid = ip->i_di.di_uid;
-	ogid = ip->i_di.di_gid;
+	ouid = inode->i_uid;
+	ogid = inode->i_gid;
 	nuid = attr->ia_uid;
 	ngid = attr->ia_gid;
 
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index 5453d2947ab..45a5f11fc39 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -76,7 +76,7 @@ static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
 	if (error)
 		goto out;
 
-	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+	error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	if (error)
 		goto out_gunlock_q;
 
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 5d00e9b2097..d0db881b55d 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -452,19 +452,19 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
 	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
 		return 0;
 
-	error = qdsb_get(sdp, QUOTA_USER, ip->i_di.di_uid, CREATE, qd);
+	error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, CREATE, qd);
 	if (error)
 		goto out;
 	al->al_qd_num++;
 	qd++;
 
-	error = qdsb_get(sdp, QUOTA_GROUP, ip->i_di.di_gid, CREATE, qd);
+	error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, CREATE, qd);
 	if (error)
 		goto out;
 	al->al_qd_num++;
 	qd++;
 
-	if (uid != NO_QUOTA_CHANGE && uid != ip->i_di.di_uid) {
+	if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) {
 		error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd);
 		if (error)
 			goto out;
@@ -472,7 +472,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
 		qd++;
 	}
 
-	if (gid != NO_QUOTA_CHANGE && gid != ip->i_di.di_gid) {
+	if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) {
 		error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd);
 		if (error)
 			goto out;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 07dfd630505..ff0846528d5 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1217,7 +1217,7 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip)
 	al->al_alloced++;
 
 	gfs2_statfs_change(sdp, 0, -1, 0);
-	gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
+	gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid);
 
 	spin_lock(&sdp->sd_rindex_spin);
 	rgd->rd_free_clone--;
@@ -1261,7 +1261,7 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip)
 	al->al_alloced++;
 
 	gfs2_statfs_change(sdp, 0, -1, 0);
-	gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
+	gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	gfs2_trans_add_unrevoke(sdp, block);
 
 	spin_lock(&sdp->sd_rindex_spin);
@@ -1337,8 +1337,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
 	gfs2_trans_add_rg(rgd);
 
 	gfs2_statfs_change(sdp, 0, +blen, 0);
-	gfs2_quota_change(ip, -(s64)blen,
-			 ip->i_di.di_uid, ip->i_di.di_gid);
+	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
 }
 
 /**
@@ -1366,7 +1365,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
 	gfs2_trans_add_rg(rgd);
 
 	gfs2_statfs_change(sdp, 0, +blen, 0);
-	gfs2_quota_change(ip, -(s64)blen, ip->i_di.di_uid, ip->i_di.di_gid);
+	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	gfs2_meta_wipe(ip, bstart, blen);
 }
 
@@ -1411,7 +1410,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
 	gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
-	gfs2_quota_change(ip, -1, ip->i_di.di_uid, ip->i_di.di_gid);
+	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
 }
 
-- 
cgit v1.2.3


From 4f56110a00af5fb2e22fbccfcaf944d62cae8fcf Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Nov 2006 14:04:17 -0500
Subject: [GFS2] Shrink gfs2_inode (5) - di_nlink

Remove the di_nlink field in favour of inode->i_nlink and
update the nlink handling to use the proper macros. This
saves 4 bytes.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c     | 37 ++++++++++++++++++++++++-------------
 fs/gfs2/ondisk.c    |  3 +--
 fs/gfs2/ops_inode.c | 12 ++++++------
 3 files changed, 31 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 0de9b22f454..71120398482 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -51,7 +51,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 	struct gfs2_dinode_host *di = &ip->i_di;
 
 	inode->i_ino = ip->i_num.no_addr;
-	inode->i_nlink = di->di_nlink;
 	i_size_write(inode, di->di_size);
 	inode->i_atime.tv_sec = di->di_atime;
 	inode->i_mtime.tv_sec = di->di_mtime;
@@ -214,7 +213,12 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 
 	ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
 	ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
-	di->di_nlink = be32_to_cpu(str->di_nlink);
+	/*
+	 * We will need to review setting the nlink count here in the
+	 * light of the forthcoming ro bind mount work. This is a reminder
+	 * to do that.
+	 */
+	ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
 	di->di_size = be64_to_cpu(str->di_size);
 	di->di_blocks = be64_to_cpu(str->di_blocks);
 	di->di_atime = be64_to_cpu(str->di_atime);
@@ -336,12 +340,12 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
 	u32 nlink;
 	int error;
 
-	BUG_ON(ip->i_di.di_nlink != ip->i_inode.i_nlink);
-	nlink = ip->i_di.di_nlink + diff;
+	BUG_ON(diff != 1 && diff != -1);
+	nlink = ip->i_inode.i_nlink + diff;
 
 	/* If we are reducing the nlink count, but the new value ends up being
 	   bigger than the old one, we must have underflowed. */
-	if (diff < 0 && nlink > ip->i_di.di_nlink) {
+	if (diff < 0 && nlink > ip->i_inode.i_nlink) {
 		if (gfs2_consist_inode(ip))
 			gfs2_dinode_print(ip);
 		return -EIO;
@@ -351,16 +355,19 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
 	if (error)
 		return error;
 
-	ip->i_di.di_nlink = nlink;
+	if (diff > 0)
+		inc_nlink(&ip->i_inode);
+	else
+		drop_nlink(&ip->i_inode);
+
 	ip->i_di.di_ctime = get_seconds();
-	ip->i_inode.i_nlink = nlink;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 	mark_inode_dirty(&ip->i_inode);
 
-	if (ip->i_di.di_nlink == 0) {
+	if (ip->i_inode.i_nlink == 0) {
 		struct gfs2_rgrpd *rgd;
 		struct gfs2_holder ri_gh, rg_gh;
 
@@ -375,7 +382,6 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
 		if (error)
 			goto out_norgrp;
 
-		clear_nlink(&ip->i_inode);
 		gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
 		gfs2_glock_dq_uninit(&rg_gh);
 out_norgrp:
@@ -586,7 +592,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
 		return error;
 
 	/*  Don't create entries in an unlinked directory  */
-	if (!dip->i_di.di_nlink)
+	if (!dip->i_inode.i_nlink)
 		return -EPERM;
 
 	error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL);
@@ -602,7 +608,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
 
 	if (dip->i_di.di_entries == (u32)-1)
 		return -EFBIG;
-	if (S_ISDIR(mode) && dip->i_di.di_nlink == (u32)-1)
+	if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1)
 		return -EMLINK;
 
 	return 0;
@@ -808,7 +814,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (error)
 		goto fail_end_trans;
-	ip->i_di.di_nlink = 1;
+	ip->i_inode.i_nlink = 1;
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
@@ -1016,7 +1022,12 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 	if (error)
 		return error;
 
-	error = gfs2_change_nlink(ip, -2);
+	/* It looks odd, but it really should be done twice */
+	error = gfs2_change_nlink(ip, -1);
+	if (error)
+		return error;
+
+	error = gfs2_change_nlink(ip, -1);
 	if (error)
 		return error;
 
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index e224f6a2264..b4e354b1881 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -164,7 +164,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
 	str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
 	str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
-	str->di_nlink = cpu_to_be32(di->di_nlink);
+	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
 	str->di_size = cpu_to_be64(di->di_size);
 	str->di_blocks = cpu_to_be64(di->di_blocks);
 	str->di_atime = cpu_to_be64(di->di_atime);
@@ -191,7 +191,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 
 	gfs2_inum_print(&ip->i_num);
 
-	pv(di, di_nlink, "%u");
 	printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
 	printk(KERN_INFO "  di_blocks = %llu\n", (unsigned long long)di->di_blocks);
 	printk(KERN_INFO "  di_atime = %lld\n", (long long)di->di_atime);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index efbcec3311b..06176dee155 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -169,7 +169,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	}
 
 	error = -EINVAL;
-	if (!dip->i_di.di_nlink)
+	if (!dip->i_inode.i_nlink)
 		goto out_gunlock;
 	error = -EFBIG;
 	if (dip->i_di.di_entries == (u32)-1)
@@ -178,10 +178,10 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 		goto out_gunlock;
 	error = -EINVAL;
-	if (!ip->i_di.di_nlink)
+	if (!ip->i_inode.i_nlink)
 		goto out_gunlock;
 	error = -EMLINK;
-	if (ip->i_di.di_nlink == (u32)-1)
+	if (ip->i_inode.i_nlink == (u32)-1)
 		goto out_gunlock;
 
 	alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
@@ -386,7 +386,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	ip = ghs[1].gh_gl->gl_object;
 
-	ip->i_di.di_nlink = 2;
+	ip->i_inode.i_nlink = 2;
 	ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
 	ip->i_di.di_flags |= GFS2_DIF_JDATA;
 	ip->i_di.di_payload_format = GFS2_FORMAT_DE;
@@ -636,7 +636,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		};
 
 		if (odip != ndip) {
-			if (!ndip->i_di.di_nlink) {
+			if (!ndip->i_inode.i_nlink) {
 				error = -EINVAL;
 				goto out_gunlock;
 			}
@@ -645,7 +645,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 				goto out_gunlock;
 			}
 			if (S_ISDIR(ip->i_inode.i_mode) &&
-			    ndip->i_di.di_nlink == (u32)-1) {
+			    ndip->i_inode.i_nlink == (u32)-1) {
 				error = -EMLINK;
 				goto out_gunlock;
 			}
-- 
cgit v1.2.3


From 1a7b1eed5802502fd649e04784becd58557fdcf1 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Nov 2006 14:35:17 -0500
Subject: [GFS2] Shrink gfs2_inode (6) - di_atime/di_mtime/di_ctime

Remove the di_[amc]time fields and use inode->i_[amc]time
fields instead. This saves 24 bytes from the gfs2_inode.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/bmap.c        | 10 +++++-----
 fs/gfs2/dir.c         | 10 +++++-----
 fs/gfs2/eattr.c       |  9 ++++-----
 fs/gfs2/inode.c       | 44 +++++++++++---------------------------------
 fs/gfs2/inode.h       |  1 -
 fs/gfs2/ondisk.c      | 10 +++-------
 fs/gfs2/ops_address.c |  4 ----
 fs/gfs2/ops_inode.c   |  3 +--
 8 files changed, 29 insertions(+), 62 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 0c913eecf88..692d4a3da1b 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -778,7 +778,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 			gfs2_free_data(ip, bstart, blen);
 	}
 
-	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+	ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 
 	gfs2_dinode_out(ip, dibh->b_data);
 
@@ -853,7 +853,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
 	}
 
 	ip->i_di.di_size = size;
-	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+	ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (error)
@@ -968,7 +968,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 
 	if (gfs2_is_stuffed(ip)) {
 		ip->i_di.di_size = size;
-		ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+		ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
@@ -980,7 +980,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 
 		if (!error) {
 			ip->i_di.di_size = size;
-			ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+			ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 			ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
 			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 			gfs2_dinode_out(ip, dibh->b_data);
@@ -1053,7 +1053,7 @@ static int trunc_end(struct gfs2_inode *ip)
 			ip->i_num.no_addr;
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 	}
-	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+	ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 	ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 0742761e1e0..ca23c8beb3f 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -131,7 +131,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
 	memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
 	if (ip->i_di.di_size < offset + size)
 		ip->i_di.di_size = offset + size;
-	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+	ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 	gfs2_dinode_out(ip, dibh->b_data);
 
 	brelse(dibh);
@@ -229,7 +229,7 @@ out:
 
 	if (ip->i_di.di_size < offset + copied)
 		ip->i_di.di_size = offset + copied;
-	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+	ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
@@ -1560,7 +1560,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 				break;
 			gfs2_trans_add_bh(ip->i_gl, bh, 1);
 			ip->i_di.di_entries++;
-			ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+			ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 			gfs2_dinode_out(ip, bh->b_data);
 			brelse(bh);
 			error = 0;
@@ -1646,7 +1646,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
 		gfs2_consist_inode(dip);
 	gfs2_trans_add_bh(dip->i_gl, bh, 1);
 	dip->i_di.di_entries--;
-	dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
+	dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds();
 	gfs2_dinode_out(dip, bh->b_data);
 	brelse(bh);
 	mark_inode_dirty(&dip->i_inode);
@@ -1694,7 +1694,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 		gfs2_trans_add_bh(dip->i_gl, bh, 1);
 	}
 
-	dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
+	dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds();
 	gfs2_dinode_out(dip, bh->b_data);
 	brelse(bh);
 	return 0;
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 935cc9a5716..7dde84775ba 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -300,7 +300,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
-		ip->i_di.di_ctime = get_seconds();
+		ip->i_inode.i_ctime.tv_sec = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@@ -715,7 +715,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 					    (er->er_mode & S_IFMT));
 			ip->i_inode.i_mode = er->er_mode;
 		}
-		ip->i_di.di_ctime = get_seconds();
+		ip->i_inode.i_ctime.tv_sec = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@@ -850,7 +850,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
 			(ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
 		ip->i_inode.i_mode = er->er_mode;
 	}
-	ip->i_di.di_ctime = get_seconds();
+	ip->i_inode.i_ctime.tv_sec = get_seconds();
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
@@ -1130,7 +1130,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
-		ip->i_di.di_ctime = get_seconds();
+		ip->i_inode.i_ctime.tv_sec = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@@ -1285,7 +1285,6 @@ int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
 	if (!error) {
 		error = inode_setattr(&ip->i_inode, attr);
 		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
-		gfs2_inode_attr_out(ip);
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 71120398482..c22ae3c3a44 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -52,12 +52,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 
 	inode->i_ino = ip->i_num.no_addr;
 	i_size_write(inode, di->di_size);
-	inode->i_atime.tv_sec = di->di_atime;
-	inode->i_mtime.tv_sec = di->di_mtime;
-	inode->i_ctime.tv_sec = di->di_ctime;
-	inode->i_atime.tv_nsec = 0;
-	inode->i_mtime.tv_nsec = 0;
-	inode->i_ctime.tv_nsec = 0;
 	inode->i_blocks = di->di_blocks <<
 		(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
 
@@ -72,23 +66,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 		inode->i_flags &= ~S_APPEND;
 }
 
-/**
- * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode
- * @ip: The GFS2 inode
- *
- * Only copy out the attributes that we want the VFS layer
- * to be able to modify.
- */
-
-void gfs2_inode_attr_out(struct gfs2_inode *ip)
-{
-	struct inode *inode = &ip->i_inode;
-	struct gfs2_dinode_host *di = &ip->i_di;
-	di->di_atime = inode->i_atime.tv_sec;
-	di->di_mtime = inode->i_mtime.tv_sec;
-	di->di_ctime = inode->i_ctime.tv_sec;
-}
-
 static int iget_test(struct inode *inode, void *opaque)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
@@ -221,9 +198,12 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
 	di->di_size = be64_to_cpu(str->di_size);
 	di->di_blocks = be64_to_cpu(str->di_blocks);
-	di->di_atime = be64_to_cpu(str->di_atime);
-	di->di_mtime = be64_to_cpu(str->di_mtime);
-	di->di_ctime = be64_to_cpu(str->di_ctime);
+	ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
+	ip->i_inode.i_atime.tv_nsec = 0;
+	ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
+	ip->i_inode.i_mtime.tv_nsec = 0;
+	ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
+	ip->i_inode.i_ctime.tv_nsec = 0;
 
 	di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
 	di->di_goal_data = be64_to_cpu(str->di_goal_data);
@@ -360,7 +340,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
 	else
 		drop_nlink(&ip->i_inode);
 
-	ip->i_di.di_ctime = get_seconds();
+	ip->i_inode.i_ctime.tv_sec = get_seconds();
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
@@ -1224,7 +1204,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
 		return 0;
 
 	curtime = get_seconds();
-	if (curtime - ip->i_di.di_atime >= quantum) {
+	if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
 		gfs2_glock_dq(gh);
 		gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
 				   gh);
@@ -1236,7 +1216,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
 		   trying to get exclusive lock. */
 
 		curtime = get_seconds();
-		if (curtime - ip->i_di.di_atime >= quantum) {
+		if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
 			struct buffer_head *dibh;
 			struct gfs2_dinode *di;
 
@@ -1250,11 +1230,11 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
 			if (error)
 				goto fail_end_trans;
 
-			ip->i_di.di_atime = curtime;
+			ip->i_inode.i_atime.tv_sec = curtime;
 
 			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 			di = (struct gfs2_dinode *)dibh->b_data;
-			di->di_atime = cpu_to_be64(ip->i_di.di_atime);
+			di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
 			brelse(dibh);
 
 			gfs2_trans_end(sdp);
@@ -1375,8 +1355,6 @@ __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 	if (!error) {
 		error = inode_setattr(&ip->i_inode, attr);
 		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
-		gfs2_inode_attr_out(ip);
-
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 69cbf98509a..54d584eddf2 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -26,7 +26,6 @@ static inline int gfs2_is_dir(struct gfs2_inode *ip)
 }
 
 void gfs2_inode_attr_in(struct gfs2_inode *ip);
-void gfs2_inode_attr_out(struct gfs2_inode *ip);
 struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type);
 struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum);
 
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index b4e354b1881..82003e872a3 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -167,9 +167,9 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
 	str->di_size = cpu_to_be64(di->di_size);
 	str->di_blocks = cpu_to_be64(di->di_blocks);
-	str->di_atime = cpu_to_be64(di->di_atime);
-	str->di_mtime = cpu_to_be64(di->di_mtime);
-	str->di_ctime = cpu_to_be64(di->di_ctime);
+	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
+	str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
 
 	str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
 	str->di_goal_data = cpu_to_be64(di->di_goal_data);
@@ -193,10 +193,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 
 	printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
 	printk(KERN_INFO "  di_blocks = %llu\n", (unsigned long long)di->di_blocks);
-	printk(KERN_INFO "  di_atime = %lld\n", (long long)di->di_atime);
-	printk(KERN_INFO "  di_mtime = %lld\n", (long long)di->di_mtime);
-	printk(KERN_INFO "  di_ctime = %lld\n", (long long)di->di_ctime);
-
 	printk(KERN_INFO "  di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
 	printk(KERN_INFO "  di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
 
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 38b702a1824..5c3962c80e8 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -498,10 +498,6 @@ static int gfs2_commit_write(struct file *file, struct page *page,
 		di->di_size = cpu_to_be64(inode->i_size);
 	}
 
-	di->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
-	di->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
-	di->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);
-
 	brelse(dibh);
 	gfs2_trans_end(sdp);
 	if (al->al_requested) {
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 06176dee155..585b43a94ac 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -729,7 +729,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		error = gfs2_meta_inode_buffer(ip, &dibh);
 		if (error)
 			goto out_end_trans;
-		ip->i_di.di_ctime = get_seconds();
+		ip->i_inode.i_ctime.tv_sec = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@@ -915,7 +915,6 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 
 	error = inode_setattr(inode, attr);
 	gfs2_assert_warn(sdp, !error);
-	gfs2_inode_attr_out(ip);
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
-- 
cgit v1.2.3


From a9583c7983cbba9726bfe64ee46613d654fc9e26 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Nov 2006 20:09:14 -0500
Subject: [GFS2] Shrink gfs2_inode (7) - di_payload_format

This is almost never used. Its there for backward
compatibility with GFS1. It doesn't need its own
field since it can always be calculated from the
inode mode & flags. This saves a bit more space
in the gfs2_inode.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/dir.c       | 1 -
 fs/gfs2/inode.c     | 3 +--
 fs/gfs2/ondisk.c    | 6 +++---
 fs/gfs2/ops_inode.c | 1 -
 4 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index ca23c8beb3f..c82d7cb4a65 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -902,7 +902,6 @@ static int dir_make_exhash(struct inode *inode)
 	dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
 	dip->i_di.di_blocks++;
 	dip->i_di.di_flags |= GFS2_DIF_EXHASH;
-	dip->i_di.di_payload_format = 0;
 
 	for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
 	dip->i_di.di_depth = y;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index c22ae3c3a44..f6177fc6832 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -210,7 +210,6 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	di->di_generation = be64_to_cpu(str->di_generation);
 
 	di->di_flags = be32_to_cpu(str->di_flags);
-	di->di_payload_format = be32_to_cpu(str->di_payload_format);
 	di->di_height = be16_to_cpu(str->di_height);
 
 	di->di_depth = be16_to_cpu(str->di_depth);
@@ -699,7 +698,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	}
 
 	di->__pad1 = 0;
-	di->di_payload_format = cpu_to_be32(0);
+	di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0);
 	di->di_height = cpu_to_be32(0);
 	di->__pad2 = 0;
 	di->__pad3 = 0;
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 82003e872a3..b2baba5c50b 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -176,9 +176,10 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_generation = cpu_to_be64(di->di_generation);
 
 	str->di_flags = cpu_to_be32(di->di_flags);
-	str->di_payload_format = cpu_to_be32(di->di_payload_format);
 	str->di_height = cpu_to_be16(di->di_height);
-
+	str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
+					     !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
+					     GFS2_FORMAT_DE : 0);
 	str->di_depth = cpu_to_be16(di->di_depth);
 	str->di_entries = cpu_to_be32(di->di_entries);
 
@@ -197,7 +198,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 	printk(KERN_INFO "  di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
 
 	pv(di, di_flags, "0x%.8X");
-	pv(di, di_payload_format, "%u");
 	pv(di, di_height, "%u");
 
 	pv(di, di_depth, "%u");
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 585b43a94ac..0e4eade47bf 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -389,7 +389,6 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	ip->i_inode.i_nlink = 2;
 	ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
 	ip->i_di.di_flags |= GFS2_DIF_JDATA;
-	ip->i_di.di_payload_format = GFS2_FORMAT_DE;
 	ip->i_di.di_entries = 2;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
-- 
cgit v1.2.3


From bfded27ba010d1c3b0aa3843f97dc9b80de751be Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Nov 2006 16:05:38 -0500
Subject: [GFS2] Shrink gfs2_inode (8) - i_vn

This shrinks the size of the gfs2_inode by 8 bytes by
replacing the version counter with a one bit valid/invalid
flag.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glops.c     | 5 +++--
 fs/gfs2/incore.h    | 2 +-
 fs/gfs2/inode.c     | 4 ++--
 fs/gfs2/ops_inode.c | 2 +-
 4 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index aad45b7a927..9c2033714b0 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -305,8 +305,9 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
 	int data = (flags & DIO_DATA);
 
 	if (meta) {
+		struct gfs2_inode *ip = gl->gl_object;
 		gfs2_meta_inval(gl);
-		gl->gl_vn++;
+		set_bit(GIF_INVALID, &ip->i_flags);
 	}
 	if (data)
 		gfs2_page_inval(gl);
@@ -351,7 +352,7 @@ static int inode_go_lock(struct gfs2_holder *gh)
 	if (!ip)
 		return 0;
 
-	if (ip->i_vn != gl->gl_vn) {
+	if (test_bit(GIF_INVALID, &ip->i_flags)) {
 		error = gfs2_inode_refresh(ip);
 		if (error)
 			return error;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c0a8c3b6a85..227a74dc5ec 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -217,6 +217,7 @@ struct gfs2_alloc {
 };
 
 enum {
+	GIF_INVALID		= 0,
 	GIF_QD_LOCKED		= 1,
 	GIF_PAGED		= 2,
 	GIF_SW_PAGED		= 3,
@@ -228,7 +229,6 @@ struct gfs2_inode {
 
 	unsigned long i_flags;		/* GIF_... */
 
-	u64 i_vn;
 	struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
 
 	struct gfs2_glock *i_gl; /* Move into i_gh? */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index f6177fc6832..e467780837e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -145,7 +145,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i
 		if (unlikely(error))
 			goto fail_put;
 
-		ip->i_vn = ip->i_gl->gl_vn - 1;
+		set_bit(GIF_INVALID, &ip->i_flags);
 		error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
 		if (unlikely(error))
 			goto fail_iopen;
@@ -242,7 +242,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
 
 	error = gfs2_dinode_in(ip, dibh->b_data);
 	brelse(dibh);
-	ip->i_vn = ip->i_gl->gl_vn;
+	clear_bit(GIF_INVALID, &ip->i_flags);
 
 	return error;
 }
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 0e4eade47bf..b247f25effb 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -844,7 +844,7 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
 	struct gfs2_holder i_gh;
 	int error;
 
-	if (ip->i_vn == ip->i_gl->gl_vn)
+	if (!test_bit(GIF_INVALID, &ip->i_flags))
 		return generic_permission(inode, mask, gfs2_check_acl);
 
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-- 
cgit v1.2.3


From 294caaa3b8304c0a14c5039691caf23363bd9369 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 2 Nov 2006 11:59:28 -0500
Subject: [GFS2] Tidy up 0 initialisations in inode.c

We don't need to use endian conversions for 0 initialisations
when creating a new on-disk inode.

Cc: Christoph Hellwig <hch@infradead.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e467780837e..faf9b9ed791 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -673,15 +673,15 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	di->di_mode = cpu_to_be32(mode);
 	di->di_uid = cpu_to_be32(uid);
 	di->di_gid = cpu_to_be32(gid);
-	di->di_nlink = cpu_to_be32(0);
-	di->di_size = cpu_to_be64(0);
+	di->di_nlink = 0;
+	di->di_size = 0;
 	di->di_blocks = cpu_to_be64(1);
 	di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
 	di->di_major = cpu_to_be32(MAJOR(dev));
 	di->di_minor = cpu_to_be32(MINOR(dev));
 	di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
 	di->di_generation = cpu_to_be64(*generation);
-	di->di_flags = cpu_to_be32(0);
+	di->di_flags = 0;
 
 	if (S_ISREG(mode)) {
 		if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
@@ -699,13 +699,13 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 
 	di->__pad1 = 0;
 	di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0);
-	di->di_height = cpu_to_be32(0);
+	di->di_height = 0;
 	di->__pad2 = 0;
 	di->__pad3 = 0;
-	di->di_depth = cpu_to_be16(0);
-	di->di_entries = cpu_to_be32(0);
+	di->di_depth = 0;
+	di->di_entries = 0;
 	memset(&di->__pad4, 0, sizeof(di->__pad4));
-	di->di_eattr = cpu_to_be64(0);
+	di->di_eattr = 0;
 	memset(&di->di_reserved, 0, sizeof(di->di_reserved));
 
 	brelse(dibh);
-- 
cgit v1.2.3


From f6e58f01e8dc869803b9f73b2aa9d5bc3f32ca05 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 7 Nov 2006 15:14:58 -0500
Subject: [GFS2] Don't copy meta_header for rgrp in and out

The meta_header for an ondisk rgrp never changes, so there is no point
copying it in and back out to disk. Also there is no reason to keep
a copy for each rgrp in memory.

The code already checks to ensure that the header is correct before
it calls the routine to copy the data in, so that we don't even need
to check whether its correct on disk in the functions in ondisk.c

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ondisk.c | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index b2baba5c50b..f2495f1e21a 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -65,15 +65,6 @@ static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *bu
 	mh->mh_format = be32_to_cpu(str->mh_format);
 }
 
-static void gfs2_meta_header_out(const struct gfs2_meta_header_host *mh, void *buf)
-{
-	struct gfs2_meta_header *str = buf;
-
-	str->mh_magic = cpu_to_be32(mh->mh_magic);
-	str->mh_type = cpu_to_be32(mh->mh_type);
-	str->mh_format = cpu_to_be32(mh->mh_format);
-}
-
 void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
 {
 	const struct gfs2_sb *str = buf;
@@ -119,7 +110,6 @@ void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
 {
 	const struct gfs2_rgrp *str = buf;
 
-	gfs2_meta_header_in(&rg->rg_header, buf);
 	rg->rg_flags = be32_to_cpu(str->rg_flags);
 	rg->rg_free = be32_to_cpu(str->rg_free);
 	rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
@@ -130,7 +120,6 @@ void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
 {
 	struct gfs2_rgrp *str = buf;
 
-	gfs2_meta_header_out(&rg->rg_header, buf);
 	str->rg_flags = cpu_to_be32(rg->rg_flags);
 	str->rg_free = cpu_to_be32(rg->rg_free);
 	str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
-- 
cgit v1.2.3


From c594d8866460a2710c436839d79f334a0714a2a7 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 8 Nov 2006 09:01:13 -0500
Subject: [GFS2] Remove unused GL_DUMP flag

There is no way to set the GL_DUMP flag, and in any case the
same thing can be done with systemtap if required for debugging,
so this removes it.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c | 3 ---
 fs/gfs2/glock.h | 1 -
 2 files changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 44633c46717..746347a404c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1244,9 +1244,6 @@ restart:
 
 	clear_bit(GLF_PREFETCH, &gl->gl_flags);
 
-	if (error == GLR_TRYFAILED && (gh->gh_flags & GL_DUMP))
-		dump_glock(gl);
-
 	return error;
 }
 
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 2b2a889ee2c..b9856274df5 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -28,7 +28,6 @@
 #define GL_NOCACHE		0x00000400
 #define GL_NOCANCEL		0x00001000
 #define GL_AOP			0x00004000
-#define GL_DUMP			0x00008000
 
 #define GLR_TRYFAILED		13
 #define GLR_CANCELED		14
-- 
cgit v1.2.3


From 2ca99501fa5422e84f18333918a503433449e2b5 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 8 Nov 2006 10:26:54 -0500
Subject: [GFS2] Fix page lock/glock deadlock

This fixes a race between the glock and the page lock encountered
during truncate in gfs2_readpage and gfs2_prepare_write. The gfs2_readpages
function doesn't need the same fix since it only uses a try lock anyway, so
it will fail back to gfs2_readpage in the case of a potential deadlock.

This bug was spotted by Russell Cattelan.

Cc: Russell Cattelan <cattelan@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.h       |  1 -
 fs/gfs2/ops_address.c | 13 +++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index b9856274df5..a331bf8175e 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -27,7 +27,6 @@
 #define GL_ATIME		0x00000200
 #define GL_NOCACHE		0x00000400
 #define GL_NOCANCEL		0x00001000
-#define GL_AOP			0x00004000
 
 #define GLR_TRYFAILED		13
 #define GLR_CANCELED		14
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 5c3962c80e8..3822189454a 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -230,7 +230,7 @@ static int gfs2_readpage(struct file *file, struct page *page)
 				/* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */
 				goto skip_lock;
 		}
-		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|GL_AOP, &gh);
+		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
 		do_unlock = 1;
 		error = gfs2_glock_nq_m_atime(1, &gh);
 		if (unlikely(error))
@@ -254,6 +254,8 @@ skip_lock:
 out:
 	return error;
 out_unlock:
+	if (error == GLR_TRYFAILED)
+		error = AOP_TRUNCATED_PAGE;
 	unlock_page(page);
 	if (do_unlock)
 		gfs2_holder_uninit(&gh);
@@ -293,7 +295,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
 				goto skip_lock;
 		}
 		gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
-				 LM_FLAG_TRY_1CB|GL_ATIME|GL_AOP, &gh);
+				 LM_FLAG_TRY_1CB|GL_ATIME, &gh);
 		do_unlock = 1;
 		ret = gfs2_glock_nq_m_atime(1, &gh);
 		if (ret == GLR_TRYFAILED)
@@ -366,10 +368,13 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
 	unsigned int write_len = to - from;
 
 
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|GL_AOP, &ip->i_gh);
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh);
 	error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
-	if (error)
+	if (unlikely(error)) {
+		if (error == GLR_TRYFAILED)
+			error = AOP_TRUNCATED_PAGE;
 		goto out_uninit;
+	}
 
 	gfs2_write_calc_reserv(ip, write_len, &data_blocks, &ind_blocks);
 
-- 
cgit v1.2.3


From 6b124d8dba1f46c5f2caf3b3159bbe627f75b9b6 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 8 Nov 2006 12:51:06 -0500
Subject: [GFS2] Only set inode flags when required

We were setting the inode flags from GFS2's flags far too often, even when they
couldn't possibly have changed. This patch reduces the amount of flag
setting going on so that we do it only when the inode is read in or
when the flags have changed. The create case is covered by the "when
the inode is read in" case.

This also fixes a bug where we didn't set S_SYNC correctly.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c    | 11 +----------
 fs/gfs2/ops_file.c | 19 +++++++++++++++++++
 fs/gfs2/ops_file.h |  2 +-
 3 files changed, 21 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index faf9b9ed791..56b39be7692 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -54,16 +54,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 	i_size_write(inode, di->di_size);
 	inode->i_blocks = di->di_blocks <<
 		(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
-
-	if (di->di_flags & GFS2_DIF_IMMUTABLE)
-		inode->i_flags |= S_IMMUTABLE;
-	else
-		inode->i_flags &= ~S_IMMUTABLE;
-
-	if (di->di_flags & GFS2_DIF_APPENDONLY)
-		inode->i_flags |= S_APPEND;
-	else
-		inode->i_flags &= ~S_APPEND;
 }
 
 static int iget_test(struct inode *inode, void *opaque)
@@ -210,6 +200,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	di->di_generation = be64_to_cpu(str->di_generation);
 
 	di->di_flags = be32_to_cpu(str->di_flags);
+	gfs2_set_inode_flags(&ip->i_inode);
 	di->di_height = be16_to_cpu(str->di_height);
 
 	di->di_depth = be16_to_cpu(str->di_depth);
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index b52b9db1a2b..eabf6c61a96 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -266,6 +266,24 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
 	return error;
 }
 
+void gfs2_set_inode_flags(struct inode *inode)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_dinode_host *di = &ip->i_di;
+	unsigned int flags = inode->i_flags;
+
+	flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+	if (di->di_flags & GFS2_DIF_IMMUTABLE)
+		flags |= S_IMMUTABLE;
+	if (di->di_flags & GFS2_DIF_APPENDONLY)
+		flags |= S_APPEND;
+	if (di->di_flags & GFS2_DIF_NOATIME)
+		flags |= S_NOATIME;
+	if (di->di_flags & GFS2_DIF_SYNC)
+		flags |= S_SYNC;
+	inode->i_flags = flags;
+}
+
 /* Flags that can be set by user space */
 #define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA|			\
 			     GFS2_DIF_DIRECTIO|			\
@@ -338,6 +356,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
 	ip->i_di.di_flags = new_flags;
 	gfs2_dinode_out(ip, bh->b_data);
 	brelse(bh);
+	gfs2_set_inode_flags(inode);
 out_trans_end:
 	gfs2_trans_end(sdp);
 out:
diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h
index ce319f89ec8..7e5d8ec9c84 100644
--- a/fs/gfs2/ops_file.h
+++ b/fs/gfs2/ops_file.h
@@ -17,7 +17,7 @@ extern struct file gfs2_internal_file_sentinel;
 extern int gfs2_internal_read(struct gfs2_inode *ip,
 			      struct file_ra_state *ra_state,
 			      char *buf, loff_t *pos, unsigned size);
-
+extern void gfs2_set_inode_flags(struct inode *inode);
 extern const struct file_operations gfs2_file_fops;
 extern const struct file_operations gfs2_dir_fops;
 
-- 
cgit v1.2.3


From e7c698d74fc9e0e76b3086062b0519df3601ff52 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 8 Nov 2006 13:52:05 -0500
Subject: [GFS2] Inode number is constant

Since the inode number is constant, we don't need to keep updating
it everytime we refresh the other inode fields.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 56b39be7692..19b2736cd52 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -50,7 +50,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 	struct inode *inode = &ip->i_inode;
 	struct gfs2_dinode_host *di = &ip->i_di;
 
-	inode->i_ino = ip->i_num.no_addr;
 	i_size_write(inode, di->di_size);
 	inode->i_blocks = di->di_blocks <<
 		(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
@@ -73,6 +72,7 @@ static int iget_set(struct inode *inode, void *opaque)
 	struct gfs2_inum_host *inum = opaque;
 
 	ip->i_num = *inum;
+	inode->i_ino = inum->no_addr;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 9e2dbdac3df300516ffdd9a8631f23164d068a50 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 8 Nov 2006 15:45:46 -0500
Subject: [GFS2] Remove gfs2_inode_attr_in

This function wasn't really doing the right thing. There was no need
to update the inode size at this point and the updating of the
i_blocks field has now been moved to the places where di_blocks is
updated. A result of this patch and some those preceeding it is that
unlocking a glock is now a much more efficient process, since there
is no longer any requirement to copy data from the gfs2 inode into
the vfs inode at this point.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/bmap.c  |  4 ++++
 fs/gfs2/dir.c   |  4 ++++
 fs/gfs2/eattr.c |  6 ++++++
 fs/gfs2/glops.c |  8 ++------
 fs/gfs2/inode.c | 21 +++------------------
 fs/gfs2/inode.h |  7 +++++++
 6 files changed, 26 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 692d4a3da1b..06e3447ea13 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -163,6 +163,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
 	if (ip->i_di.di_size) {
 		*(__be64 *)(di + 1) = cpu_to_be64(block);
 		ip->i_di.di_blocks++;
+		gfs2_set_inode_blocks(&ip->i_inode);
 		di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
 	}
 
@@ -272,6 +273,7 @@ static int build_height(struct inode *inode, unsigned height)
 	*(__be64 *)(di + 1) = cpu_to_be64(bn);
 	ip->i_di.di_height += new_height;
 	ip->i_di.di_blocks += new_height;
+	gfs2_set_inode_blocks(&ip->i_inode);
 	di->di_height = cpu_to_be16(ip->i_di.di_height);
 	di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
 	brelse(dibh);
@@ -415,6 +417,7 @@ static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
 
 	*ptr = cpu_to_be64(*block);
 	ip->i_di.di_blocks++;
+	gfs2_set_inode_blocks(&ip->i_inode);
 
 	*new = 1;
 	return 0;
@@ -770,6 +773,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 		if (!ip->i_di.di_blocks)
 			gfs2_consist_inode(ip);
 		ip->i_di.di_blocks--;
+		gfs2_set_inode_blocks(&ip->i_inode);
 	}
 	if (bstart) {
 		if (metadata)
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index c82d7cb4a65..a2923fb91bb 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -901,6 +901,7 @@ static int dir_make_exhash(struct inode *inode)
 
 	dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
 	dip->i_di.di_blocks++;
+	gfs2_set_inode_blocks(&dip->i_inode);
 	dip->i_di.di_flags |= GFS2_DIF_EXHASH;
 
 	for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
@@ -1038,6 +1039,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 	error = gfs2_meta_inode_buffer(dip, &dibh);
 	if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
 		dip->i_di.di_blocks++;
+		gfs2_set_inode_blocks(&dip->i_inode);
 		gfs2_dinode_out(dip, dibh->b_data);
 		brelse(dibh);
 	}
@@ -1516,6 +1518,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
 		return error;
 	gfs2_trans_add_bh(ip->i_gl, bh, 1);
 	ip->i_di.di_blocks++;
+	gfs2_set_inode_blocks(&ip->i_inode);
 	gfs2_dinode_out(ip, bh->b_data);
 	brelse(bh);
 	return 0;
@@ -1860,6 +1863,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
 		if (!dip->i_di.di_blocks)
 			gfs2_consist_inode(dip);
 		dip->i_di.di_blocks--;
+		gfs2_set_inode_blocks(&dip->i_inode);
 	}
 
 	error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size);
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 7dde84775ba..ebebbdcd705 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -281,6 +281,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 		if (!ip->i_di.di_blocks)
 			gfs2_consist_inode(ip);
 		ip->i_di.di_blocks--;
+		gfs2_set_inode_blocks(&ip->i_inode);
 	}
 	if (bstart)
 		gfs2_free_meta(ip, bstart, blen);
@@ -598,6 +599,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
 	ea->ea_num_ptrs = 0;
 
 	ip->i_di.di_blocks++;
+	gfs2_set_inode_blocks(&ip->i_inode);
 
 	return 0;
 }
@@ -649,6 +651,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
 			gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
 
 			ip->i_di.di_blocks++;
+			gfs2_set_inode_blocks(&ip->i_inode);
 
 			copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize :
 							   data_len;
@@ -977,6 +980,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 		ip->i_di.di_eattr = blk;
 		ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
 		ip->i_di.di_blocks++;
+		gfs2_set_inode_blocks(&ip->i_inode);
 
 		eablk++;
 	}
@@ -1387,6 +1391,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
 		if (!ip->i_di.di_blocks)
 			gfs2_consist_inode(ip);
 		ip->i_di.di_blocks--;
+		gfs2_set_inode_blocks(&ip->i_inode);
 	}
 	if (bstart)
 		gfs2_free_meta(ip, bstart, blen);
@@ -1441,6 +1446,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
 	if (!ip->i_di.di_blocks)
 		gfs2_consist_inode(ip);
 	ip->i_di.di_blocks--;
+	gfs2_set_inode_blocks(&ip->i_inode);
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 9c2033714b0..b92de0af0bf 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -356,7 +356,6 @@ static int inode_go_lock(struct gfs2_holder *gh)
 		error = gfs2_inode_refresh(ip);
 		if (error)
 			return error;
-		gfs2_inode_attr_in(ip);
 	}
 
 	if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
@@ -380,11 +379,8 @@ static void inode_go_unlock(struct gfs2_holder *gh)
 	struct gfs2_glock *gl = gh->gh_gl;
 	struct gfs2_inode *ip = gl->gl_object;
 
-	if (ip == NULL)
-		return;
-	if (test_bit(GLF_DIRTY, &gl->gl_flags))
-		gfs2_inode_attr_in(ip);
-	gfs2_meta_cache_flush(ip);
+	if (ip)
+		gfs2_meta_cache_flush(ip);
 }
 
 /**
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 19b2736cd52..ea9ca239c87 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -38,29 +38,12 @@
 #include "trans.h"
 #include "util.h"
 
-/**
- * gfs2_inode_attr_in - Copy attributes from the dinode into the VFS inode
- * @ip: The GFS2 inode (with embedded disk inode data)
- * @inode:  The Linux VFS inode
- *
- */
-
-void gfs2_inode_attr_in(struct gfs2_inode *ip)
-{
-	struct inode *inode = &ip->i_inode;
-	struct gfs2_dinode_host *di = &ip->i_di;
-
-	i_size_write(inode, di->di_size);
-	inode->i_blocks = di->di_blocks <<
-		(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
-}
-
 static int iget_test(struct inode *inode, void *opaque)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_inum_host *inum = opaque;
 
-	if (ip && ip->i_num.no_addr == inum->no_addr)
+	if (ip->i_num.no_addr == inum->no_addr)
 		return 1;
 
 	return 0;
@@ -187,7 +170,9 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	 */
 	ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
 	di->di_size = be64_to_cpu(str->di_size);
+	i_size_write(&ip->i_inode, di->di_size);
 	di->di_blocks = be64_to_cpu(str->di_blocks);
+	gfs2_set_inode_blocks(&ip->i_inode);
 	ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
 	ip->i_inode.i_atime.tv_nsec = 0;
 	ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 54d584eddf2..46917edfdd6 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -25,6 +25,13 @@ static inline int gfs2_is_dir(struct gfs2_inode *ip)
 	return S_ISDIR(ip->i_inode.i_mode);
 }
 
+static inline void gfs2_set_inode_blocks(struct inode *inode)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	inode->i_blocks = ip->i_di.di_blocks <<
+		(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
+}
+
 void gfs2_inode_attr_in(struct gfs2_inode *ip);
 struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type);
 struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum);
-- 
cgit v1.2.3


From 7020933156ac2a8a7386314933e49948bf0438f7 Mon Sep 17 00:00:00 2001
From: Russell Cattelan <cattelan@redhat.com>
Date: Thu, 9 Nov 2006 11:28:08 -0500
Subject: [GFS2] Fix race in logging code

The log lock is dropped prior to io submittion, but
this exposes a hole in which the log data structures
may be going away due to a truncate.
Store the buffer head in a local pointer prior to
dropping the lock and relay on the buffer_head lock
for consitency on the buffer head.

Signed-Off-By: Russell Cattelan <cattelan@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/lops.c | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 8a654cd253d..4d7f94d8c7b 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -509,7 +509,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 {
 	LIST_HEAD(started);
 	struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
-	struct buffer_head *bh = NULL;
+	struct buffer_head *bh = NULL,*bh1 = NULL;
 	unsigned int offset = sizeof(struct gfs2_log_descriptor);
 	struct gfs2_log_descriptor *ld;
 	unsigned int limit;
@@ -537,8 +537,13 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 		list_for_each_entry_safe_continue(bd1, bdt,
 						  &sdp->sd_log_le_databuf,
 						  bd_le.le_list) {
+			/* store off the buffer head in a local ptr since
+			 * gfs2_bufdata might change when we drop the log lock
+			 */
+			bh1 = bd1->bd_bh;
+
 			/* An ordered write buffer */
-			if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) {
+			if (bh1 && !buffer_pinned(bh1)) {
 				list_move(&bd1->bd_le.le_list, &started);
 				if (bd1 == bd2) {
 					bd2 = NULL;
@@ -547,20 +552,21 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 							bd_le.le_list);
 				}
 				total_dbuf--;
-				if (bd1->bd_bh) {
-					get_bh(bd1->bd_bh);
-					if (buffer_dirty(bd1->bd_bh)) {
+				if (bh1) {
+					if (buffer_dirty(bh1)) {
+						get_bh(bh1);
+
 						gfs2_log_unlock(sdp);
-						wait_on_buffer(bd1->bd_bh);
-						ll_rw_block(WRITE, 1,
-							    &bd1->bd_bh);
+
+						ll_rw_block(SWRITE, 1, &bh1);
+						brelse(bh1);
+
 						gfs2_log_lock(sdp);
 					}
-					brelse(bd1->bd_bh);
 					continue;
 				}
 				continue;
-			} else if (bd1->bd_bh) { /* A journaled buffer */
+			} else if (bh1) { /* A journaled buffer */
 				int magic;
 				gfs2_log_unlock(sdp);
 				if (!bh) {
@@ -582,16 +588,16 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 					ld->ld_data2 = cpu_to_be32(0);
 					memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
 				}
-				magic = gfs2_check_magic(bd1->bd_bh);
-				*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
+				magic = gfs2_check_magic(bh1);
+				*ptr++ = cpu_to_be64(bh1->b_blocknr);
 				*ptr++ = cpu_to_be64((__u64)magic);
-				clear_buffer_escaped(bd1->bd_bh);
+				clear_buffer_escaped(bh1);
 				if (unlikely(magic != 0))
-					set_buffer_escaped(bd1->bd_bh);
+					set_buffer_escaped(bh1);
 				gfs2_log_lock(sdp);
 				if (n++ > num)
 					break;
-			} else if (!bd1->bd_bh) {
+			} else if (!bh1) {
 				total_dbuf--;
 				sdp->sd_log_num_databuf--;
 				list_del_init(&bd1->bd_le.le_list);
-- 
cgit v1.2.3


From 61057c6bb3a3d14cf2bea6ca20dc6d367e1d852e Mon Sep 17 00:00:00 2001
From: Russell Cattelan <cattelan@redhat.com>
Date: Thu, 9 Nov 2006 11:42:33 -0500
Subject: [GFS2] Remove unused zero_readpage from stuffed_readpage

Stuffed files only consist of a maximum of
(gfs2 block size - sizeof(struct gfs2_dinode)) bytes. Since the
gfs2 block size is always less than page size, we will never see
a call to stuffed_readpage for anything other than the first page
in the file.

Signed-off-by: Russell Cattelan <cattelan@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_address.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 3822189454a..2f7ef980d02 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -156,19 +156,6 @@ out_ignore:
 	return 0;
 }
 
-static int zero_readpage(struct page *page)
-{
-	void *kaddr;
-
-	kaddr = kmap_atomic(page, KM_USER0);
-	memset(kaddr, 0, PAGE_CACHE_SIZE);
-	kunmap_atomic(kaddr, KM_USER0);
-
-	SetPageUptodate(page);
-
-	return 0;
-}
-
 /**
  * stuffed_readpage - Fill in a Linux page with stuffed file data
  * @ip: the inode
@@ -183,9 +170,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
 	void *kaddr;
 	int error;
 
-	/* Only the first page of a stuffed file might contain data */
-	if (unlikely(page->index))
-		return zero_readpage(page);
+	BUG_ON(page->index);
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (error)
@@ -737,6 +722,9 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
 			if (!atomic_read(&aspace->i_writecount))
 				return 0;
 
+			if (!(gfp_mask & __GFP_WAIT))
+				return 0;
+
 			if (time_after_eq(jiffies, t)) {
 				stuck_releasepage(bh);
 				/* should we withdraw here? */
-- 
cgit v1.2.3


From fdda387f73947e6ae511ec601f5b3c6fbb582aac Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <pcaulfie@redhat.com>
Date: Thu, 2 Nov 2006 11:19:21 -0500
Subject: [DLM] Add support for tcp communications

The following patch adds a TCP based communications layer
to the DLM which is compile time selectable. The existing SCTP
layer gives the advantage of allowing multihoming, whereas
the TCP layer has been heavily tested in previous versions of
the DLM and is known to be robust and therefore can be used as
a baseline for performance testing.

Signed-off-by: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/Kconfig         |   17 +
 fs/dlm/Makefile        |    4 +-
 fs/dlm/lowcomms-sctp.c | 1239 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/dlm/lowcomms-tcp.c  | 1263 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/dlm/lowcomms.c      | 1239 -----------------------------------------------
 5 files changed, 2522 insertions(+), 1240 deletions(-)
 create mode 100644 fs/dlm/lowcomms-sctp.c
 create mode 100644 fs/dlm/lowcomms-tcp.c
 delete mode 100644 fs/dlm/lowcomms.c

(limited to 'fs')

diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index 81b2c6465ee..c5985b883b2 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -9,6 +9,23 @@ config DLM
 	A general purpose distributed lock manager for kernel or userspace
 	applications.
 
+choice
+	prompt "Select DLM communications protocol"
+	depends on DLM
+	default DLM_TCP
+	help
+	The DLM Can use TCP or SCTP for it's network communications.
+	SCTP supports multi-homed operations whereas TCP doesn't.
+	However, SCTP seems to have stability problems at the moment.
+
+config DLM_TCP
+	bool "TCP/IP"
+
+config DLM_SCTP
+	bool "SCTP"
+
+endchoice
+
 config DLM_DEBUG
 	bool "DLM debugging"
 	depends on DLM
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile
index 1832e0297f7..65388944eba 100644
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@@ -4,7 +4,6 @@ dlm-y :=			ast.o \
 				dir.o \
 				lock.o \
 				lockspace.o \
-				lowcomms.o \
 				main.o \
 				member.o \
 				memory.o \
@@ -17,3 +16,6 @@ dlm-y :=			ast.o \
 				util.o
 dlm-$(CONFIG_DLM_DEBUG) +=	debug_fs.o
 
+dlm-$(CONFIG_DLM_TCP)   += lowcomms-tcp.o
+
+dlm-$(CONFIG_DLM_SCTP)  += lowcomms-sctp.o
\ No newline at end of file
diff --git a/fs/dlm/lowcomms-sctp.c b/fs/dlm/lowcomms-sctp.c
new file mode 100644
index 00000000000..6da6b14d5a6
--- /dev/null
+++ b/fs/dlm/lowcomms-sctp.c
@@ -0,0 +1,1239 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+/*
+ * lowcomms.c
+ *
+ * This is the "low-level" comms layer.
+ *
+ * It is responsible for sending/receiving messages
+ * from other nodes in the cluster.
+ *
+ * Cluster nodes are referred to by their nodeids. nodeids are
+ * simply 32 bit numbers to the locking module - if they need to
+ * be expanded for the cluster infrastructure then that is it's
+ * responsibility. It is this layer's
+ * responsibility to resolve these into IP address or
+ * whatever it needs for inter-node communication.
+ *
+ * The comms level is two kernel threads that deal mainly with
+ * the receiving of messages from other nodes and passing them
+ * up to the mid-level comms layer (which understands the
+ * message format) for execution by the locking core, and
+ * a send thread which does all the setting up of connections
+ * to remote nodes and the sending of data. Threads are not allowed
+ * to send their own data because it may cause them to wait in times
+ * of high load. Also, this way, the sending thread can collect together
+ * messages bound for one node and send them in one block.
+ *
+ * I don't see any problem with the recv thread executing the locking
+ * code on behalf of remote processes as the locking code is
+ * short, efficient and never (well, hardly ever) waits.
+ *
+ */
+
+#include <asm/ioctls.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/sctp/user.h>
+#include <linux/pagemap.h>
+#include <linux/socket.h>
+#include <linux/idr.h>
+
+#include "dlm_internal.h"
+#include "lowcomms.h"
+#include "config.h"
+#include "midcomms.h"
+
+static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
+static int			dlm_local_count;
+static int			dlm_local_nodeid;
+
+/* One of these per connected node */
+
+#define NI_INIT_PENDING 1
+#define NI_WRITE_PENDING 2
+
+struct nodeinfo {
+	spinlock_t		lock;
+	sctp_assoc_t		assoc_id;
+	unsigned long		flags;
+	struct list_head	write_list; /* nodes with pending writes */
+	struct list_head	writequeue; /* outgoing writequeue_entries */
+	spinlock_t		writequeue_lock;
+	int			nodeid;
+};
+
+static DEFINE_IDR(nodeinfo_idr);
+static struct rw_semaphore	nodeinfo_lock;
+static int			max_nodeid;
+
+struct cbuf {
+	unsigned		base;
+	unsigned		len;
+	unsigned		mask;
+};
+
+/* Just the one of these, now. But this struct keeps
+   the connection-specific variables together */
+
+#define CF_READ_PENDING 1
+
+struct connection {
+	struct socket          *sock;
+	unsigned long		flags;
+	struct page            *rx_page;
+	atomic_t		waiting_requests;
+	struct cbuf		cb;
+	int                     eagain_flag;
+};
+
+/* An entry waiting to be sent */
+
+struct writequeue_entry {
+	struct list_head	list;
+	struct page            *page;
+	int			offset;
+	int			len;
+	int			end;
+	int			users;
+	struct nodeinfo        *ni;
+};
+
+#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0)
+#define CBUF_EMPTY(cb) ((cb)->len == 0)
+#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
+#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)
+
+#define CBUF_INIT(cb, size) \
+do { \
+	(cb)->base = (cb)->len = 0; \
+	(cb)->mask = ((size)-1); \
+} while(0)
+
+#define CBUF_EAT(cb, n) \
+do { \
+	(cb)->len  -= (n); \
+	(cb)->base += (n); \
+	(cb)->base &= (cb)->mask; \
+} while(0)
+
+
+/* List of nodes which have writes pending */
+static struct list_head write_nodes;
+static spinlock_t write_nodes_lock;
+
+/* Maximum number of incoming messages to process before
+ * doing a schedule()
+ */
+#define MAX_RX_MSG_COUNT 25
+
+/* Manage daemons */
+static struct task_struct *recv_task;
+static struct task_struct *send_task;
+static wait_queue_head_t lowcomms_recv_wait;
+static atomic_t accepting;
+
+/* The SCTP connection */
+static struct connection sctp_con;
+
+
+static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
+{
+	struct sockaddr_storage addr;
+	int error;
+
+	if (!dlm_local_count)
+		return -1;
+
+	error = dlm_nodeid_to_addr(nodeid, &addr);
+	if (error)
+		return error;
+
+	if (dlm_local_addr[0]->ss_family == AF_INET) {
+	        struct sockaddr_in *in4  = (struct sockaddr_in *) &addr;
+		struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr;
+		ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
+	} else {
+	        struct sockaddr_in6 *in6  = (struct sockaddr_in6 *) &addr;
+		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
+		memcpy(&ret6->sin6_addr, &in6->sin6_addr,
+		       sizeof(in6->sin6_addr));
+	}
+
+	return 0;
+}
+
+static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
+{
+	struct nodeinfo *ni;
+	int r;
+	int n;
+
+	down_read(&nodeinfo_lock);
+	ni = idr_find(&nodeinfo_idr, nodeid);
+	up_read(&nodeinfo_lock);
+
+	if (!ni && alloc) {
+		down_write(&nodeinfo_lock);
+
+		ni = idr_find(&nodeinfo_idr, nodeid);
+		if (ni)
+			goto out_up;
+
+		r = idr_pre_get(&nodeinfo_idr, alloc);
+		if (!r)
+			goto out_up;
+
+		ni = kmalloc(sizeof(struct nodeinfo), alloc);
+		if (!ni)
+			goto out_up;
+
+		r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n);
+		if (r) {
+			kfree(ni);
+			ni = NULL;
+			goto out_up;
+		}
+		if (n != nodeid) {
+			idr_remove(&nodeinfo_idr, n);
+			kfree(ni);
+			ni = NULL;
+			goto out_up;
+		}
+		memset(ni, 0, sizeof(struct nodeinfo));
+		spin_lock_init(&ni->lock);
+		INIT_LIST_HEAD(&ni->writequeue);
+		spin_lock_init(&ni->writequeue_lock);
+		ni->nodeid = nodeid;
+
+		if (nodeid > max_nodeid)
+			max_nodeid = nodeid;
+	out_up:
+		up_write(&nodeinfo_lock);
+	}
+
+	return ni;
+}
+
+/* Don't call this too often... */
+static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc)
+{
+	int i;
+	struct nodeinfo *ni;
+
+	for (i=1; i<=max_nodeid; i++) {
+		ni = nodeid2nodeinfo(i, 0);
+		if (ni && ni->assoc_id == assoc)
+			return ni;
+	}
+	return NULL;
+}
+
+/* Data or notification available on socket */
+static void lowcomms_data_ready(struct sock *sk, int count_unused)
+{
+	atomic_inc(&sctp_con.waiting_requests);
+	if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags))
+		return;
+
+	wake_up_interruptible(&lowcomms_recv_wait);
+}
+
+
+/* Add the port number to an IP6 or 4 sockaddr and return the address length.
+   Also padd out the struct with zeros to make comparisons meaningful */
+
+static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
+			  int *addr_len)
+{
+	struct sockaddr_in *local4_addr;
+	struct sockaddr_in6 *local6_addr;
+
+	if (!dlm_local_count)
+		return;
+
+	if (!port) {
+		if (dlm_local_addr[0]->ss_family == AF_INET) {
+			local4_addr = (struct sockaddr_in *)dlm_local_addr[0];
+			port = be16_to_cpu(local4_addr->sin_port);
+		} else {
+			local6_addr = (struct sockaddr_in6 *)dlm_local_addr[0];
+			port = be16_to_cpu(local6_addr->sin6_port);
+		}
+	}
+
+	saddr->ss_family = dlm_local_addr[0]->ss_family;
+	if (dlm_local_addr[0]->ss_family == AF_INET) {
+		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
+		in4_addr->sin_port = cpu_to_be16(port);
+		memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
+		memset(in4_addr+1, 0, sizeof(struct sockaddr_storage) -
+				      sizeof(struct sockaddr_in));
+		*addr_len = sizeof(struct sockaddr_in);
+	} else {
+		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
+		in6_addr->sin6_port = cpu_to_be16(port);
+		memset(in6_addr+1, 0, sizeof(struct sockaddr_storage) -
+				      sizeof(struct sockaddr_in6));
+		*addr_len = sizeof(struct sockaddr_in6);
+	}
+}
+
+/* Close the connection and tidy up */
+static void close_connection(void)
+{
+	if (sctp_con.sock) {
+		sock_release(sctp_con.sock);
+		sctp_con.sock = NULL;
+	}
+
+	if (sctp_con.rx_page) {
+		__free_page(sctp_con.rx_page);
+		sctp_con.rx_page = NULL;
+	}
+}
+
+/* We only send shutdown messages to nodes that are not part of the cluster */
+static void send_shutdown(sctp_assoc_t associd)
+{
+	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
+	struct msghdr outmessage;
+	struct cmsghdr *cmsg;
+	struct sctp_sndrcvinfo *sinfo;
+	int ret;
+
+	outmessage.msg_name = NULL;
+	outmessage.msg_namelen = 0;
+	outmessage.msg_control = outcmsg;
+	outmessage.msg_controllen = sizeof(outcmsg);
+	outmessage.msg_flags = MSG_EOR;
+
+	cmsg = CMSG_FIRSTHDR(&outmessage);
+	cmsg->cmsg_level = IPPROTO_SCTP;
+	cmsg->cmsg_type = SCTP_SNDRCV;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
+	outmessage.msg_controllen = cmsg->cmsg_len;
+	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
+
+	sinfo->sinfo_flags |= MSG_EOF;
+	sinfo->sinfo_assoc_id = associd;
+
+	ret = kernel_sendmsg(sctp_con.sock, &outmessage, NULL, 0, 0);
+
+	if (ret != 0)
+		log_print("send EOF to node failed: %d", ret);
+}
+
+
+/* INIT failed but we don't know which node...
+   restart INIT on all pending nodes */
+static void init_failed(void)
+{
+	int i;
+	struct nodeinfo *ni;
+
+	for (i=1; i<=max_nodeid; i++) {
+		ni = nodeid2nodeinfo(i, 0);
+		if (!ni)
+			continue;
+
+		if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) {
+			ni->assoc_id = 0;
+			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
+				spin_lock_bh(&write_nodes_lock);
+				list_add_tail(&ni->write_list, &write_nodes);
+				spin_unlock_bh(&write_nodes_lock);
+			}
+		}
+	}
+	wake_up_process(send_task);
+}
+
+/* Something happened to an association */
+static void process_sctp_notification(struct msghdr *msg, char *buf)
+{
+	union sctp_notification *sn = (union sctp_notification *)buf;
+
+	if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) {
+		switch (sn->sn_assoc_change.sac_state) {
+
+		case SCTP_COMM_UP:
+		case SCTP_RESTART:
+		{
+			/* Check that the new node is in the lockspace */
+			struct sctp_prim prim;
+			mm_segment_t fs;
+			int nodeid;
+			int prim_len, ret;
+			int addr_len;
+			struct nodeinfo *ni;
+
+			/* This seems to happen when we received a connection
+			 * too early... or something...  anyway, it happens but
+			 * we always seem to get a real message too, see
+			 * receive_from_sock */
+
+			if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) {
+				log_print("COMM_UP for invalid assoc ID %d",
+					 (int)sn->sn_assoc_change.sac_assoc_id);
+				init_failed();
+				return;
+			}
+			memset(&prim, 0, sizeof(struct sctp_prim));
+			prim_len = sizeof(struct sctp_prim);
+			prim.ssp_assoc_id = sn->sn_assoc_change.sac_assoc_id;
+
+			fs = get_fs();
+			set_fs(get_ds());
+			ret = sctp_con.sock->ops->getsockopt(sctp_con.sock,
+						IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
+						(char*)&prim, &prim_len);
+			set_fs(fs);
+			if (ret < 0) {
+				struct nodeinfo *ni;
+
+				log_print("getsockopt/sctp_primary_addr on "
+					  "new assoc %d failed : %d",
+				    (int)sn->sn_assoc_change.sac_assoc_id, ret);
+
+				/* Retry INIT later */
+				ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
+				if (ni)
+					clear_bit(NI_INIT_PENDING, &ni->flags);
+				return;
+			}
+			make_sockaddr(&prim.ssp_addr, 0, &addr_len);
+			if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
+				log_print("reject connect from unknown addr");
+				send_shutdown(prim.ssp_assoc_id);
+				return;
+			}
+
+			ni = nodeid2nodeinfo(nodeid, GFP_KERNEL);
+			if (!ni)
+				return;
+
+			/* Save the assoc ID */
+			spin_lock(&ni->lock);
+			ni->assoc_id = sn->sn_assoc_change.sac_assoc_id;
+			spin_unlock(&ni->lock);
+
+			log_print("got new/restarted association %d nodeid %d",
+			       (int)sn->sn_assoc_change.sac_assoc_id, nodeid);
+
+			/* Send any pending writes */
+			clear_bit(NI_INIT_PENDING, &ni->flags);
+			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
+				spin_lock_bh(&write_nodes_lock);
+				list_add_tail(&ni->write_list, &write_nodes);
+				spin_unlock_bh(&write_nodes_lock);
+			}
+			wake_up_process(send_task);
+		}
+		break;
+
+		case SCTP_COMM_LOST:
+		case SCTP_SHUTDOWN_COMP:
+		{
+			struct nodeinfo *ni;
+
+			ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
+			if (ni) {
+				spin_lock(&ni->lock);
+				ni->assoc_id = 0;
+				spin_unlock(&ni->lock);
+			}
+		}
+		break;
+
+		/* We don't know which INIT failed, so clear the PENDING flags
+		 * on them all.  if assoc_id is zero then it will then try
+		 * again */
+
+		case SCTP_CANT_STR_ASSOC:
+		{
+			log_print("Can't start SCTP association - retrying");
+			init_failed();
+		}
+		break;
+
+		default:
+			log_print("unexpected SCTP assoc change id=%d state=%d",
+				  (int)sn->sn_assoc_change.sac_assoc_id,
+				  sn->sn_assoc_change.sac_state);
+		}
+	}
+}
+
+/* Data received from remote end */
+static int receive_from_sock(void)
+{
+	int ret = 0;
+	struct msghdr msg;
+	struct kvec iov[2];
+	unsigned len;
+	int r;
+	struct sctp_sndrcvinfo *sinfo;
+	struct cmsghdr *cmsg;
+	struct nodeinfo *ni;
+
+	/* These two are marginally too big for stack allocation, but this
+	 * function is (currently) only called by dlm_recvd so static should be
+	 * OK.
+	 */
+	static struct sockaddr_storage msgname;
+	static char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
+
+	if (sctp_con.sock == NULL)
+		goto out;
+
+	if (sctp_con.rx_page == NULL) {
+		/*
+		 * This doesn't need to be atomic, but I think it should
+		 * improve performance if it is.
+		 */
+		sctp_con.rx_page = alloc_page(GFP_ATOMIC);
+		if (sctp_con.rx_page == NULL)
+			goto out_resched;
+		CBUF_INIT(&sctp_con.cb, PAGE_CACHE_SIZE);
+	}
+
+	memset(&incmsg, 0, sizeof(incmsg));
+	memset(&msgname, 0, sizeof(msgname));
+
+	memset(incmsg, 0, sizeof(incmsg));
+	msg.msg_name = &msgname;
+	msg.msg_namelen = sizeof(msgname);
+	msg.msg_flags = 0;
+	msg.msg_control = incmsg;
+	msg.msg_controllen = sizeof(incmsg);
+	msg.msg_iovlen = 1;
+
+	/* I don't see why this circular buffer stuff is necessary for SCTP
+	 * which is a packet-based protocol, but the whole thing breaks under
+	 * load without it! The overhead is minimal (and is in the TCP lowcomms
+	 * anyway, of course) so I'll leave it in until I can figure out what's
+	 * really happening.
+	 */
+
+	/*
+	 * iov[0] is the bit of the circular buffer between the current end
+	 * point (cb.base + cb.len) and the end of the buffer.
+	 */
+	iov[0].iov_len = sctp_con.cb.base - CBUF_DATA(&sctp_con.cb);
+	iov[0].iov_base = page_address(sctp_con.rx_page) +
+			  CBUF_DATA(&sctp_con.cb);
+	iov[1].iov_len = 0;
+
+	/*
+	 * iov[1] is the bit of the circular buffer between the start of the
+	 * buffer and the start of the currently used section (cb.base)
+	 */
+	if (CBUF_DATA(&sctp_con.cb) >= sctp_con.cb.base) {
+		iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&sctp_con.cb);
+		iov[1].iov_len = sctp_con.cb.base;
+		iov[1].iov_base = page_address(sctp_con.rx_page);
+		msg.msg_iovlen = 2;
+	}
+	len = iov[0].iov_len + iov[1].iov_len;
+
+	r = ret = kernel_recvmsg(sctp_con.sock, &msg, iov, msg.msg_iovlen, len,
+				 MSG_NOSIGNAL | MSG_DONTWAIT);
+	if (ret <= 0)
+		goto out_close;
+
+	msg.msg_control = incmsg;
+	msg.msg_controllen = sizeof(incmsg);
+	cmsg = CMSG_FIRSTHDR(&msg);
+	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+
+	if (msg.msg_flags & MSG_NOTIFICATION) {
+		process_sctp_notification(&msg, page_address(sctp_con.rx_page));
+		return 0;
+	}
+
+	/* Is this a new association ? */
+	ni = nodeid2nodeinfo(le32_to_cpu(sinfo->sinfo_ppid), GFP_KERNEL);
+	if (ni) {
+		ni->assoc_id = sinfo->sinfo_assoc_id;
+		if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) {
+
+			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
+				spin_lock_bh(&write_nodes_lock);
+				list_add_tail(&ni->write_list, &write_nodes);
+				spin_unlock_bh(&write_nodes_lock);
+			}
+			wake_up_process(send_task);
+		}
+	}
+
+	/* INIT sends a message with length of 1 - ignore it */
+	if (r == 1)
+		return 0;
+
+	CBUF_ADD(&sctp_con.cb, ret);
+	ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
+					  page_address(sctp_con.rx_page),
+					  sctp_con.cb.base, sctp_con.cb.len,
+					  PAGE_CACHE_SIZE);
+	if (ret < 0)
+		goto out_close;
+	CBUF_EAT(&sctp_con.cb, ret);
+
+      out:
+	ret = 0;
+	goto out_ret;
+
+      out_resched:
+	lowcomms_data_ready(sctp_con.sock->sk, 0);
+	ret = 0;
+	schedule();
+	goto out_ret;
+
+      out_close:
+	if (ret != -EAGAIN)
+		log_print("error reading from sctp socket: %d", ret);
+      out_ret:
+	return ret;
+}
+
+/* Bind to an IP address. SCTP allows multiple address so it can do multi-homing */
+static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
+{
+	mm_segment_t fs;
+	int result = 0;
+
+	fs = get_fs();
+	set_fs(get_ds());
+	if (num == 1)
+		result = sctp_con.sock->ops->bind(sctp_con.sock,
+					(struct sockaddr *) addr, addr_len);
+	else
+		result = sctp_con.sock->ops->setsockopt(sctp_con.sock, SOL_SCTP,
+				SCTP_SOCKOPT_BINDX_ADD, (char *)addr, addr_len);
+	set_fs(fs);
+
+	if (result < 0)
+		log_print("Can't bind to port %d addr number %d",
+			  dlm_config.tcp_port, num);
+
+	return result;
+}
+
+static void init_local(void)
+{
+	struct sockaddr_storage sas, *addr;
+	int i;
+
+	dlm_local_nodeid = dlm_our_nodeid();
+
+	for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) {
+		if (dlm_our_addr(&sas, i))
+			break;
+
+		addr = kmalloc(sizeof(*addr), GFP_KERNEL);
+		if (!addr)
+			break;
+		memcpy(addr, &sas, sizeof(*addr));
+		dlm_local_addr[dlm_local_count++] = addr;
+	}
+}
+
+/* Initialise SCTP socket and bind to all interfaces */
+static int init_sock(void)
+{
+	mm_segment_t fs;
+	struct socket *sock = NULL;
+	struct sockaddr_storage localaddr;
+	struct sctp_event_subscribe subscribe;
+	int result = -EINVAL, num = 1, i, addr_len;
+
+	if (!dlm_local_count) {
+		init_local();
+		if (!dlm_local_count) {
+			log_print("no local IP address has been set");
+			goto out;
+		}
+	}
+
+	result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET,
+				  IPPROTO_SCTP, &sock);
+	if (result < 0) {
+		log_print("Can't create comms socket, check SCTP is loaded");
+		goto out;
+	}
+
+	/* Listen for events */
+	memset(&subscribe, 0, sizeof(subscribe));
+	subscribe.sctp_data_io_event = 1;
+	subscribe.sctp_association_event = 1;
+	subscribe.sctp_send_failure_event = 1;
+	subscribe.sctp_shutdown_event = 1;
+	subscribe.sctp_partial_delivery_event = 1;
+
+	fs = get_fs();
+	set_fs(get_ds());
+	result = sock->ops->setsockopt(sock, SOL_SCTP, SCTP_EVENTS,
+				       (char *)&subscribe, sizeof(subscribe));
+	set_fs(fs);
+
+	if (result < 0) {
+		log_print("Failed to set SCTP_EVENTS on socket: result=%d",
+			  result);
+		goto create_delsock;
+	}
+
+	/* Init con struct */
+	sock->sk->sk_user_data = &sctp_con;
+	sctp_con.sock = sock;
+	sctp_con.sock->sk->sk_data_ready = lowcomms_data_ready;
+
+	/* Bind to all interfaces. */
+	for (i = 0; i < dlm_local_count; i++) {
+		memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
+		make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len);
+
+		result = add_bind_addr(&localaddr, addr_len, num);
+		if (result)
+			goto create_delsock;
+		++num;
+	}
+
+	result = sock->ops->listen(sock, 5);
+	if (result < 0) {
+		log_print("Can't set socket listening");
+		goto create_delsock;
+	}
+
+	return 0;
+
+ create_delsock:
+	sock_release(sock);
+	sctp_con.sock = NULL;
+ out:
+	return result;
+}
+
+
+static struct writequeue_entry *new_writequeue_entry(gfp_t allocation)
+{
+	struct writequeue_entry *entry;
+
+	entry = kmalloc(sizeof(struct writequeue_entry), allocation);
+	if (!entry)
+		return NULL;
+
+	entry->page = alloc_page(allocation);
+	if (!entry->page) {
+		kfree(entry);
+		return NULL;
+	}
+
+	entry->offset = 0;
+	entry->len = 0;
+	entry->end = 0;
+	entry->users = 0;
+
+	return entry;
+}
+
+void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
+{
+	struct writequeue_entry *e;
+	int offset = 0;
+	int users = 0;
+	struct nodeinfo *ni;
+
+	if (!atomic_read(&accepting))
+		return NULL;
+
+	ni = nodeid2nodeinfo(nodeid, allocation);
+	if (!ni)
+		return NULL;
+
+	spin_lock(&ni->writequeue_lock);
+	e = list_entry(ni->writequeue.prev, struct writequeue_entry, list);
+	if (((struct list_head *) e == &ni->writequeue) ||
+	    (PAGE_CACHE_SIZE - e->end < len)) {
+		e = NULL;
+	} else {
+		offset = e->end;
+		e->end += len;
+		users = e->users++;
+	}
+	spin_unlock(&ni->writequeue_lock);
+
+	if (e) {
+	      got_one:
+		if (users == 0)
+			kmap(e->page);
+		*ppc = page_address(e->page) + offset;
+		return e;
+	}
+
+	e = new_writequeue_entry(allocation);
+	if (e) {
+		spin_lock(&ni->writequeue_lock);
+		offset = e->end;
+		e->end += len;
+		e->ni = ni;
+		users = e->users++;
+		list_add_tail(&e->list, &ni->writequeue);
+		spin_unlock(&ni->writequeue_lock);
+		goto got_one;
+	}
+	return NULL;
+}
+
+void dlm_lowcomms_commit_buffer(void *arg)
+{
+	struct writequeue_entry *e = (struct writequeue_entry *) arg;
+	int users;
+	struct nodeinfo *ni = e->ni;
+
+	if (!atomic_read(&accepting))
+		return;
+
+	spin_lock(&ni->writequeue_lock);
+	users = --e->users;
+	if (users)
+		goto out;
+	e->len = e->end - e->offset;
+	kunmap(e->page);
+	spin_unlock(&ni->writequeue_lock);
+
+	if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
+		spin_lock_bh(&write_nodes_lock);
+		list_add_tail(&ni->write_list, &write_nodes);
+		spin_unlock_bh(&write_nodes_lock);
+		wake_up_process(send_task);
+	}
+	return;
+
+      out:
+	spin_unlock(&ni->writequeue_lock);
+	return;
+}
+
+static void free_entry(struct writequeue_entry *e)
+{
+	__free_page(e->page);
+	kfree(e);
+}
+
+/* Initiate an SCTP association. In theory we could just use sendmsg() on
+   the first IP address and it should work, but this allows us to set up the
+   association before sending any valuable data that we can't afford to lose.
+   It also keeps the send path clean as it can now always use the association ID */
+static void initiate_association(int nodeid)
+{
+	struct sockaddr_storage rem_addr;
+	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
+	struct msghdr outmessage;
+	struct cmsghdr *cmsg;
+	struct sctp_sndrcvinfo *sinfo;
+	int ret;
+	int addrlen;
+	char buf[1];
+	struct kvec iov[1];
+	struct nodeinfo *ni;
+
+	log_print("Initiating association with node %d", nodeid);
+
+	ni = nodeid2nodeinfo(nodeid, GFP_KERNEL);
+	if (!ni)
+		return;
+
+	if (nodeid_to_addr(nodeid, (struct sockaddr *)&rem_addr)) {
+		log_print("no address for nodeid %d", nodeid);
+		return;
+	}
+
+	make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen);
+
+	outmessage.msg_name = &rem_addr;
+	outmessage.msg_namelen = addrlen;
+	outmessage.msg_control = outcmsg;
+	outmessage.msg_controllen = sizeof(outcmsg);
+	outmessage.msg_flags = MSG_EOR;
+
+	iov[0].iov_base = buf;
+	iov[0].iov_len = 1;
+
+	/* Real INIT messages seem to cause trouble. Just send a 1 byte message
+	   we can afford to lose */
+	cmsg = CMSG_FIRSTHDR(&outmessage);
+	cmsg->cmsg_level = IPPROTO_SCTP;
+	cmsg->cmsg_type = SCTP_SNDRCV;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
+	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
+	sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
+
+	outmessage.msg_controllen = cmsg->cmsg_len;
+	ret = kernel_sendmsg(sctp_con.sock, &outmessage, iov, 1, 1);
+	if (ret < 0) {
+		log_print("send INIT to node failed: %d", ret);
+		/* Try again later */
+		clear_bit(NI_INIT_PENDING, &ni->flags);
+	}
+}
+
+/* Send a message */
+static int send_to_sock(struct nodeinfo *ni)
+{
+	int ret = 0;
+	struct writequeue_entry *e;
+	int len, offset;
+	struct msghdr outmsg;
+	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
+	struct cmsghdr *cmsg;
+	struct sctp_sndrcvinfo *sinfo;
+	struct kvec iov;
+
+        /* See if we need to init an association before we start
+	   sending precious messages */
+	spin_lock(&ni->lock);
+	if (!ni->assoc_id && !test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
+		spin_unlock(&ni->lock);
+		initiate_association(ni->nodeid);
+		return 0;
+	}
+	spin_unlock(&ni->lock);
+
+	outmsg.msg_name = NULL; /* We use assoc_id */
+	outmsg.msg_namelen = 0;
+	outmsg.msg_control = outcmsg;
+	outmsg.msg_controllen = sizeof(outcmsg);
+	outmsg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | MSG_EOR;
+
+	cmsg = CMSG_FIRSTHDR(&outmsg);
+	cmsg->cmsg_level = IPPROTO_SCTP;
+	cmsg->cmsg_type = SCTP_SNDRCV;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
+	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
+	sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
+	sinfo->sinfo_assoc_id = ni->assoc_id;
+	outmsg.msg_controllen = cmsg->cmsg_len;
+
+	spin_lock(&ni->writequeue_lock);
+	for (;;) {
+		if (list_empty(&ni->writequeue))
+			break;
+		e = list_entry(ni->writequeue.next, struct writequeue_entry,
+			       list);
+		len = e->len;
+		offset = e->offset;
+		BUG_ON(len == 0 && e->users == 0);
+		spin_unlock(&ni->writequeue_lock);
+		kmap(e->page);
+
+		ret = 0;
+		if (len) {
+			iov.iov_base = page_address(e->page)+offset;
+			iov.iov_len = len;
+
+			ret = kernel_sendmsg(sctp_con.sock, &outmsg, &iov, 1,
+					     len);
+			if (ret == -EAGAIN) {
+				sctp_con.eagain_flag = 1;
+				goto out;
+			} else if (ret < 0)
+				goto send_error;
+		} else {
+			/* Don't starve people filling buffers */
+			schedule();
+		}
+
+		spin_lock(&ni->writequeue_lock);
+		e->offset += ret;
+		e->len -= ret;
+
+		if (e->len == 0 && e->users == 0) {
+			list_del(&e->list);
+			free_entry(e);
+			continue;
+		}
+	}
+	spin_unlock(&ni->writequeue_lock);
+ out:
+	return ret;
+
+ send_error:
+	log_print("Error sending to node %d %d", ni->nodeid, ret);
+	spin_lock(&ni->lock);
+	if (!test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
+		ni->assoc_id = 0;
+		spin_unlock(&ni->lock);
+		initiate_association(ni->nodeid);
+	} else
+		spin_unlock(&ni->lock);
+
+	return ret;
+}
+
+/* Try to send any messages that are pending */
+static void process_output_queue(void)
+{
+	struct list_head *list;
+	struct list_head *temp;
+
+	spin_lock_bh(&write_nodes_lock);
+	list_for_each_safe(list, temp, &write_nodes) {
+		struct nodeinfo *ni =
+		    list_entry(list, struct nodeinfo, write_list);
+		clear_bit(NI_WRITE_PENDING, &ni->flags);
+		list_del(&ni->write_list);
+
+		spin_unlock_bh(&write_nodes_lock);
+
+		send_to_sock(ni);
+		spin_lock_bh(&write_nodes_lock);
+	}
+	spin_unlock_bh(&write_nodes_lock);
+}
+
+/* Called after we've had -EAGAIN and been woken up */
+static void refill_write_queue(void)
+{
+	int i;
+
+	for (i=1; i<=max_nodeid; i++) {
+		struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
+
+		if (ni) {
+			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
+				spin_lock_bh(&write_nodes_lock);
+				list_add_tail(&ni->write_list, &write_nodes);
+				spin_unlock_bh(&write_nodes_lock);
+			}
+		}
+	}
+}
+
+static void clean_one_writequeue(struct nodeinfo *ni)
+{
+	struct list_head *list;
+	struct list_head *temp;
+
+	spin_lock(&ni->writequeue_lock);
+	list_for_each_safe(list, temp, &ni->writequeue) {
+		struct writequeue_entry *e =
+			list_entry(list, struct writequeue_entry, list);
+		list_del(&e->list);
+		free_entry(e);
+	}
+	spin_unlock(&ni->writequeue_lock);
+}
+
+static void clean_writequeues(void)
+{
+	int i;
+
+	for (i=1; i<=max_nodeid; i++) {
+		struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
+		if (ni)
+			clean_one_writequeue(ni);
+	}
+}
+
+
+static void dealloc_nodeinfo(void)
+{
+	int i;
+
+	for (i=1; i<=max_nodeid; i++) {
+		struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
+		if (ni) {
+			idr_remove(&nodeinfo_idr, i);
+			kfree(ni);
+		}
+	}
+}
+
+int dlm_lowcomms_close(int nodeid)
+{
+	struct nodeinfo *ni;
+
+	ni = nodeid2nodeinfo(nodeid, 0);
+	if (!ni)
+		return -1;
+
+	spin_lock(&ni->lock);
+	if (ni->assoc_id) {
+		ni->assoc_id = 0;
+		/* Don't send shutdown here, sctp will just queue it
+		   till the node comes back up! */
+	}
+	spin_unlock(&ni->lock);
+
+	clean_one_writequeue(ni);
+	clear_bit(NI_INIT_PENDING, &ni->flags);
+	return 0;
+}
+
+static int write_list_empty(void)
+{
+	int status;
+
+	spin_lock_bh(&write_nodes_lock);
+	status = list_empty(&write_nodes);
+	spin_unlock_bh(&write_nodes_lock);
+
+	return status;
+}
+
+static int dlm_recvd(void *data)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	while (!kthread_should_stop()) {
+		int count = 0;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&lowcomms_recv_wait, &wait);
+		if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
+			schedule();
+		remove_wait_queue(&lowcomms_recv_wait, &wait);
+		set_current_state(TASK_RUNNING);
+
+		if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
+			int ret;
+
+			do {
+				ret = receive_from_sock();
+
+				/* Don't starve out everyone else */
+				if (++count >= MAX_RX_MSG_COUNT) {
+					schedule();
+					count = 0;
+				}
+			} while (!kthread_should_stop() && ret >=0);
+		}
+		schedule();
+	}
+
+	return 0;
+}
+
+static int dlm_sendd(void *data)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
+
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (write_list_empty())
+			schedule();
+		set_current_state(TASK_RUNNING);
+
+		if (sctp_con.eagain_flag) {
+			sctp_con.eagain_flag = 0;
+			refill_write_queue();
+		}
+		process_output_queue();
+	}
+
+	remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
+
+	return 0;
+}
+
+static void daemons_stop(void)
+{
+	kthread_stop(recv_task);
+	kthread_stop(send_task);
+}
+
+static int daemons_start(void)
+{
+	struct task_struct *p;
+	int error;
+
+	p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
+	error = IS_ERR(p);
+       	if (error) {
+		log_print("can't start dlm_recvd %d", error);
+		return error;
+	}
+	recv_task = p;
+
+	p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
+	error = IS_ERR(p);
+       	if (error) {
+		log_print("can't start dlm_sendd %d", error);
+		kthread_stop(recv_task);
+		return error;
+	}
+	send_task = p;
+
+	return 0;
+}
+
+/*
+ * This is quite likely to sleep...
+ */
+int dlm_lowcomms_start(void)
+{
+	int error;
+
+	error = init_sock();
+	if (error)
+		goto fail_sock;
+	error = daemons_start();
+	if (error)
+		goto fail_sock;
+	atomic_set(&accepting, 1);
+	return 0;
+
+ fail_sock:
+	close_connection();
+	return error;
+}
+
+/* Set all the activity flags to prevent any socket activity. */
+
+void dlm_lowcomms_stop(void)
+{
+	atomic_set(&accepting, 0);
+	sctp_con.flags = 0x7;
+	daemons_stop();
+	clean_writequeues();
+	close_connection();
+	dealloc_nodeinfo();
+	max_nodeid = 0;
+}
+
+int dlm_lowcomms_init(void)
+{
+	init_waitqueue_head(&lowcomms_recv_wait);
+	spin_lock_init(&write_nodes_lock);
+	INIT_LIST_HEAD(&write_nodes);
+	init_rwsem(&nodeinfo_lock);
+	return 0;
+}
+
+void dlm_lowcomms_exit(void)
+{
+	int i;
+
+	for (i = 0; i < dlm_local_count; i++)
+		kfree(dlm_local_addr[i]);
+	dlm_local_count = 0;
+	dlm_local_nodeid = 0;
+}
+
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
new file mode 100644
index 00000000000..7289e59b4bd
--- /dev/null
+++ b/fs/dlm/lowcomms-tcp.c
@@ -0,0 +1,1263 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+/*
+ * lowcomms.c
+ *
+ * This is the "low-level" comms layer.
+ *
+ * It is responsible for sending/receiving messages
+ * from other nodes in the cluster.
+ *
+ * Cluster nodes are referred to by their nodeids. nodeids are
+ * simply 32 bit numbers to the locking module - if they need to
+ * be expanded for the cluster infrastructure then that is it's
+ * responsibility. It is this layer's
+ * responsibility to resolve these into IP address or
+ * whatever it needs for inter-node communication.
+ *
+ * The comms level is two kernel threads that deal mainly with
+ * the receiving of messages from other nodes and passing them
+ * up to the mid-level comms layer (which understands the
+ * message format) for execution by the locking core, and
+ * a send thread which does all the setting up of connections
+ * to remote nodes and the sending of data. Threads are not allowed
+ * to send their own data because it may cause them to wait in times
+ * of high load. Also, this way, the sending thread can collect together
+ * messages bound for one node and send them in one block.
+ *
+ * I don't see any problem with the recv thread executing the locking
+ * code on behalf of remote processes as the locking code is
+ * short, efficient and never waits.
+ *
+ */
+
+
+#include <asm/ioctls.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/pagemap.h>
+
+#include "dlm_internal.h"
+#include "lowcomms.h"
+#include "midcomms.h"
+#include "config.h"
+
+struct cbuf {
+	unsigned base;
+	unsigned len;
+	unsigned mask;
+};
+
+#ifndef FALSE
+#define FALSE 0
+#define TRUE 1
+#endif
+#define NODE_INCREMENT 32
+
+#define CBUF_INIT(cb, size) do { (cb)->base = (cb)->len = 0; (cb)->mask = ((size)-1); } while(0)
+#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0)
+#define CBUF_EMPTY(cb) ((cb)->len == 0)
+#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
+#define CBUF_EAT(cb, n) do { (cb)->len  -= (n); \
+                             (cb)->base += (n); (cb)->base &= (cb)->mask; } while(0)
+#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)
+
+/* Maximum number of incoming messages to process before
+   doing a schedule()
+*/
+#define MAX_RX_MSG_COUNT 25
+
+struct connection {
+	struct socket *sock;	/* NULL if not connected */
+	uint32_t nodeid;	/* So we know who we are in the list */
+	struct rw_semaphore sock_sem;	/* Stop connect races */
+	struct list_head read_list;	/* On this list when ready for reading */
+	struct list_head write_list;	/* On this list when ready for writing */
+	struct list_head state_list;	/* On this list when ready to connect */
+	unsigned long flags;	/* bit 1,2 = We are on the read/write lists */
+#define CF_READ_PENDING 1
+#define CF_WRITE_PENDING 2
+#define CF_CONNECT_PENDING 3
+#define CF_IS_OTHERCON 4
+	struct list_head writequeue;	/* List of outgoing writequeue_entries */
+	struct list_head listenlist;    /* List of allocated listening sockets */
+	spinlock_t writequeue_lock;
+	int (*rx_action) (struct connection *);	/* What to do when active */
+	struct page *rx_page;
+	struct cbuf cb;
+	int retries;
+	atomic_t waiting_requests;
+#define MAX_CONNECT_RETRIES 3
+	struct connection *othercon;
+};
+#define sock2con(x) ((struct connection *)(x)->sk_user_data)
+
+/* An entry waiting to be sent */
+struct writequeue_entry {
+	struct list_head list;
+	struct page *page;
+	int offset;
+	int len;
+	int end;
+	int users;
+	struct connection *con;
+};
+
+static struct sockaddr_storage dlm_local_addr;
+
+/* Manage daemons */
+static struct task_struct *recv_task;
+static struct task_struct *send_task;
+
+static wait_queue_t lowcomms_send_waitq_head;
+static wait_queue_head_t lowcomms_send_waitq;
+static wait_queue_t lowcomms_recv_waitq_head;
+static wait_queue_head_t lowcomms_recv_waitq;
+
+/* An array of pointers to connections, indexed by NODEID */
+static struct connection **connections;
+static struct semaphore connections_lock;
+static kmem_cache_t *con_cache;
+static int conn_array_size;
+static atomic_t accepting;
+
+/* List of sockets that have reads pending */
+static struct list_head read_sockets;
+static spinlock_t read_sockets_lock;
+
+/* List of sockets which have writes pending */
+static struct list_head write_sockets;
+static spinlock_t write_sockets_lock;
+
+/* List of sockets which have connects pending */
+static struct list_head state_sockets;
+static spinlock_t state_sockets_lock;
+
+static struct connection *nodeid2con(int nodeid, gfp_t allocation)
+{
+	struct connection *con = NULL;
+
+	down(&connections_lock);
+	if (nodeid >= conn_array_size) {
+		int new_size = nodeid + NODE_INCREMENT;
+		struct connection **new_conns;
+
+		new_conns = kmalloc(sizeof(struct connection *) *
+				    new_size, allocation);
+		if (!new_conns)
+			goto finish;
+
+		memset(new_conns, 0, sizeof(struct connection *) * new_size);
+		memcpy(new_conns, connections,  sizeof(struct connection *) * conn_array_size);
+		conn_array_size = new_size;
+		kfree(connections);
+		connections = new_conns;
+
+	}
+
+	con = connections[nodeid];
+	if (con == NULL && allocation) {
+		con = kmem_cache_alloc(con_cache, allocation);
+		if (!con)
+			goto finish;
+
+		memset(con, 0, sizeof(*con));
+		con->nodeid = nodeid;
+		init_rwsem(&con->sock_sem);
+		INIT_LIST_HEAD(&con->writequeue);
+		spin_lock_init(&con->writequeue_lock);
+
+		connections[nodeid] = con;
+	}
+
+ finish:
+	up(&connections_lock);
+	return con;
+}
+
+/* Data available on socket or listen socket received a connect */
+static void lowcomms_data_ready(struct sock *sk, int count_unused)
+{
+	struct connection *con = sock2con(sk);
+
+	atomic_inc(&con->waiting_requests);
+	if (test_and_set_bit(CF_READ_PENDING, &con->flags))
+		return;
+
+	spin_lock_bh(&read_sockets_lock);
+	list_add_tail(&con->read_list, &read_sockets);
+	spin_unlock_bh(&read_sockets_lock);
+
+	wake_up_interruptible(&lowcomms_recv_waitq);
+}
+
+static void lowcomms_write_space(struct sock *sk)
+{
+	struct connection *con = sock2con(sk);
+
+	if (test_and_set_bit(CF_WRITE_PENDING, &con->flags))
+		return;
+
+	spin_lock_bh(&write_sockets_lock);
+	list_add_tail(&con->write_list, &write_sockets);
+	spin_unlock_bh(&write_sockets_lock);
+
+	wake_up_interruptible(&lowcomms_send_waitq);
+}
+
+static inline void lowcomms_connect_sock(struct connection *con)
+{
+	if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
+		return;
+	if (!atomic_read(&accepting))
+		return;
+
+	spin_lock_bh(&state_sockets_lock);
+	list_add_tail(&con->state_list, &state_sockets);
+	spin_unlock_bh(&state_sockets_lock);
+
+	wake_up_interruptible(&lowcomms_send_waitq);
+}
+
+static void lowcomms_state_change(struct sock *sk)
+{
+/*	struct connection *con = sock2con(sk); */
+
+	switch (sk->sk_state) {
+	case TCP_ESTABLISHED:
+		lowcomms_write_space(sk);
+		break;
+
+	case TCP_FIN_WAIT1:
+	case TCP_FIN_WAIT2:
+	case TCP_TIME_WAIT:
+	case TCP_CLOSE:
+	case TCP_CLOSE_WAIT:
+	case TCP_LAST_ACK:
+	case TCP_CLOSING:
+		/* FIXME: I think this causes more trouble than it solves.
+		   lowcomms wil reconnect anyway when there is something to
+		   send. This just attempts reconnection if a node goes down!
+		*/
+		/* lowcomms_connect_sock(con); */
+		break;
+
+	default:
+		printk("dlm: lowcomms_state_change: state=%d\n", sk->sk_state);
+		break;
+	}
+}
+
+/* Make a socket active */
+static int add_sock(struct socket *sock, struct connection *con)
+{
+	con->sock = sock;
+
+	/* Install a data_ready callback */
+	con->sock->sk->sk_data_ready = lowcomms_data_ready;
+	con->sock->sk->sk_write_space = lowcomms_write_space;
+	con->sock->sk->sk_state_change = lowcomms_state_change;
+
+	return 0;
+}
+
+/* Add the port number to an IP6 or 4 sockaddr and return the address
+   length */
+static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
+			  int *addr_len)
+{
+        saddr->ss_family =  dlm_local_addr.ss_family;
+        if (saddr->ss_family == AF_INET) {
+		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
+		in4_addr->sin_port = cpu_to_be16(port);
+		*addr_len = sizeof(struct sockaddr_in);
+	}
+	else {
+		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
+		in6_addr->sin6_port = cpu_to_be16(port);
+		*addr_len = sizeof(struct sockaddr_in6);
+	}
+}
+
+/* Close a remote connection and tidy up */
+static void close_connection(struct connection *con, int and_other)
+{
+	down_write(&con->sock_sem);
+
+	if (con->sock) {
+		sock_release(con->sock);
+		con->sock = NULL;
+	}
+	if (con->othercon && and_other) {
+		/* Argh! recursion in kernel code!
+		   Actually, this isn't a list so it
+		   will only re-enter once.
+		*/
+		close_connection(con->othercon, FALSE);
+	}
+	if (con->rx_page) {
+		__free_page(con->rx_page);
+		con->rx_page = NULL;
+	}
+	con->retries = 0;
+	up_write(&con->sock_sem);
+}
+
+/* Data received from remote end */
+static int receive_from_sock(struct connection *con)
+{
+	int ret = 0;
+	struct msghdr msg;
+	struct iovec iov[2];
+	mm_segment_t fs;
+	unsigned len;
+	int r;
+	int call_again_soon = 0;
+
+	down_read(&con->sock_sem);
+
+	if (con->sock == NULL)
+		goto out;
+	if (con->rx_page == NULL) {
+		/*
+		 * This doesn't need to be atomic, but I think it should
+		 * improve performance if it is.
+		 */
+		con->rx_page = alloc_page(GFP_ATOMIC);
+		if (con->rx_page == NULL)
+			goto out_resched;
+		CBUF_INIT(&con->cb, PAGE_CACHE_SIZE);
+	}
+
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_iovlen = 1;
+	msg.msg_iov = iov;
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_flags = 0;
+
+	/*
+	 * iov[0] is the bit of the circular buffer between the current end
+	 * point (cb.base + cb.len) and the end of the buffer.
+	 */
+	iov[0].iov_len = con->cb.base - CBUF_DATA(&con->cb);
+	iov[0].iov_base = page_address(con->rx_page) + CBUF_DATA(&con->cb);
+	iov[1].iov_len = 0;
+
+	/*
+	 * iov[1] is the bit of the circular buffer between the start of the
+	 * buffer and the start of the currently used section (cb.base)
+	 */
+	if (CBUF_DATA(&con->cb) >= con->cb.base) {
+		iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&con->cb);
+		iov[1].iov_len = con->cb.base;
+		iov[1].iov_base = page_address(con->rx_page);
+		msg.msg_iovlen = 2;
+	}
+	len = iov[0].iov_len + iov[1].iov_len;
+
+	fs = get_fs();
+	set_fs(get_ds());
+	r = ret = sock_recvmsg(con->sock, &msg, len,
+			       MSG_DONTWAIT | MSG_NOSIGNAL);
+	set_fs(fs);
+
+	if (ret <= 0)
+		goto out_close;
+	if (ret == len)
+		call_again_soon = 1;
+	CBUF_ADD(&con->cb, ret);
+	ret = dlm_process_incoming_buffer(con->nodeid,
+					  page_address(con->rx_page),
+					  con->cb.base, con->cb.len,
+					  PAGE_CACHE_SIZE);
+	if (ret == -EBADMSG) {
+		printk(KERN_INFO "dlm: lowcomms: addr=%p, base=%u, len=%u, "
+		       "iov_len=%u, iov_base[0]=%p, read=%d\n",
+		       page_address(con->rx_page), con->cb.base, con->cb.len,
+		       len, iov[0].iov_base, r);
+	}
+	if (ret < 0)
+		goto out_close;
+	CBUF_EAT(&con->cb, ret);
+
+	if (CBUF_EMPTY(&con->cb) && !call_again_soon) {
+		__free_page(con->rx_page);
+		con->rx_page = NULL;
+	}
+
+      out:
+	if (call_again_soon)
+		goto out_resched;
+	up_read(&con->sock_sem);
+	ret = 0;
+	goto out_ret;
+
+      out_resched:
+	lowcomms_data_ready(con->sock->sk, 0);
+	up_read(&con->sock_sem);
+	ret = 0;
+	schedule();
+	goto out_ret;
+
+      out_close:
+	up_read(&con->sock_sem);
+	if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) {
+		close_connection(con, FALSE);
+		/* Reconnect when there is something to send */
+	}
+
+      out_ret:
+	return ret;
+}
+
+/* Listening socket is busy, accept a connection */
+static int accept_from_sock(struct connection *con)
+{
+	int result;
+	struct sockaddr_storage peeraddr;
+	struct socket *newsock;
+	int len;
+	int nodeid;
+	struct connection *newcon;
+
+	memset(&peeraddr, 0, sizeof(peeraddr));
+	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &newsock);
+	if (result < 0)
+		return -ENOMEM;
+
+	down_read(&con->sock_sem);
+
+	result = -ENOTCONN;
+	if (con->sock == NULL)
+		goto accept_err;
+
+	newsock->type = con->sock->type;
+	newsock->ops = con->sock->ops;
+
+	result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK);
+	if (result < 0)
+		goto accept_err;
+
+	/* Get the connected socket's peer */
+	memset(&peeraddr, 0, sizeof(peeraddr));
+	if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr,
+				  &len, 2)) {
+		result = -ECONNABORTED;
+		goto accept_err;
+	}
+
+	/* Get the new node's NODEID */
+	make_sockaddr(&peeraddr, 0, &len);
+	if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
+	    	printk("dlm: connect from non cluster node\n");
+		sock_release(newsock);
+		up_read(&con->sock_sem);
+		return -1;
+	}
+
+	log_print("got connection from %d", nodeid);
+
+	/*  Check to see if we already have a connection to this node. This
+	 *  could happen if the two nodes initiate a connection at roughly
+	 *  the same time and the connections cross on the wire.
+	 * TEMPORARY FIX:
+	 *  In this case we store the incoming one in "othercon"
+	 */
+	newcon = nodeid2con(nodeid, GFP_KERNEL);
+	if (!newcon) {
+		result = -ENOMEM;
+		goto accept_err;
+	}
+	down_write(&newcon->sock_sem);
+	if (newcon->sock) {
+	        struct connection *othercon = newcon->othercon;
+
+		if (!othercon) {
+			othercon = kmem_cache_alloc(con_cache, GFP_KERNEL);
+			if (!othercon) {
+				printk("dlm: failed to allocate incoming socket\n");
+				up_write(&newcon->sock_sem);
+				result = -ENOMEM;
+				goto accept_err;
+			}
+			memset(othercon, 0, sizeof(*othercon));
+			othercon->nodeid = nodeid;
+			othercon->rx_action = receive_from_sock;
+			init_rwsem(&othercon->sock_sem);
+			set_bit(CF_IS_OTHERCON, &othercon->flags);
+			newcon->othercon = othercon;
+		}
+		othercon->sock = newsock;
+		newsock->sk->sk_user_data = othercon;
+		add_sock(newsock, othercon);
+	}
+	else {
+		newsock->sk->sk_user_data = newcon;
+		newcon->rx_action = receive_from_sock;
+		add_sock(newsock, newcon);
+
+	}
+
+	up_write(&newcon->sock_sem);
+
+	/*
+	 * Add it to the active queue in case we got data
+	 * beween processing the accept adding the socket
+	 * to the read_sockets list
+	 */
+	lowcomms_data_ready(newsock->sk, 0);
+	up_read(&con->sock_sem);
+
+	return 0;
+
+      accept_err:
+	up_read(&con->sock_sem);
+	sock_release(newsock);
+
+	if (result != -EAGAIN)
+		printk("dlm: error accepting connection from node: %d\n", result);
+	return result;
+}
+
+/* Connect a new socket to its peer */
+static int connect_to_sock(struct connection *con)
+{
+	int result = -EHOSTUNREACH;
+	struct sockaddr_storage saddr;
+	int addr_len;
+	struct socket *sock;
+
+	if (con->nodeid == 0) {
+		log_print("attempt to connect sock 0 foiled");
+		return 0;
+	}
+
+	down_write(&con->sock_sem);
+	if (con->retries++ > MAX_CONNECT_RETRIES)
+		goto out;
+
+	/* Some odd races can cause double-connects, ignore them */
+	if (con->sock) {
+		result = 0;
+		goto out;
+	}
+
+	/* Create a socket to communicate with */
+	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &sock);
+	if (result < 0)
+		goto out_err;
+
+	memset(&saddr, 0, sizeof(saddr));
+	if (dlm_nodeid_to_addr(con->nodeid, &saddr))
+	        goto out_err;
+
+	sock->sk->sk_user_data = con;
+	con->rx_action = receive_from_sock;
+
+	make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len);
+
+	add_sock(sock, con);
+
+	log_print("connecting to %d", con->nodeid);
+	result =
+		sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
+			       O_NONBLOCK);
+	if (result == -EINPROGRESS)
+		result = 0;
+	if (result != 0)
+		goto out_err;
+
+      out:
+	up_write(&con->sock_sem);
+	/*
+	 * Returning an error here means we've given up trying to connect to
+	 * a remote node, otherwise we return 0 and reschedule the connetion
+	 * attempt
+	 */
+	return result;
+
+      out_err:
+	if (con->sock) {
+		sock_release(con->sock);
+		con->sock = NULL;
+	}
+	/*
+	 * Some errors are fatal and this list might need adjusting. For other
+	 * errors we try again until the max number of retries is reached.
+	 */
+	if (result != -EHOSTUNREACH && result != -ENETUNREACH &&
+	    result != -ENETDOWN && result != EINVAL
+	    && result != -EPROTONOSUPPORT) {
+		lowcomms_connect_sock(con);
+		result = 0;
+	}
+	goto out;
+}
+
+static struct socket *create_listen_sock(struct connection *con, struct sockaddr_storage *saddr)
+{
+        struct socket *sock = NULL;
+	mm_segment_t fs;
+	int result = 0;
+	int one = 1;
+	int addr_len;
+
+	if (dlm_local_addr.ss_family == AF_INET)
+		addr_len = sizeof(struct sockaddr_in);
+	else
+		addr_len = sizeof(struct sockaddr_in6);
+
+	/* Create a socket to communicate with */
+	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &sock);
+	if (result < 0) {
+		printk("dlm: Can't create listening comms socket\n");
+		goto create_out;
+	}
+
+	fs = get_fs();
+	set_fs(get_ds());
+	result = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(one));
+	set_fs(fs);
+	if (result < 0) {
+		printk("dlm: Failed to set SO_REUSEADDR on socket: result=%d\n",result);
+	}
+	sock->sk->sk_user_data = con;
+	con->rx_action = accept_from_sock;
+	con->sock = sock;
+
+	/* Bind to our port */
+	make_sockaddr(saddr, dlm_config.tcp_port, &addr_len);
+	result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
+	if (result < 0) {
+		printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port);
+		sock_release(sock);
+		sock = NULL;
+		con->sock = NULL;
+		goto create_out;
+	}
+
+	fs = get_fs();
+	set_fs(get_ds());
+
+	result = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&one, sizeof(one));
+	set_fs(fs);
+	if (result < 0) {
+		printk("dlm: Set keepalive failed: %d\n", result);
+	}
+
+	result = sock->ops->listen(sock, 5);
+	if (result < 0) {
+		printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port);
+		sock_release(sock);
+		sock = NULL;
+		goto create_out;
+	}
+
+      create_out:
+	return sock;
+}
+
+
+/* Listen on all interfaces */
+static int listen_for_all(void)
+{
+	struct socket *sock = NULL;
+	struct connection *con = nodeid2con(0, GFP_KERNEL);
+	int result = -EINVAL;
+
+	/* We don't support multi-homed hosts */
+	memset(con, 0, sizeof(*con));
+	init_rwsem(&con->sock_sem);
+	spin_lock_init(&con->writequeue_lock);
+	INIT_LIST_HEAD(&con->writequeue);
+	set_bit(CF_IS_OTHERCON, &con->flags);
+
+	sock = create_listen_sock(con, &dlm_local_addr);
+	if (sock) {
+		add_sock(sock, con);
+		result = 0;
+	}
+	else {
+		result = -EADDRINUSE;
+	}
+
+	return result;
+}
+
+
+
+static struct writequeue_entry *new_writequeue_entry(struct connection *con,
+						     gfp_t allocation)
+{
+	struct writequeue_entry *entry;
+
+	entry = kmalloc(sizeof(struct writequeue_entry), allocation);
+	if (!entry)
+		return NULL;
+
+	entry->page = alloc_page(allocation);
+	if (!entry->page) {
+		kfree(entry);
+		return NULL;
+	}
+
+	entry->offset = 0;
+	entry->len = 0;
+	entry->end = 0;
+	entry->users = 0;
+	entry->con = con;
+
+	return entry;
+}
+
+void *dlm_lowcomms_get_buffer(int nodeid, int len,
+			      gfp_t allocation, char **ppc)
+{
+	struct connection *con;
+	struct writequeue_entry *e;
+	int offset = 0;
+	int users = 0;
+
+	if (!atomic_read(&accepting))
+		return NULL;
+
+	con = nodeid2con(nodeid, allocation);
+	if (!con)
+		return NULL;
+
+	spin_lock(&con->writequeue_lock);
+	e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
+	if (((struct list_head *) e == &con->writequeue) ||
+	    (PAGE_CACHE_SIZE - e->end < len)) {
+		e = NULL;
+	} else {
+		offset = e->end;
+		e->end += len;
+		users = e->users++;
+	}
+	spin_unlock(&con->writequeue_lock);
+
+	if (e) {
+	      got_one:
+		if (users == 0)
+			kmap(e->page);
+		*ppc = page_address(e->page) + offset;
+		return e;
+	}
+
+	e = new_writequeue_entry(con, allocation);
+	if (e) {
+		spin_lock(&con->writequeue_lock);
+		offset = e->end;
+		e->end += len;
+		users = e->users++;
+		list_add_tail(&e->list, &con->writequeue);
+		spin_unlock(&con->writequeue_lock);
+		goto got_one;
+	}
+	return NULL;
+}
+
+void dlm_lowcomms_commit_buffer(void *mh)
+{
+	struct writequeue_entry *e = (struct writequeue_entry *)mh;
+	struct connection *con = e->con;
+	int users;
+
+	if (!atomic_read(&accepting))
+		return;
+
+	spin_lock(&con->writequeue_lock);
+	users = --e->users;
+	if (users)
+		goto out;
+	e->len = e->end - e->offset;
+	kunmap(e->page);
+	spin_unlock(&con->writequeue_lock);
+
+	if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) {
+		spin_lock_bh(&write_sockets_lock);
+		list_add_tail(&con->write_list, &write_sockets);
+		spin_unlock_bh(&write_sockets_lock);
+
+		wake_up_interruptible(&lowcomms_send_waitq);
+	}
+	return;
+
+      out:
+	spin_unlock(&con->writequeue_lock);
+	return;
+}
+
+static void free_entry(struct writequeue_entry *e)
+{
+	__free_page(e->page);
+	kfree(e);
+}
+
+/* Send a message */
+static int send_to_sock(struct connection *con)
+{
+	int ret = 0;
+	ssize_t(*sendpage) (struct socket *, struct page *, int, size_t, int);
+	const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
+	struct writequeue_entry *e;
+	int len, offset;
+
+	down_read(&con->sock_sem);
+	if (con->sock == NULL)
+		goto out_connect;
+
+	sendpage = con->sock->ops->sendpage;
+
+	spin_lock(&con->writequeue_lock);
+	for (;;) {
+		e = list_entry(con->writequeue.next, struct writequeue_entry,
+			       list);
+		if ((struct list_head *) e == &con->writequeue)
+			break;
+
+		len = e->len;
+		offset = e->offset;
+		BUG_ON(len == 0 && e->users == 0);
+		spin_unlock(&con->writequeue_lock);
+
+		ret = 0;
+		if (len) {
+			ret = sendpage(con->sock, e->page, offset, len,
+				       msg_flags);
+			if (ret == -EAGAIN || ret == 0)
+				goto out;
+			if (ret <= 0)
+				goto send_error;
+		}
+		else {
+			/* Don't starve people filling buffers */
+			schedule();
+		}
+
+		spin_lock(&con->writequeue_lock);
+		e->offset += ret;
+		e->len -= ret;
+
+		if (e->len == 0 && e->users == 0) {
+			list_del(&e->list);
+			free_entry(e);
+			continue;
+		}
+	}
+	spin_unlock(&con->writequeue_lock);
+      out:
+	up_read(&con->sock_sem);
+	return ret;
+
+      send_error:
+	up_read(&con->sock_sem);
+	close_connection(con, FALSE);
+	lowcomms_connect_sock(con);
+	return ret;
+
+      out_connect:
+	up_read(&con->sock_sem);
+	lowcomms_connect_sock(con);
+	return 0;
+}
+
+static void clean_one_writequeue(struct connection *con)
+{
+	struct list_head *list;
+	struct list_head *temp;
+
+	spin_lock(&con->writequeue_lock);
+	list_for_each_safe(list, temp, &con->writequeue) {
+		struct writequeue_entry *e =
+			list_entry(list, struct writequeue_entry, list);
+		list_del(&e->list);
+		free_entry(e);
+	}
+	spin_unlock(&con->writequeue_lock);
+}
+
+/* Called from recovery when it knows that a node has
+   left the cluster */
+int dlm_lowcomms_close(int nodeid)
+{
+	struct connection *con;
+
+	if (!connections)
+		goto out;
+
+	log_print("closing connection to node %d", nodeid);
+	con = nodeid2con(nodeid, 0);
+	if (con) {
+		clean_one_writequeue(con);
+		close_connection(con, TRUE);
+		atomic_set(&con->waiting_requests, 0);
+	}
+	return 0;
+
+      out:
+	return -1;
+}
+
+/* API send message call, may queue the request */
+/* N.B. This is the old interface - use the new one for new calls */
+int lowcomms_send_message(int nodeid, char *buf, int len, gfp_t allocation)
+{
+	struct writequeue_entry *e;
+	char *b;
+
+	e = dlm_lowcomms_get_buffer(nodeid, len, allocation, &b);
+	if (e) {
+		memcpy(b, buf, len);
+		dlm_lowcomms_commit_buffer(e);
+		return 0;
+	}
+	return -ENOBUFS;
+}
+
+/* Look for activity on active sockets */
+static void process_sockets(void)
+{
+	struct list_head *list;
+	struct list_head *temp;
+	int count = 0;
+
+	spin_lock_bh(&read_sockets_lock);
+	list_for_each_safe(list, temp, &read_sockets) {
+
+		struct connection *con =
+		    list_entry(list, struct connection, read_list);
+		list_del(&con->read_list);
+		clear_bit(CF_READ_PENDING, &con->flags);
+
+		spin_unlock_bh(&read_sockets_lock);
+
+		/* This can reach zero if we are processing requests
+		 * as they come in.
+		 */
+		if (atomic_read(&con->waiting_requests) == 0) {
+			spin_lock_bh(&read_sockets_lock);
+			continue;
+		}
+
+		do {
+			con->rx_action(con);
+
+			/* Don't starve out everyone else */
+			if (++count >= MAX_RX_MSG_COUNT) {
+				schedule();
+				count = 0;
+			}
+
+		} while (!atomic_dec_and_test(&con->waiting_requests) &&
+			 !kthread_should_stop());
+
+		spin_lock_bh(&read_sockets_lock);
+	}
+	spin_unlock_bh(&read_sockets_lock);
+}
+
+/* Try to send any messages that are pending
+ */
+static void process_output_queue(void)
+{
+	struct list_head *list;
+	struct list_head *temp;
+	int ret;
+
+	spin_lock_bh(&write_sockets_lock);
+	list_for_each_safe(list, temp, &write_sockets) {
+		struct connection *con =
+		    list_entry(list, struct connection, write_list);
+		clear_bit(CF_WRITE_PENDING, &con->flags);
+		list_del(&con->write_list);
+
+		spin_unlock_bh(&write_sockets_lock);
+
+		ret = send_to_sock(con);
+		if (ret < 0) {
+		}
+		spin_lock_bh(&write_sockets_lock);
+	}
+	spin_unlock_bh(&write_sockets_lock);
+}
+
+static void process_state_queue(void)
+{
+	struct list_head *list;
+	struct list_head *temp;
+	int ret;
+
+	spin_lock_bh(&state_sockets_lock);
+	list_for_each_safe(list, temp, &state_sockets) {
+		struct connection *con =
+		    list_entry(list, struct connection, state_list);
+		list_del(&con->state_list);
+		clear_bit(CF_CONNECT_PENDING, &con->flags);
+		spin_unlock_bh(&state_sockets_lock);
+
+		ret = connect_to_sock(con);
+		if (ret < 0) {
+		}
+		spin_lock_bh(&state_sockets_lock);
+	}
+	spin_unlock_bh(&state_sockets_lock);
+}
+
+
+/* Discard all entries on the write queues */
+static void clean_writequeues(void)
+{
+	int nodeid;
+
+	for (nodeid = 1; nodeid < conn_array_size; nodeid++) {
+		struct connection *con = nodeid2con(nodeid, 0);
+
+		if (con)
+			clean_one_writequeue(con);
+	}
+}
+
+static int read_list_empty(void)
+{
+	int status;
+
+	spin_lock_bh(&read_sockets_lock);
+	status = list_empty(&read_sockets);
+	spin_unlock_bh(&read_sockets_lock);
+
+	return status;
+}
+
+/* DLM Transport comms receive daemon */
+static int dlm_recvd(void *data)
+{
+	init_waitqueue_head(&lowcomms_recv_waitq);
+	init_waitqueue_entry(&lowcomms_recv_waitq_head, current);
+	add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head);
+
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (read_list_empty())
+			schedule();
+		set_current_state(TASK_RUNNING);
+
+		process_sockets();
+	}
+
+	return 0;
+}
+
+static int write_and_state_lists_empty(void)
+{
+	int status;
+
+	spin_lock_bh(&write_sockets_lock);
+	status = list_empty(&write_sockets);
+	spin_unlock_bh(&write_sockets_lock);
+
+	spin_lock_bh(&state_sockets_lock);
+	if (list_empty(&state_sockets) == 0)
+		status = 0;
+	spin_unlock_bh(&state_sockets_lock);
+
+	return status;
+}
+
+/* DLM Transport send daemon */
+static int dlm_sendd(void *data)
+{
+	init_waitqueue_head(&lowcomms_send_waitq);
+	init_waitqueue_entry(&lowcomms_send_waitq_head, current);
+	add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head);
+
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (write_and_state_lists_empty())
+			schedule();
+		set_current_state(TASK_RUNNING);
+
+		process_state_queue();
+		process_output_queue();
+	}
+
+	return 0;
+}
+
+static void daemons_stop(void)
+{
+	kthread_stop(recv_task);
+	kthread_stop(send_task);
+}
+
+static int daemons_start(void)
+{
+	struct task_struct *p;
+	int error;
+
+	p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
+	error = IS_ERR(p);
+       	if (error) {
+		log_print("can't start dlm_recvd %d", error);
+		return error;
+	}
+	recv_task = p;
+
+	p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
+	error = IS_ERR(p);
+       	if (error) {
+		log_print("can't start dlm_sendd %d", error);
+		kthread_stop(recv_task);
+		return error;
+	}
+	send_task = p;
+
+	return 0;
+}
+
+/*
+ * Return the largest buffer size we can cope with.
+ */
+int lowcomms_max_buffer_size(void)
+{
+	return PAGE_CACHE_SIZE;
+}
+
+void dlm_lowcomms_stop(void)
+{
+	int i;
+
+	atomic_set(&accepting, 0);
+
+	/* Set all the activity flags to prevent any
+	   socket activity.
+	*/
+	for (i = 0; i < conn_array_size; i++) {
+		if (connections[i])
+			connections[i]->flags |= 0x7;
+	}
+	daemons_stop();
+	clean_writequeues();
+
+	for (i = 0; i < conn_array_size; i++) {
+		if (connections[i]) {
+			close_connection(connections[i], TRUE);
+			if (connections[i]->othercon)
+				kmem_cache_free(con_cache, connections[i]->othercon);
+			kmem_cache_free(con_cache, connections[i]);
+		}
+	}
+
+	kfree(connections);
+	connections = NULL;
+
+	kmem_cache_destroy(con_cache);
+}
+
+/* This is quite likely to sleep... */
+int dlm_lowcomms_start(void)
+{
+	int error = 0;
+
+	error = -ENOTCONN;
+
+	/*
+	 * Temporarily initialise the waitq head so that lowcomms_send_message
+	 * doesn't crash if it gets called before the thread is fully
+	 * initialised
+	 */
+	init_waitqueue_head(&lowcomms_send_waitq);
+
+	error = -ENOMEM;
+	connections = kmalloc(sizeof(struct connection *) *
+			      NODE_INCREMENT, GFP_KERNEL);
+	if (!connections)
+		goto out;
+
+	memset(connections, 0,
+	       sizeof(struct connection *) * NODE_INCREMENT);
+
+	conn_array_size = NODE_INCREMENT;
+
+	if (dlm_our_addr(&dlm_local_addr, 0)) {
+		log_print("no local IP address has been set");
+		goto fail_free_conn;
+	}
+	if (!dlm_our_addr(&dlm_local_addr, 1)) {
+		log_print("This dlm comms module does not support multi-homed clustering");
+		goto fail_free_conn;
+	}
+
+	con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection),
+				      __alignof__(struct connection), 0, NULL, NULL);
+	if (!con_cache)
+		goto fail_free_conn;
+
+
+	/* Start listening */
+	error = listen_for_all();
+	if (error)
+		goto fail_unlisten;
+
+	error = daemons_start();
+	if (error)
+		goto fail_unlisten;
+
+	atomic_set(&accepting, 1);
+
+	return 0;
+
+      fail_unlisten:
+	close_connection(connections[0], 0);
+	kmem_cache_free(con_cache, connections[0]);
+	kmem_cache_destroy(con_cache);
+
+      fail_free_conn:
+	kfree(connections);
+
+      out:
+	return error;
+}
+
+int dlm_lowcomms_init(void)
+{
+	INIT_LIST_HEAD(&read_sockets);
+	INIT_LIST_HEAD(&write_sockets);
+	INIT_LIST_HEAD(&state_sockets);
+
+	spin_lock_init(&read_sockets_lock);
+	spin_lock_init(&write_sockets_lock);
+	spin_lock_init(&state_sockets_lock);
+	init_MUTEX(&connections_lock);
+
+	return 0;
+}
+
+void dlm_lowcomms_exit(void)
+{
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
deleted file mode 100644
index 6da6b14d5a6..00000000000
--- a/fs/dlm/lowcomms.c
+++ /dev/null
@@ -1,1239 +0,0 @@
-/******************************************************************************
-*******************************************************************************
-**
-**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
-**
-**  This copyrighted material is made available to anyone wishing to use,
-**  modify, copy, or redistribute it subject to the terms and conditions
-**  of the GNU General Public License v.2.
-**
-*******************************************************************************
-******************************************************************************/
-
-/*
- * lowcomms.c
- *
- * This is the "low-level" comms layer.
- *
- * It is responsible for sending/receiving messages
- * from other nodes in the cluster.
- *
- * Cluster nodes are referred to by their nodeids. nodeids are
- * simply 32 bit numbers to the locking module - if they need to
- * be expanded for the cluster infrastructure then that is it's
- * responsibility. It is this layer's
- * responsibility to resolve these into IP address or
- * whatever it needs for inter-node communication.
- *
- * The comms level is two kernel threads that deal mainly with
- * the receiving of messages from other nodes and passing them
- * up to the mid-level comms layer (which understands the
- * message format) for execution by the locking core, and
- * a send thread which does all the setting up of connections
- * to remote nodes and the sending of data. Threads are not allowed
- * to send their own data because it may cause them to wait in times
- * of high load. Also, this way, the sending thread can collect together
- * messages bound for one node and send them in one block.
- *
- * I don't see any problem with the recv thread executing the locking
- * code on behalf of remote processes as the locking code is
- * short, efficient and never (well, hardly ever) waits.
- *
- */
-
-#include <asm/ioctls.h>
-#include <net/sock.h>
-#include <net/tcp.h>
-#include <net/sctp/user.h>
-#include <linux/pagemap.h>
-#include <linux/socket.h>
-#include <linux/idr.h>
-
-#include "dlm_internal.h"
-#include "lowcomms.h"
-#include "config.h"
-#include "midcomms.h"
-
-static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
-static int			dlm_local_count;
-static int			dlm_local_nodeid;
-
-/* One of these per connected node */
-
-#define NI_INIT_PENDING 1
-#define NI_WRITE_PENDING 2
-
-struct nodeinfo {
-	spinlock_t		lock;
-	sctp_assoc_t		assoc_id;
-	unsigned long		flags;
-	struct list_head	write_list; /* nodes with pending writes */
-	struct list_head	writequeue; /* outgoing writequeue_entries */
-	spinlock_t		writequeue_lock;
-	int			nodeid;
-};
-
-static DEFINE_IDR(nodeinfo_idr);
-static struct rw_semaphore	nodeinfo_lock;
-static int			max_nodeid;
-
-struct cbuf {
-	unsigned		base;
-	unsigned		len;
-	unsigned		mask;
-};
-
-/* Just the one of these, now. But this struct keeps
-   the connection-specific variables together */
-
-#define CF_READ_PENDING 1
-
-struct connection {
-	struct socket          *sock;
-	unsigned long		flags;
-	struct page            *rx_page;
-	atomic_t		waiting_requests;
-	struct cbuf		cb;
-	int                     eagain_flag;
-};
-
-/* An entry waiting to be sent */
-
-struct writequeue_entry {
-	struct list_head	list;
-	struct page            *page;
-	int			offset;
-	int			len;
-	int			end;
-	int			users;
-	struct nodeinfo        *ni;
-};
-
-#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0)
-#define CBUF_EMPTY(cb) ((cb)->len == 0)
-#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
-#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)
-
-#define CBUF_INIT(cb, size) \
-do { \
-	(cb)->base = (cb)->len = 0; \
-	(cb)->mask = ((size)-1); \
-} while(0)
-
-#define CBUF_EAT(cb, n) \
-do { \
-	(cb)->len  -= (n); \
-	(cb)->base += (n); \
-	(cb)->base &= (cb)->mask; \
-} while(0)
-
-
-/* List of nodes which have writes pending */
-static struct list_head write_nodes;
-static spinlock_t write_nodes_lock;
-
-/* Maximum number of incoming messages to process before
- * doing a schedule()
- */
-#define MAX_RX_MSG_COUNT 25
-
-/* Manage daemons */
-static struct task_struct *recv_task;
-static struct task_struct *send_task;
-static wait_queue_head_t lowcomms_recv_wait;
-static atomic_t accepting;
-
-/* The SCTP connection */
-static struct connection sctp_con;
-
-
-static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
-{
-	struct sockaddr_storage addr;
-	int error;
-
-	if (!dlm_local_count)
-		return -1;
-
-	error = dlm_nodeid_to_addr(nodeid, &addr);
-	if (error)
-		return error;
-
-	if (dlm_local_addr[0]->ss_family == AF_INET) {
-	        struct sockaddr_in *in4  = (struct sockaddr_in *) &addr;
-		struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr;
-		ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
-	} else {
-	        struct sockaddr_in6 *in6  = (struct sockaddr_in6 *) &addr;
-		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
-		memcpy(&ret6->sin6_addr, &in6->sin6_addr,
-		       sizeof(in6->sin6_addr));
-	}
-
-	return 0;
-}
-
-static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
-{
-	struct nodeinfo *ni;
-	int r;
-	int n;
-
-	down_read(&nodeinfo_lock);
-	ni = idr_find(&nodeinfo_idr, nodeid);
-	up_read(&nodeinfo_lock);
-
-	if (!ni && alloc) {
-		down_write(&nodeinfo_lock);
-
-		ni = idr_find(&nodeinfo_idr, nodeid);
-		if (ni)
-			goto out_up;
-
-		r = idr_pre_get(&nodeinfo_idr, alloc);
-		if (!r)
-			goto out_up;
-
-		ni = kmalloc(sizeof(struct nodeinfo), alloc);
-		if (!ni)
-			goto out_up;
-
-		r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n);
-		if (r) {
-			kfree(ni);
-			ni = NULL;
-			goto out_up;
-		}
-		if (n != nodeid) {
-			idr_remove(&nodeinfo_idr, n);
-			kfree(ni);
-			ni = NULL;
-			goto out_up;
-		}
-		memset(ni, 0, sizeof(struct nodeinfo));
-		spin_lock_init(&ni->lock);
-		INIT_LIST_HEAD(&ni->writequeue);
-		spin_lock_init(&ni->writequeue_lock);
-		ni->nodeid = nodeid;
-
-		if (nodeid > max_nodeid)
-			max_nodeid = nodeid;
-	out_up:
-		up_write(&nodeinfo_lock);
-	}
-
-	return ni;
-}
-
-/* Don't call this too often... */
-static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc)
-{
-	int i;
-	struct nodeinfo *ni;
-
-	for (i=1; i<=max_nodeid; i++) {
-		ni = nodeid2nodeinfo(i, 0);
-		if (ni && ni->assoc_id == assoc)
-			return ni;
-	}
-	return NULL;
-}
-
-/* Data or notification available on socket */
-static void lowcomms_data_ready(struct sock *sk, int count_unused)
-{
-	atomic_inc(&sctp_con.waiting_requests);
-	if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags))
-		return;
-
-	wake_up_interruptible(&lowcomms_recv_wait);
-}
-
-
-/* Add the port number to an IP6 or 4 sockaddr and return the address length.
-   Also padd out the struct with zeros to make comparisons meaningful */
-
-static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
-			  int *addr_len)
-{
-	struct sockaddr_in *local4_addr;
-	struct sockaddr_in6 *local6_addr;
-
-	if (!dlm_local_count)
-		return;
-
-	if (!port) {
-		if (dlm_local_addr[0]->ss_family == AF_INET) {
-			local4_addr = (struct sockaddr_in *)dlm_local_addr[0];
-			port = be16_to_cpu(local4_addr->sin_port);
-		} else {
-			local6_addr = (struct sockaddr_in6 *)dlm_local_addr[0];
-			port = be16_to_cpu(local6_addr->sin6_port);
-		}
-	}
-
-	saddr->ss_family = dlm_local_addr[0]->ss_family;
-	if (dlm_local_addr[0]->ss_family == AF_INET) {
-		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
-		in4_addr->sin_port = cpu_to_be16(port);
-		memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
-		memset(in4_addr+1, 0, sizeof(struct sockaddr_storage) -
-				      sizeof(struct sockaddr_in));
-		*addr_len = sizeof(struct sockaddr_in);
-	} else {
-		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
-		in6_addr->sin6_port = cpu_to_be16(port);
-		memset(in6_addr+1, 0, sizeof(struct sockaddr_storage) -
-				      sizeof(struct sockaddr_in6));
-		*addr_len = sizeof(struct sockaddr_in6);
-	}
-}
-
-/* Close the connection and tidy up */
-static void close_connection(void)
-{
-	if (sctp_con.sock) {
-		sock_release(sctp_con.sock);
-		sctp_con.sock = NULL;
-	}
-
-	if (sctp_con.rx_page) {
-		__free_page(sctp_con.rx_page);
-		sctp_con.rx_page = NULL;
-	}
-}
-
-/* We only send shutdown messages to nodes that are not part of the cluster */
-static void send_shutdown(sctp_assoc_t associd)
-{
-	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
-	struct msghdr outmessage;
-	struct cmsghdr *cmsg;
-	struct sctp_sndrcvinfo *sinfo;
-	int ret;
-
-	outmessage.msg_name = NULL;
-	outmessage.msg_namelen = 0;
-	outmessage.msg_control = outcmsg;
-	outmessage.msg_controllen = sizeof(outcmsg);
-	outmessage.msg_flags = MSG_EOR;
-
-	cmsg = CMSG_FIRSTHDR(&outmessage);
-	cmsg->cmsg_level = IPPROTO_SCTP;
-	cmsg->cmsg_type = SCTP_SNDRCV;
-	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
-	outmessage.msg_controllen = cmsg->cmsg_len;
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
-	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
-
-	sinfo->sinfo_flags |= MSG_EOF;
-	sinfo->sinfo_assoc_id = associd;
-
-	ret = kernel_sendmsg(sctp_con.sock, &outmessage, NULL, 0, 0);
-
-	if (ret != 0)
-		log_print("send EOF to node failed: %d", ret);
-}
-
-
-/* INIT failed but we don't know which node...
-   restart INIT on all pending nodes */
-static void init_failed(void)
-{
-	int i;
-	struct nodeinfo *ni;
-
-	for (i=1; i<=max_nodeid; i++) {
-		ni = nodeid2nodeinfo(i, 0);
-		if (!ni)
-			continue;
-
-		if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) {
-			ni->assoc_id = 0;
-			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
-				spin_lock_bh(&write_nodes_lock);
-				list_add_tail(&ni->write_list, &write_nodes);
-				spin_unlock_bh(&write_nodes_lock);
-			}
-		}
-	}
-	wake_up_process(send_task);
-}
-
-/* Something happened to an association */
-static void process_sctp_notification(struct msghdr *msg, char *buf)
-{
-	union sctp_notification *sn = (union sctp_notification *)buf;
-
-	if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) {
-		switch (sn->sn_assoc_change.sac_state) {
-
-		case SCTP_COMM_UP:
-		case SCTP_RESTART:
-		{
-			/* Check that the new node is in the lockspace */
-			struct sctp_prim prim;
-			mm_segment_t fs;
-			int nodeid;
-			int prim_len, ret;
-			int addr_len;
-			struct nodeinfo *ni;
-
-			/* This seems to happen when we received a connection
-			 * too early... or something...  anyway, it happens but
-			 * we always seem to get a real message too, see
-			 * receive_from_sock */
-
-			if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) {
-				log_print("COMM_UP for invalid assoc ID %d",
-					 (int)sn->sn_assoc_change.sac_assoc_id);
-				init_failed();
-				return;
-			}
-			memset(&prim, 0, sizeof(struct sctp_prim));
-			prim_len = sizeof(struct sctp_prim);
-			prim.ssp_assoc_id = sn->sn_assoc_change.sac_assoc_id;
-
-			fs = get_fs();
-			set_fs(get_ds());
-			ret = sctp_con.sock->ops->getsockopt(sctp_con.sock,
-						IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
-						(char*)&prim, &prim_len);
-			set_fs(fs);
-			if (ret < 0) {
-				struct nodeinfo *ni;
-
-				log_print("getsockopt/sctp_primary_addr on "
-					  "new assoc %d failed : %d",
-				    (int)sn->sn_assoc_change.sac_assoc_id, ret);
-
-				/* Retry INIT later */
-				ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
-				if (ni)
-					clear_bit(NI_INIT_PENDING, &ni->flags);
-				return;
-			}
-			make_sockaddr(&prim.ssp_addr, 0, &addr_len);
-			if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
-				log_print("reject connect from unknown addr");
-				send_shutdown(prim.ssp_assoc_id);
-				return;
-			}
-
-			ni = nodeid2nodeinfo(nodeid, GFP_KERNEL);
-			if (!ni)
-				return;
-
-			/* Save the assoc ID */
-			spin_lock(&ni->lock);
-			ni->assoc_id = sn->sn_assoc_change.sac_assoc_id;
-			spin_unlock(&ni->lock);
-
-			log_print("got new/restarted association %d nodeid %d",
-			       (int)sn->sn_assoc_change.sac_assoc_id, nodeid);
-
-			/* Send any pending writes */
-			clear_bit(NI_INIT_PENDING, &ni->flags);
-			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
-				spin_lock_bh(&write_nodes_lock);
-				list_add_tail(&ni->write_list, &write_nodes);
-				spin_unlock_bh(&write_nodes_lock);
-			}
-			wake_up_process(send_task);
-		}
-		break;
-
-		case SCTP_COMM_LOST:
-		case SCTP_SHUTDOWN_COMP:
-		{
-			struct nodeinfo *ni;
-
-			ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
-			if (ni) {
-				spin_lock(&ni->lock);
-				ni->assoc_id = 0;
-				spin_unlock(&ni->lock);
-			}
-		}
-		break;
-
-		/* We don't know which INIT failed, so clear the PENDING flags
-		 * on them all.  if assoc_id is zero then it will then try
-		 * again */
-
-		case SCTP_CANT_STR_ASSOC:
-		{
-			log_print("Can't start SCTP association - retrying");
-			init_failed();
-		}
-		break;
-
-		default:
-			log_print("unexpected SCTP assoc change id=%d state=%d",
-				  (int)sn->sn_assoc_change.sac_assoc_id,
-				  sn->sn_assoc_change.sac_state);
-		}
-	}
-}
-
-/* Data received from remote end */
-static int receive_from_sock(void)
-{
-	int ret = 0;
-	struct msghdr msg;
-	struct kvec iov[2];
-	unsigned len;
-	int r;
-	struct sctp_sndrcvinfo *sinfo;
-	struct cmsghdr *cmsg;
-	struct nodeinfo *ni;
-
-	/* These two are marginally too big for stack allocation, but this
-	 * function is (currently) only called by dlm_recvd so static should be
-	 * OK.
-	 */
-	static struct sockaddr_storage msgname;
-	static char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
-
-	if (sctp_con.sock == NULL)
-		goto out;
-
-	if (sctp_con.rx_page == NULL) {
-		/*
-		 * This doesn't need to be atomic, but I think it should
-		 * improve performance if it is.
-		 */
-		sctp_con.rx_page = alloc_page(GFP_ATOMIC);
-		if (sctp_con.rx_page == NULL)
-			goto out_resched;
-		CBUF_INIT(&sctp_con.cb, PAGE_CACHE_SIZE);
-	}
-
-	memset(&incmsg, 0, sizeof(incmsg));
-	memset(&msgname, 0, sizeof(msgname));
-
-	memset(incmsg, 0, sizeof(incmsg));
-	msg.msg_name = &msgname;
-	msg.msg_namelen = sizeof(msgname);
-	msg.msg_flags = 0;
-	msg.msg_control = incmsg;
-	msg.msg_controllen = sizeof(incmsg);
-	msg.msg_iovlen = 1;
-
-	/* I don't see why this circular buffer stuff is necessary for SCTP
-	 * which is a packet-based protocol, but the whole thing breaks under
-	 * load without it! The overhead is minimal (and is in the TCP lowcomms
-	 * anyway, of course) so I'll leave it in until I can figure out what's
-	 * really happening.
-	 */
-
-	/*
-	 * iov[0] is the bit of the circular buffer between the current end
-	 * point (cb.base + cb.len) and the end of the buffer.
-	 */
-	iov[0].iov_len = sctp_con.cb.base - CBUF_DATA(&sctp_con.cb);
-	iov[0].iov_base = page_address(sctp_con.rx_page) +
-			  CBUF_DATA(&sctp_con.cb);
-	iov[1].iov_len = 0;
-
-	/*
-	 * iov[1] is the bit of the circular buffer between the start of the
-	 * buffer and the start of the currently used section (cb.base)
-	 */
-	if (CBUF_DATA(&sctp_con.cb) >= sctp_con.cb.base) {
-		iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&sctp_con.cb);
-		iov[1].iov_len = sctp_con.cb.base;
-		iov[1].iov_base = page_address(sctp_con.rx_page);
-		msg.msg_iovlen = 2;
-	}
-	len = iov[0].iov_len + iov[1].iov_len;
-
-	r = ret = kernel_recvmsg(sctp_con.sock, &msg, iov, msg.msg_iovlen, len,
-				 MSG_NOSIGNAL | MSG_DONTWAIT);
-	if (ret <= 0)
-		goto out_close;
-
-	msg.msg_control = incmsg;
-	msg.msg_controllen = sizeof(incmsg);
-	cmsg = CMSG_FIRSTHDR(&msg);
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
-
-	if (msg.msg_flags & MSG_NOTIFICATION) {
-		process_sctp_notification(&msg, page_address(sctp_con.rx_page));
-		return 0;
-	}
-
-	/* Is this a new association ? */
-	ni = nodeid2nodeinfo(le32_to_cpu(sinfo->sinfo_ppid), GFP_KERNEL);
-	if (ni) {
-		ni->assoc_id = sinfo->sinfo_assoc_id;
-		if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) {
-
-			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
-				spin_lock_bh(&write_nodes_lock);
-				list_add_tail(&ni->write_list, &write_nodes);
-				spin_unlock_bh(&write_nodes_lock);
-			}
-			wake_up_process(send_task);
-		}
-	}
-
-	/* INIT sends a message with length of 1 - ignore it */
-	if (r == 1)
-		return 0;
-
-	CBUF_ADD(&sctp_con.cb, ret);
-	ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
-					  page_address(sctp_con.rx_page),
-					  sctp_con.cb.base, sctp_con.cb.len,
-					  PAGE_CACHE_SIZE);
-	if (ret < 0)
-		goto out_close;
-	CBUF_EAT(&sctp_con.cb, ret);
-
-      out:
-	ret = 0;
-	goto out_ret;
-
-      out_resched:
-	lowcomms_data_ready(sctp_con.sock->sk, 0);
-	ret = 0;
-	schedule();
-	goto out_ret;
-
-      out_close:
-	if (ret != -EAGAIN)
-		log_print("error reading from sctp socket: %d", ret);
-      out_ret:
-	return ret;
-}
-
-/* Bind to an IP address. SCTP allows multiple address so it can do multi-homing */
-static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
-{
-	mm_segment_t fs;
-	int result = 0;
-
-	fs = get_fs();
-	set_fs(get_ds());
-	if (num == 1)
-		result = sctp_con.sock->ops->bind(sctp_con.sock,
-					(struct sockaddr *) addr, addr_len);
-	else
-		result = sctp_con.sock->ops->setsockopt(sctp_con.sock, SOL_SCTP,
-				SCTP_SOCKOPT_BINDX_ADD, (char *)addr, addr_len);
-	set_fs(fs);
-
-	if (result < 0)
-		log_print("Can't bind to port %d addr number %d",
-			  dlm_config.tcp_port, num);
-
-	return result;
-}
-
-static void init_local(void)
-{
-	struct sockaddr_storage sas, *addr;
-	int i;
-
-	dlm_local_nodeid = dlm_our_nodeid();
-
-	for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) {
-		if (dlm_our_addr(&sas, i))
-			break;
-
-		addr = kmalloc(sizeof(*addr), GFP_KERNEL);
-		if (!addr)
-			break;
-		memcpy(addr, &sas, sizeof(*addr));
-		dlm_local_addr[dlm_local_count++] = addr;
-	}
-}
-
-/* Initialise SCTP socket and bind to all interfaces */
-static int init_sock(void)
-{
-	mm_segment_t fs;
-	struct socket *sock = NULL;
-	struct sockaddr_storage localaddr;
-	struct sctp_event_subscribe subscribe;
-	int result = -EINVAL, num = 1, i, addr_len;
-
-	if (!dlm_local_count) {
-		init_local();
-		if (!dlm_local_count) {
-			log_print("no local IP address has been set");
-			goto out;
-		}
-	}
-
-	result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET,
-				  IPPROTO_SCTP, &sock);
-	if (result < 0) {
-		log_print("Can't create comms socket, check SCTP is loaded");
-		goto out;
-	}
-
-	/* Listen for events */
-	memset(&subscribe, 0, sizeof(subscribe));
-	subscribe.sctp_data_io_event = 1;
-	subscribe.sctp_association_event = 1;
-	subscribe.sctp_send_failure_event = 1;
-	subscribe.sctp_shutdown_event = 1;
-	subscribe.sctp_partial_delivery_event = 1;
-
-	fs = get_fs();
-	set_fs(get_ds());
-	result = sock->ops->setsockopt(sock, SOL_SCTP, SCTP_EVENTS,
-				       (char *)&subscribe, sizeof(subscribe));
-	set_fs(fs);
-
-	if (result < 0) {
-		log_print("Failed to set SCTP_EVENTS on socket: result=%d",
-			  result);
-		goto create_delsock;
-	}
-
-	/* Init con struct */
-	sock->sk->sk_user_data = &sctp_con;
-	sctp_con.sock = sock;
-	sctp_con.sock->sk->sk_data_ready = lowcomms_data_ready;
-
-	/* Bind to all interfaces. */
-	for (i = 0; i < dlm_local_count; i++) {
-		memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
-		make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len);
-
-		result = add_bind_addr(&localaddr, addr_len, num);
-		if (result)
-			goto create_delsock;
-		++num;
-	}
-
-	result = sock->ops->listen(sock, 5);
-	if (result < 0) {
-		log_print("Can't set socket listening");
-		goto create_delsock;
-	}
-
-	return 0;
-
- create_delsock:
-	sock_release(sock);
-	sctp_con.sock = NULL;
- out:
-	return result;
-}
-
-
-static struct writequeue_entry *new_writequeue_entry(gfp_t allocation)
-{
-	struct writequeue_entry *entry;
-
-	entry = kmalloc(sizeof(struct writequeue_entry), allocation);
-	if (!entry)
-		return NULL;
-
-	entry->page = alloc_page(allocation);
-	if (!entry->page) {
-		kfree(entry);
-		return NULL;
-	}
-
-	entry->offset = 0;
-	entry->len = 0;
-	entry->end = 0;
-	entry->users = 0;
-
-	return entry;
-}
-
-void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
-{
-	struct writequeue_entry *e;
-	int offset = 0;
-	int users = 0;
-	struct nodeinfo *ni;
-
-	if (!atomic_read(&accepting))
-		return NULL;
-
-	ni = nodeid2nodeinfo(nodeid, allocation);
-	if (!ni)
-		return NULL;
-
-	spin_lock(&ni->writequeue_lock);
-	e = list_entry(ni->writequeue.prev, struct writequeue_entry, list);
-	if (((struct list_head *) e == &ni->writequeue) ||
-	    (PAGE_CACHE_SIZE - e->end < len)) {
-		e = NULL;
-	} else {
-		offset = e->end;
-		e->end += len;
-		users = e->users++;
-	}
-	spin_unlock(&ni->writequeue_lock);
-
-	if (e) {
-	      got_one:
-		if (users == 0)
-			kmap(e->page);
-		*ppc = page_address(e->page) + offset;
-		return e;
-	}
-
-	e = new_writequeue_entry(allocation);
-	if (e) {
-		spin_lock(&ni->writequeue_lock);
-		offset = e->end;
-		e->end += len;
-		e->ni = ni;
-		users = e->users++;
-		list_add_tail(&e->list, &ni->writequeue);
-		spin_unlock(&ni->writequeue_lock);
-		goto got_one;
-	}
-	return NULL;
-}
-
-void dlm_lowcomms_commit_buffer(void *arg)
-{
-	struct writequeue_entry *e = (struct writequeue_entry *) arg;
-	int users;
-	struct nodeinfo *ni = e->ni;
-
-	if (!atomic_read(&accepting))
-		return;
-
-	spin_lock(&ni->writequeue_lock);
-	users = --e->users;
-	if (users)
-		goto out;
-	e->len = e->end - e->offset;
-	kunmap(e->page);
-	spin_unlock(&ni->writequeue_lock);
-
-	if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
-		spin_lock_bh(&write_nodes_lock);
-		list_add_tail(&ni->write_list, &write_nodes);
-		spin_unlock_bh(&write_nodes_lock);
-		wake_up_process(send_task);
-	}
-	return;
-
-      out:
-	spin_unlock(&ni->writequeue_lock);
-	return;
-}
-
-static void free_entry(struct writequeue_entry *e)
-{
-	__free_page(e->page);
-	kfree(e);
-}
-
-/* Initiate an SCTP association. In theory we could just use sendmsg() on
-   the first IP address and it should work, but this allows us to set up the
-   association before sending any valuable data that we can't afford to lose.
-   It also keeps the send path clean as it can now always use the association ID */
-static void initiate_association(int nodeid)
-{
-	struct sockaddr_storage rem_addr;
-	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
-	struct msghdr outmessage;
-	struct cmsghdr *cmsg;
-	struct sctp_sndrcvinfo *sinfo;
-	int ret;
-	int addrlen;
-	char buf[1];
-	struct kvec iov[1];
-	struct nodeinfo *ni;
-
-	log_print("Initiating association with node %d", nodeid);
-
-	ni = nodeid2nodeinfo(nodeid, GFP_KERNEL);
-	if (!ni)
-		return;
-
-	if (nodeid_to_addr(nodeid, (struct sockaddr *)&rem_addr)) {
-		log_print("no address for nodeid %d", nodeid);
-		return;
-	}
-
-	make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen);
-
-	outmessage.msg_name = &rem_addr;
-	outmessage.msg_namelen = addrlen;
-	outmessage.msg_control = outcmsg;
-	outmessage.msg_controllen = sizeof(outcmsg);
-	outmessage.msg_flags = MSG_EOR;
-
-	iov[0].iov_base = buf;
-	iov[0].iov_len = 1;
-
-	/* Real INIT messages seem to cause trouble. Just send a 1 byte message
-	   we can afford to lose */
-	cmsg = CMSG_FIRSTHDR(&outmessage);
-	cmsg->cmsg_level = IPPROTO_SCTP;
-	cmsg->cmsg_type = SCTP_SNDRCV;
-	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
-	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
-	sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
-
-	outmessage.msg_controllen = cmsg->cmsg_len;
-	ret = kernel_sendmsg(sctp_con.sock, &outmessage, iov, 1, 1);
-	if (ret < 0) {
-		log_print("send INIT to node failed: %d", ret);
-		/* Try again later */
-		clear_bit(NI_INIT_PENDING, &ni->flags);
-	}
-}
-
-/* Send a message */
-static int send_to_sock(struct nodeinfo *ni)
-{
-	int ret = 0;
-	struct writequeue_entry *e;
-	int len, offset;
-	struct msghdr outmsg;
-	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
-	struct cmsghdr *cmsg;
-	struct sctp_sndrcvinfo *sinfo;
-	struct kvec iov;
-
-        /* See if we need to init an association before we start
-	   sending precious messages */
-	spin_lock(&ni->lock);
-	if (!ni->assoc_id && !test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
-		spin_unlock(&ni->lock);
-		initiate_association(ni->nodeid);
-		return 0;
-	}
-	spin_unlock(&ni->lock);
-
-	outmsg.msg_name = NULL; /* We use assoc_id */
-	outmsg.msg_namelen = 0;
-	outmsg.msg_control = outcmsg;
-	outmsg.msg_controllen = sizeof(outcmsg);
-	outmsg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | MSG_EOR;
-
-	cmsg = CMSG_FIRSTHDR(&outmsg);
-	cmsg->cmsg_level = IPPROTO_SCTP;
-	cmsg->cmsg_type = SCTP_SNDRCV;
-	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
-	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
-	sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
-	sinfo->sinfo_assoc_id = ni->assoc_id;
-	outmsg.msg_controllen = cmsg->cmsg_len;
-
-	spin_lock(&ni->writequeue_lock);
-	for (;;) {
-		if (list_empty(&ni->writequeue))
-			break;
-		e = list_entry(ni->writequeue.next, struct writequeue_entry,
-			       list);
-		len = e->len;
-		offset = e->offset;
-		BUG_ON(len == 0 && e->users == 0);
-		spin_unlock(&ni->writequeue_lock);
-		kmap(e->page);
-
-		ret = 0;
-		if (len) {
-			iov.iov_base = page_address(e->page)+offset;
-			iov.iov_len = len;
-
-			ret = kernel_sendmsg(sctp_con.sock, &outmsg, &iov, 1,
-					     len);
-			if (ret == -EAGAIN) {
-				sctp_con.eagain_flag = 1;
-				goto out;
-			} else if (ret < 0)
-				goto send_error;
-		} else {
-			/* Don't starve people filling buffers */
-			schedule();
-		}
-
-		spin_lock(&ni->writequeue_lock);
-		e->offset += ret;
-		e->len -= ret;
-
-		if (e->len == 0 && e->users == 0) {
-			list_del(&e->list);
-			free_entry(e);
-			continue;
-		}
-	}
-	spin_unlock(&ni->writequeue_lock);
- out:
-	return ret;
-
- send_error:
-	log_print("Error sending to node %d %d", ni->nodeid, ret);
-	spin_lock(&ni->lock);
-	if (!test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
-		ni->assoc_id = 0;
-		spin_unlock(&ni->lock);
-		initiate_association(ni->nodeid);
-	} else
-		spin_unlock(&ni->lock);
-
-	return ret;
-}
-
-/* Try to send any messages that are pending */
-static void process_output_queue(void)
-{
-	struct list_head *list;
-	struct list_head *temp;
-
-	spin_lock_bh(&write_nodes_lock);
-	list_for_each_safe(list, temp, &write_nodes) {
-		struct nodeinfo *ni =
-		    list_entry(list, struct nodeinfo, write_list);
-		clear_bit(NI_WRITE_PENDING, &ni->flags);
-		list_del(&ni->write_list);
-
-		spin_unlock_bh(&write_nodes_lock);
-
-		send_to_sock(ni);
-		spin_lock_bh(&write_nodes_lock);
-	}
-	spin_unlock_bh(&write_nodes_lock);
-}
-
-/* Called after we've had -EAGAIN and been woken up */
-static void refill_write_queue(void)
-{
-	int i;
-
-	for (i=1; i<=max_nodeid; i++) {
-		struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
-
-		if (ni) {
-			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
-				spin_lock_bh(&write_nodes_lock);
-				list_add_tail(&ni->write_list, &write_nodes);
-				spin_unlock_bh(&write_nodes_lock);
-			}
-		}
-	}
-}
-
-static void clean_one_writequeue(struct nodeinfo *ni)
-{
-	struct list_head *list;
-	struct list_head *temp;
-
-	spin_lock(&ni->writequeue_lock);
-	list_for_each_safe(list, temp, &ni->writequeue) {
-		struct writequeue_entry *e =
-			list_entry(list, struct writequeue_entry, list);
-		list_del(&e->list);
-		free_entry(e);
-	}
-	spin_unlock(&ni->writequeue_lock);
-}
-
-static void clean_writequeues(void)
-{
-	int i;
-
-	for (i=1; i<=max_nodeid; i++) {
-		struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
-		if (ni)
-			clean_one_writequeue(ni);
-	}
-}
-
-
-static void dealloc_nodeinfo(void)
-{
-	int i;
-
-	for (i=1; i<=max_nodeid; i++) {
-		struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
-		if (ni) {
-			idr_remove(&nodeinfo_idr, i);
-			kfree(ni);
-		}
-	}
-}
-
-int dlm_lowcomms_close(int nodeid)
-{
-	struct nodeinfo *ni;
-
-	ni = nodeid2nodeinfo(nodeid, 0);
-	if (!ni)
-		return -1;
-
-	spin_lock(&ni->lock);
-	if (ni->assoc_id) {
-		ni->assoc_id = 0;
-		/* Don't send shutdown here, sctp will just queue it
-		   till the node comes back up! */
-	}
-	spin_unlock(&ni->lock);
-
-	clean_one_writequeue(ni);
-	clear_bit(NI_INIT_PENDING, &ni->flags);
-	return 0;
-}
-
-static int write_list_empty(void)
-{
-	int status;
-
-	spin_lock_bh(&write_nodes_lock);
-	status = list_empty(&write_nodes);
-	spin_unlock_bh(&write_nodes_lock);
-
-	return status;
-}
-
-static int dlm_recvd(void *data)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	while (!kthread_should_stop()) {
-		int count = 0;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&lowcomms_recv_wait, &wait);
-		if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
-			schedule();
-		remove_wait_queue(&lowcomms_recv_wait, &wait);
-		set_current_state(TASK_RUNNING);
-
-		if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
-			int ret;
-
-			do {
-				ret = receive_from_sock();
-
-				/* Don't starve out everyone else */
-				if (++count >= MAX_RX_MSG_COUNT) {
-					schedule();
-					count = 0;
-				}
-			} while (!kthread_should_stop() && ret >=0);
-		}
-		schedule();
-	}
-
-	return 0;
-}
-
-static int dlm_sendd(void *data)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
-
-	while (!kthread_should_stop()) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (write_list_empty())
-			schedule();
-		set_current_state(TASK_RUNNING);
-
-		if (sctp_con.eagain_flag) {
-			sctp_con.eagain_flag = 0;
-			refill_write_queue();
-		}
-		process_output_queue();
-	}
-
-	remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
-
-	return 0;
-}
-
-static void daemons_stop(void)
-{
-	kthread_stop(recv_task);
-	kthread_stop(send_task);
-}
-
-static int daemons_start(void)
-{
-	struct task_struct *p;
-	int error;
-
-	p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
-	error = IS_ERR(p);
-       	if (error) {
-		log_print("can't start dlm_recvd %d", error);
-		return error;
-	}
-	recv_task = p;
-
-	p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
-	error = IS_ERR(p);
-       	if (error) {
-		log_print("can't start dlm_sendd %d", error);
-		kthread_stop(recv_task);
-		return error;
-	}
-	send_task = p;
-
-	return 0;
-}
-
-/*
- * This is quite likely to sleep...
- */
-int dlm_lowcomms_start(void)
-{
-	int error;
-
-	error = init_sock();
-	if (error)
-		goto fail_sock;
-	error = daemons_start();
-	if (error)
-		goto fail_sock;
-	atomic_set(&accepting, 1);
-	return 0;
-
- fail_sock:
-	close_connection();
-	return error;
-}
-
-/* Set all the activity flags to prevent any socket activity. */
-
-void dlm_lowcomms_stop(void)
-{
-	atomic_set(&accepting, 0);
-	sctp_con.flags = 0x7;
-	daemons_stop();
-	clean_writequeues();
-	close_connection();
-	dealloc_nodeinfo();
-	max_nodeid = 0;
-}
-
-int dlm_lowcomms_init(void)
-{
-	init_waitqueue_head(&lowcomms_recv_wait);
-	spin_lock_init(&write_nodes_lock);
-	INIT_LIST_HEAD(&write_nodes);
-	init_rwsem(&nodeinfo_lock);
-	return 0;
-}
-
-void dlm_lowcomms_exit(void)
-{
-	int i;
-
-	for (i = 0; i < dlm_local_count; i++)
-		kfree(dlm_local_addr[i]);
-	dlm_local_count = 0;
-	dlm_local_nodeid = 0;
-}
-
-- 
cgit v1.2.3


From 520698096436f7da5b9142e63e3bed5580c5f14e Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Thu, 2 Nov 2006 09:49:02 -0600
Subject: [DLM] res_recover_locks_count not reset when recover_locks is aborted

Red Hat BZ 213684

If a node sends an lkb to the new master (RCOM_LOCK message) during
recovery and recovery is then aborted on both nodes before it gets a
reply, the res_recover_locks_count needs to be reset to 0 so that when the
subsequent recovery comes along and sends the lkb to the new master again
the assertion doesn't trigger that checks that counter is zero.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/recover.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index a5e6d184872..cf9f6831bab 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -252,6 +252,7 @@ static void recover_list_clear(struct dlm_ls *ls)
 	spin_lock(&ls->ls_recover_list_lock);
 	list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
 		list_del_init(&r->res_recover_list);
+		r->res_recover_locks_count = 0;
 		dlm_put_rsb(r);
 		ls->ls_recover_list_count--;
 	}
-- 
cgit v1.2.3


From 435618b75b82b5ee511cc01fcdda9c44adb2f4bd Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Thu, 2 Nov 2006 09:45:56 -0600
Subject: [DLM] status messages ping-pong between unmounted nodes

Red Hat BZ 213682

If two nodes leave the lockspace (while unmounting the fs in the case of
gfs) after one has sent a STATUS message to the other, STATUS/STATUS_REPLY
messages will then ping-pong between the nodes when neither of them can
find the lockspace in question any longer.  We kill this by not sending
another STATUS message when we get a STATUS_REPLY for an unknown
lockspace.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/rcom.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 518239a8b1e..87b12f7d3d2 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -412,9 +412,10 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
 
 	ls = dlm_find_lockspace_global(hd->h_lockspace);
 	if (!ls) {
-		log_print("lockspace %x from %d not found",
-			  hd->h_lockspace, nodeid);
-		send_ls_not_ready(nodeid, rc);
+		log_print("lockspace %x from %d type %x not found",
+			  hd->h_lockspace, nodeid, rc->rc_type);
+		if (rc->rc_type == DLM_RCOM_STATUS)
+			send_ls_not_ready(nodeid, rc);
 		return;
 	}
 
-- 
cgit v1.2.3


From d4400156d415540086c34a06e5d233122d6bf56a Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Tue, 31 Oct 2006 11:55:56 -0600
Subject: [DLM] fix requestqueue race

Red Hat BZ 211914

There's a race between dlm_recoverd (1) enabling locking and (2) clearing
out the requestqueue, and dlm_recvd (1) checking if locking is enabled and
(2) adding a message to the requestqueue.  An order of recoverd(1),
recvd(1), recvd(2), recoverd(2) will result in a message being left on the
requestqueue.  The fix is to have dlm_recvd check if dlm_recoverd has
enabled locking after taking the mutex for the requestqueue and if it has
processing the message instead of queueing it.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/lock.c         | 15 +++++++++++----
 fs/dlm/requestqueue.c | 21 +++++++++++++++++----
 fs/dlm/requestqueue.h |  2 +-
 3 files changed, 29 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 3f2befa4797..6088a16926b 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -3028,10 +3028,17 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
 
 	while (1) {
 		if (dlm_locking_stopped(ls)) {
-			if (!recovery)
-				dlm_add_requestqueue(ls, nodeid, hd);
-			error = -EINTR;
-			goto out;
+			if (recovery) {
+				error = -EINTR;
+				goto out;
+			}
+			error = dlm_add_requestqueue(ls, nodeid, hd);
+			if (error == -EAGAIN)
+				continue;
+			else {
+				error = -EINTR;
+				goto out;
+			}
 		}
 
 		if (lock_recovery_try(ls))
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index 7b2b089634a..0226d2a0a0f 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -30,26 +30,39 @@ struct rq_entry {
  * lockspace is enabled on some while still suspended on others.
  */
 
-void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
+int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
 {
 	struct rq_entry *e;
 	int length = hd->h_length;
+	int rv = 0;
 
 	if (dlm_is_removed(ls, nodeid))
-		return;
+		return 0;
 
 	e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
 	if (!e) {
 		log_print("dlm_add_requestqueue: out of memory\n");
-		return;
+		return 0;
 	}
 
 	e->nodeid = nodeid;
 	memcpy(e->request, hd, length);
 
+	/* We need to check dlm_locking_stopped() after taking the mutex to
+	   avoid a race where dlm_recoverd enables locking and runs
+	   process_requestqueue between our earlier dlm_locking_stopped check
+	   and this addition to the requestqueue. */
+
 	mutex_lock(&ls->ls_requestqueue_mutex);
-	list_add_tail(&e->list, &ls->ls_requestqueue);
+	if (dlm_locking_stopped(ls))
+		list_add_tail(&e->list, &ls->ls_requestqueue);
+	else {
+		log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid);
+		kfree(e);
+		rv = -EAGAIN;
+	}
 	mutex_unlock(&ls->ls_requestqueue_mutex);
+	return rv;
 }
 
 int dlm_process_requestqueue(struct dlm_ls *ls)
diff --git a/fs/dlm/requestqueue.h b/fs/dlm/requestqueue.h
index 349f0d292d9..6a53ea03335 100644
--- a/fs/dlm/requestqueue.h
+++ b/fs/dlm/requestqueue.h
@@ -13,7 +13,7 @@
 #ifndef __REQUESTQUEUE_DOT_H__
 #define __REQUESTQUEUE_DOT_H__
 
-void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
+int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
 int dlm_process_requestqueue(struct dlm_ls *ls);
 void dlm_wait_requestqueue(struct dlm_ls *ls);
 void dlm_purge_requestqueue(struct dlm_ls *ls);
-- 
cgit v1.2.3


From 91c0dc93a1a6bbdd79707ed311e48b4397df177f Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Tue, 31 Oct 2006 11:56:01 -0600
Subject: [DLM] fix aborted recovery during node removal

Red Hat BZ 211914

With the new cluster infrastructure, dlm recovery for a node removal can
be aborted and restarted for a node addition.  When this happens, the
restarted recovery isn't aware that it's doing recovery for the earlier
removal as well as the addition.  So, it then skips the recovery steps
only required when nodes are removed.  This can result in locks not being
purged for failed/removed nodes.  The fix is to check for removed nodes
for which recovery has not been completed at the start of a new recovery
sequence.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/member.c   | 8 ++++++++
 fs/dlm/recoverd.c | 7 +++++++
 2 files changed, 15 insertions(+)

(limited to 'fs')

diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index a3f7de7f3a8..85e2897bd74 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -186,6 +186,14 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
 	struct dlm_member *memb, *safe;
 	int i, error, found, pos = 0, neg = 0, low = -1;
 
+	/* previously removed members that we've not finished removing need to
+	   count as a negative change so the "neg" recovery steps will happen */
+
+	list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
+		log_debug(ls, "prev removed member %d", memb->nodeid);
+		neg++;
+	}
+
 	/* move departed members from ls_nodes to ls_nodes_gone */
 
 	list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 362e3eff4dc..4a1d6023fd9 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -164,6 +164,13 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 		 */
 
 		dlm_recover_rsbs(ls);
+	} else {
+		/*
+		 * Other lockspace members may be going through the "neg" steps
+		 * while also adding us to the lockspace, in which case they'll
+		 * be looking for this status bit during dlm_recover_locks().
+		 */
+		dlm_set_recover_status(ls, DLM_RS_LOCKS);
 	}
 
 	dlm_release_root_list(ls);
-- 
cgit v1.2.3


From 2cdc98aaf072d573df10c503d3b3b0b74e2a6d06 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Tue, 31 Oct 2006 11:56:08 -0600
Subject: [DLM] fix stopping unstarted recovery

Red Hat BZ 211914

When many nodes are joining a lockspace simultaneously, the dlm gets a
quick sequence of stop/start events, a pair for adding each node.
dlm_controld in user space sends dlm_recoverd in the kernel each stop and
start event.  dlm_controld will sometimes send the stop before
dlm_recoverd has had a chance to take up the previously queued start.  The
stop aborts the processing of the previous start by setting the
RECOVERY_STOP flag.  dlm_recoverd is erroneously clearing this flag and
ignoring the stop/abort if it happens to take up the start after the stop
meant to abort it.  The fix is to check the sequence number that's
incremented for each stop/start before clearing the flag.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/recoverd.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 4a1d6023fd9..6e4ee94ce7d 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -219,6 +219,10 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 	return error;
 }
 
+/* The dlm_ls_start() that created the rv we take here may already have been
+   stopped via dlm_ls_stop(); in that case we need to leave the RECOVERY_STOP
+   flag set. */
+
 static void do_ls_recovery(struct dlm_ls *ls)
 {
 	struct dlm_recover *rv = NULL;
@@ -226,7 +230,8 @@ static void do_ls_recovery(struct dlm_ls *ls)
 	spin_lock(&ls->ls_recover_lock);
 	rv = ls->ls_recover_args;
 	ls->ls_recover_args = NULL;
-	clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+	if (rv && ls->ls_recover_seq == rv->seq)
+		clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
 	spin_unlock(&ls->ls_recover_lock);
 
 	if (rv) {
-- 
cgit v1.2.3


From 4b77f2c93d052adca8cc8690b9b5e7f8798f4ddd Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Wed, 1 Nov 2006 09:31:48 -0600
Subject: [DLM] do full recover_locks barrier

Red Hat BZ 211914

The previous patch "[DLM] fix aborted recovery during
node removal" was incomplete as discovered with further testing.  It set
the bit for the RS_LOCKS barrier but did not then wait for the barrier.
This is often ok, but sometimes it will cause yet another recovery hang.
If it's a new node that also has the lowest nodeid that skips the barrier
wait, then it misses the important step of collecting and reporting the
barrier status from the other nodes (which is the job of the low nodeid in
the barrier wait routine).

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/recoverd.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 6e4ee94ce7d..8bb895ffd90 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -168,9 +168,15 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 		/*
 		 * Other lockspace members may be going through the "neg" steps
 		 * while also adding us to the lockspace, in which case they'll
-		 * be looking for this status bit during dlm_recover_locks().
+		 * be doing the recover_locks (RS_LOCKS) barrier.
 		 */
 		dlm_set_recover_status(ls, DLM_RS_LOCKS);
+
+		error = dlm_recover_locks_wait(ls);
+		if (error) {
+			log_error(ls, "recover_locks_wait failed %d", error);
+			goto fail;
+		}
 	}
 
 	dlm_release_root_list(ls);
-- 
cgit v1.2.3


From 6f90a8b1b87f97144911790390d56f695b59db9b Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Fri, 10 Nov 2006 14:16:27 -0600
Subject: [DLM] clear sbflags on lock master

RH BZ 211622

The ALTMODE flag can be set in the lock master's copy of the lock but
never cleared, so ALTMODE will also be returned in a subsequent conversion
of the lock when it shouldn't be.  This results in lock_dlm incorrectly
switching to the alternate lock mode when returning the result to gfs
which then asserts when it sees the wrong lock state.  The fix is to
propagate the cleared sbflags value to the master node when the lock is
requested.  QA's d_rwrandirectlarge test triggers this bug very quickly.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/lock.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 6088a16926b..30878defaeb 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -2372,6 +2372,7 @@ static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in,
 static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
 {
 	lkb->lkb_exflags = ms->m_exflags;
+	lkb->lkb_sbflags = ms->m_sbflags;
 	lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
 		         (ms->m_flags & 0x0000FFFF);
 }
-- 
cgit v1.2.3


From b98c95af01c10827e3443157651eb469071391a3 Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <pcaulfie@redhat.com>
Date: Wed, 15 Nov 2006 12:29:24 -0500
Subject: [DLM] Fix DLM config

The attached patch fixes the DLM config so that it selects the chosen network
transport. It should fix the bug where DLM can be left selected when NET gets
unselected. This incorporates all the comments received about this patch.

Cc: Adrian Bunk <bunk@stusta.de>
Cc: Andrew Morton <akpm@osdl.org>
Signed-Off-By: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index c5985b883b2..b5654a284fe 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -1,10 +1,11 @@
 menu "Distributed Lock Manager"
-	depends on INET && IP_SCTP && EXPERIMENTAL
+	depends on EXPERIMENTAL && INET
 
 config DLM
 	tristate "Distributed Lock Manager (DLM)"
 	depends on IPV6 || IPV6=n
 	select CONFIGFS_FS
+	select IP_SCTP if DLM_SCTP
 	help
 	A general purpose distributed lock manager for kernel or userspace
 	applications.
-- 
cgit v1.2.3


From ab923031ceb95ec50ef33ccadf28663c660aa94c Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 15 Nov 2006 15:17:03 -0500
Subject: [GFS2] Fix memory allocation in glock.c

Change from GFP_KERNEL to GFP_NOFS as this was causing a
slow down when trying to push inodes from cache.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 746347a404c..edc21c8d7fa 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -769,7 +769,7 @@ restart:
 	} else {
 		spin_unlock(&gl->gl_spin);
 
-		new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_KERNEL);
+		new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_NOFS);
 		if (!new_gh)
 			return;
 		set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
-- 
cgit v1.2.3


From 4cf1ed8144e740de27c6146c25d5d7ea26679cc5 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 15 Nov 2006 15:21:06 -0500
Subject: [GFS2] Tidy up bmap & fix boundary bug

This moves the locking for bmap into the bmap function itself
rather than using a wrapper function. It also fixes a bug where
the boundary flag was set on the wrong bh. Also the flags on
the mapped bh are reset earlier in the function to ensure that
they are 100% correct on the error path.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/bmap.c | 117 ++++++++++++++++++++++++++-------------------------------
 1 file changed, 54 insertions(+), 63 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 06e3447ea13..8240c1ff94f 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -423,12 +423,29 @@ static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
 	return 0;
 }
 
+static inline void bmap_lock(struct inode *inode, int create)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	if (create)
+		down_write(&ip->i_rw_mutex);
+	else
+		down_read(&ip->i_rw_mutex);
+}
+
+static inline void bmap_unlock(struct inode *inode, int create)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	if (create)
+		up_write(&ip->i_rw_mutex);
+	else
+		up_read(&ip->i_rw_mutex);
+}
+
 /**
- * gfs2_block_pointers - Map a block from an inode to a disk block
+ * gfs2_block_map - Map a block from an inode to a disk block
  * @inode: The inode
  * @lblock: The logical block number
- * @map_bh: The bh to be mapped
- * @mp: metapath to use
+ * @bh_map: The bh to be mapped
  *
  * Find the block number on the current device which corresponds to an
  * inode's block. If the block had to be created, "new" will be set.
@@ -436,8 +453,8 @@ static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
  * Returns: errno
  */
 
-static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
-			       struct buffer_head *bh_map, struct metapath *mp)
+int gfs2_block_map(struct inode *inode, u64 lblock, int create,
+		   struct buffer_head *bh_map)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -451,51 +468,55 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
 	u64 dblock = 0;
 	int boundary;
 	unsigned int maxlen = bh_map->b_size >> inode->i_blkbits;
+	struct metapath mp;
+	u64 size;
 
 	BUG_ON(maxlen == 0);
 
 	if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
 		return 0;
 
+	bmap_lock(inode, create);
+	clear_buffer_mapped(bh_map);
+	clear_buffer_new(bh_map);
+	clear_buffer_boundary(bh_map);
 	bsize = gfs2_is_dir(ip) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
-
-	height = calc_tree_height(ip, (lblock + 1) * bsize);
-	if (ip->i_di.di_height < height) {
-		if (!create)
-			return 0;
-
-		error = build_height(inode, height);
-		if (error)
-			return error;
+	size = (lblock + 1) * bsize;
+
+	if (size > ip->i_di.di_size) {
+		height = calc_tree_height(ip, size);
+		if (ip->i_di.di_height < height) {
+			if (!create)
+				goto out_ok;
+	
+			error = build_height(inode, height);
+			if (error)
+				goto out_fail;
+		}
 	}
 
-	find_metapath(ip, lblock, mp);
+	find_metapath(ip, lblock, &mp);
 	end_of_metadata = ip->i_di.di_height - 1;
-
 	error = gfs2_meta_inode_buffer(ip, &bh);
 	if (error)
-		return error;
+		goto out_fail;
 
 	for (x = 0; x < end_of_metadata; x++) {
-		lookup_block(ip, bh, x, mp, create, &new, &dblock);
+		lookup_block(ip, bh, x, &mp, create, &new, &dblock);
 		brelse(bh);
 		if (!dblock)
-			return 0;
+			goto out_ok;
 
 		error = gfs2_meta_indirect_buffer(ip, x+1, dblock, new, &bh);
 		if (error)
-			return error;
+			goto out_fail;
 	}
 
-	boundary = lookup_block(ip, bh, end_of_metadata, mp, create, &new, &dblock);
-	clear_buffer_mapped(bh_map);
-	clear_buffer_new(bh_map);
-	clear_buffer_boundary(bh_map);
-
+	boundary = lookup_block(ip, bh, end_of_metadata, &mp, create, &new, &dblock);
 	if (dblock) {
 		map_bh(bh_map, inode->i_sb, dblock);
 		if (boundary)
-			set_buffer_boundary(bh);
+			set_buffer_boundary(bh_map);
 		if (new) {
 			struct buffer_head *dibh;
 			error = gfs2_meta_inode_buffer(ip, &dibh);
@@ -510,8 +531,8 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
 		while(--maxlen && !buffer_boundary(bh_map)) {
 			u64 eblock;
 
-			mp->mp_list[end_of_metadata]++;
-			boundary = lookup_block(ip, bh, end_of_metadata, mp, 0, &new, &eblock);
+			mp.mp_list[end_of_metadata]++;
+			boundary = lookup_block(ip, bh, end_of_metadata, &mp, 0, &new, &eblock);
 			if (eblock != ++dblock)
 				break;
 			bh_map->b_size += (1 << inode->i_blkbits);
@@ -521,43 +542,15 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
 	}
 out_brelse:
 	brelse(bh);
-	return 0;
-}
-
-
-static inline void bmap_lock(struct inode *inode, int create)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	if (create)
-		down_write(&ip->i_rw_mutex);
-	else
-		down_read(&ip->i_rw_mutex);
-}
-
-static inline void bmap_unlock(struct inode *inode, int create)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	if (create)
-		up_write(&ip->i_rw_mutex);
-	else
-		up_read(&ip->i_rw_mutex);
-}
-
-int gfs2_block_map(struct inode *inode, u64 lblock, int create,
-		   struct buffer_head *bh)
-{
-	struct metapath mp;
-	int ret;
-
-	bmap_lock(inode, create);
-	ret = gfs2_block_pointers(inode, lblock, create, bh, &mp);
+out_ok:
+	error = 0;
+out_fail:
 	bmap_unlock(inode, create);
-	return ret;
+	return error;
 }
 
 int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
 {
-	struct metapath mp;
 	struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
 	int ret;
 	int create = *new;
@@ -567,9 +560,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
 	BUG_ON(!new);
 
 	bh.b_size = 1 << (inode->i_blkbits + 5);
-	bmap_lock(inode, create);
-	ret = gfs2_block_pointers(inode, lblock, create, &bh, &mp);
-	bmap_unlock(inode, create);
+	ret = gfs2_block_map(inode, lblock, create, &bh);
 	*extlen = bh.b_size >> inode->i_blkbits;
 	*dblock = bh.b_blocknr;
 	if (buffer_new(&bh))
-- 
cgit v1.2.3


From 175011cf6edddea32e5f5e0e04434104cc348de9 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 16 Nov 2006 10:58:55 -0500
Subject: [GFS2] Remove unused sysfs files

Four of the sysfs files are unused and can therefore be removed.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/sys.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 0e0ec988f73..983eaf1e06b 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -426,9 +426,6 @@ static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
 }                                                                             \
 TUNE_ATTR_2(name, name##_store)
 
-TUNE_ATTR(ilimit, 0);
-TUNE_ATTR(ilimit_tries, 0);
-TUNE_ATTR(ilimit_min, 0);
 TUNE_ATTR(demote_secs, 0);
 TUNE_ATTR(incore_log_blocks, 0);
 TUNE_ATTR(log_flush_secs, 0);
@@ -447,7 +444,6 @@ TUNE_ATTR(quota_simul_sync, 1);
 TUNE_ATTR(quota_cache_secs, 1);
 TUNE_ATTR(max_atomic_write, 1);
 TUNE_ATTR(stall_secs, 1);
-TUNE_ATTR(entries_per_readdir, 1);
 TUNE_ATTR(greedy_default, 1);
 TUNE_ATTR(greedy_quantum, 1);
 TUNE_ATTR(greedy_max, 1);
@@ -459,9 +455,6 @@ TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
 TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
 
 static struct attribute *tune_attrs[] = {
-	&tune_attr_ilimit.attr,
-	&tune_attr_ilimit_tries.attr,
-	&tune_attr_ilimit_min.attr,
 	&tune_attr_demote_secs.attr,
 	&tune_attr_incore_log_blocks.attr,
 	&tune_attr_log_flush_secs.attr,
@@ -478,7 +471,6 @@ static struct attribute *tune_attrs[] = {
 	&tune_attr_quota_cache_secs.attr,
 	&tune_attr_max_atomic_write.attr,
 	&tune_attr_stall_secs.attr,
-	&tune_attr_entries_per_readdir.attr,
 	&tune_attr_greedy_default.attr,
 	&tune_attr_greedy_quantum.attr,
 	&tune_attr_greedy_max.attr,
-- 
cgit v1.2.3


From dcd2479959c79d44f5dd77e71672e70f1f8b1f06 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 16 Nov 2006 11:08:16 -0500
Subject: [GFS2] Remove unused function from inode.c

The gfs2_glock_nq_m_atime function is unused in so far as its only
ever called with num_gh = 1, and this falls through to the
gfs2_glock_nq_atime function, so we might as well call that directly.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c       | 86 ---------------------------------------------------
 fs/gfs2/inode.h       |  4 ---
 fs/gfs2/ops_address.c |  8 ++---
 fs/gfs2/ops_file.c    |  2 +-
 4 files changed, 5 insertions(+), 95 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index ea9ca239c87..ce7f8330690 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1234,92 +1234,6 @@ fail:
 	return error;
 }
 
-/**
- * glock_compare_atime - Compare two struct gfs2_glock structures for sort
- * @arg_a: the first structure
- * @arg_b: the second structure
- *
- * Returns: 1 if A > B
- *         -1 if A < B
- *          0 if A == B
- */
-
-static int glock_compare_atime(const void *arg_a, const void *arg_b)
-{
-	const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
-	const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
-	const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
-	const struct lm_lockname *b = &gh_b->gh_gl->gl_name;
-
-	if (a->ln_number > b->ln_number)
-		return 1;
-	if (a->ln_number < b->ln_number)
-		return -1;
-	if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE)
-		return 1;
-	if (gh_a->gh_state == LM_ST_SHARED && (gh_b->gh_flags & GL_ATIME))
-		return 1;
-
-	return 0;
-}
-
-/**
- * gfs2_glock_nq_m_atime - acquire multiple glocks where one may need an
- *      atime update
- * @num_gh: the number of structures
- * @ghs: an array of struct gfs2_holder structures
- *
- * Returns: 0 on success (all glocks acquired),
- *          errno on failure (no glocks acquired)
- */
-
-int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs)
-{
-	struct gfs2_holder **p;
-	unsigned int x;
-	int error = 0;
-
-	if (!num_gh)
-		return 0;
-
-	if (num_gh == 1) {
-		ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
-		if (ghs->gh_flags & GL_ATIME)
-			error = gfs2_glock_nq_atime(ghs);
-		else
-			error = gfs2_glock_nq(ghs);
-		return error;
-	}
-
-	p = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
-	if (!p)
-		return -ENOMEM;
-
-	for (x = 0; x < num_gh; x++)
-		p[x] = &ghs[x];
-
-	sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare_atime,NULL);
-
-	for (x = 0; x < num_gh; x++) {
-		p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
-
-		if (p[x]->gh_flags & GL_ATIME)
-			error = gfs2_glock_nq_atime(p[x]);
-		else
-			error = gfs2_glock_nq(p[x]);
-
-		if (error) {
-			while (x--)
-				gfs2_glock_dq(p[x]);
-			break;
-		}
-	}
-
-	kfree(p);
-	return error;
-}
-
-
 static int
 __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 {
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 46917edfdd6..b57f448b15b 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -50,12 +50,8 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 		   struct gfs2_inode *ip);
 int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
-
 int gfs2_glock_nq_atime(struct gfs2_holder *gh);
-int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs);
-
 int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
-
 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
 
 #endif /* __INODE_DOT_H__ */
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 2f7ef980d02..8676c39d0d7 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -217,7 +217,7 @@ static int gfs2_readpage(struct file *file, struct page *page)
 		}
 		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
 		do_unlock = 1;
-		error = gfs2_glock_nq_m_atime(1, &gh);
+		error = gfs2_glock_nq_atime(&gh);
 		if (unlikely(error))
 			goto out_unlock;
 	}
@@ -282,7 +282,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
 		gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
 				 LM_FLAG_TRY_1CB|GL_ATIME, &gh);
 		do_unlock = 1;
-		ret = gfs2_glock_nq_m_atime(1, &gh);
+		ret = gfs2_glock_nq_atime(&gh);
 		if (ret == GLR_TRYFAILED)
 			goto out_noerror;
 		if (unlikely(ret))
@@ -354,7 +354,7 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
 
 
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh);
-	error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
+	error = gfs2_glock_nq_atime(&ip->i_gh);
 	if (unlikely(error)) {
 		if (error == GLR_TRYFAILED)
 			error = AOP_TRUNCATED_PAGE;
@@ -609,7 +609,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
 	 * on this path. All we need change is atime.
 	 */
 	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
-	rv = gfs2_glock_nq_m_atime(1, &gh);
+	rv = gfs2_glock_nq_atime(&gh);
 	if (rv)
 		goto out;
 
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index eabf6c61a96..c2be216eb92 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -253,7 +253,7 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
 	u32 fsflags;
 
 	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
-	error = gfs2_glock_nq_m_atime(1, &gh);
+	error = gfs2_glock_nq_atime(&gh);
 	if (error)
 		return error;
 
-- 
cgit v1.2.3


From 5e7d65cd9d3819512b059f4260de0119b985454c Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 17 Nov 2006 12:27:44 -0500
Subject: [GFS2] Make sentinel dirents compatible with gfs1

When deleting directory entries, we set the inum.no_addr to zero
in a dirent when its the first dirent in a block and thus cannot
be merged into the previous dirent as is the usual case. In gfs1,
inum.no_formal_ino was used instead.

This patch changes gfs2 to set both inum.no_addr and inum.no_formal_ino
to zero. It also changes the test from just looking at inum.no_addr to
look at both inum.no_addr and inum.no_formal_ino and a sentinel is
now considered to be a dirent in which _either_ (or both) of them
is set to zero.

This resolves Red Hat bugzillas: #215809, #211465

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/dir.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index a2923fb91bb..0fdcb7713cd 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -340,10 +340,15 @@ fail:
 	return (copied) ? copied : error;
 }
 
+static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
+{
+	return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0;
+}
+
 static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent,
 				     const struct qstr *name, int ret)
 {
-	if (dent->de_inum.no_addr != 0 &&
+	if (!gfs2_dirent_sentinel(dent) &&
 	    be32_to_cpu(dent->de_hash) == name->hash &&
 	    be16_to_cpu(dent->de_name_len) == name->len &&
 	    memcmp(dent+1, name->name, name->len) == 0)
@@ -388,7 +393,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
 	unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
 	unsigned totlen = be16_to_cpu(dent->de_rec_len);
 
-	if (!dent->de_inum.no_addr)
+	if (gfs2_dirent_sentinel(dent))
 		actual = GFS2_DIRENT_SIZE(0);
 	if (totlen - actual >= required)
 		return 1;
@@ -405,7 +410,7 @@ static int gfs2_dirent_gather(const struct gfs2_dirent *dent,
 			      void *opaque)
 {
 	struct dirent_gather *g = opaque;
-	if (dent->de_inum.no_addr) {
+	if (!gfs2_dirent_sentinel(dent)) {
 		g->pdent[g->offset++] = dent;
 	}
 	return 0;
@@ -433,10 +438,10 @@ static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset,
 	if (unlikely(offset + size > len))
 		goto error;
 	msg = "zero inode number";
-	if (unlikely(!first && !dent->de_inum.no_addr))
+	if (unlikely(!first && gfs2_dirent_sentinel(dent)))
 		goto error;
 	msg = "name length is greater than space in dirent";
-	if (dent->de_inum.no_addr &&
+	if (!gfs2_dirent_sentinel(dent) &&
 	    unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) >
 		     size))
 		goto error;
@@ -598,7 +603,7 @@ static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
 		return ret;
 
         /* Only the first dent could ever have de_inum.no_addr == 0 */
-	if (!tmp->de_inum.no_addr) {
+	if (gfs2_dirent_sentinel(tmp)) {
 		gfs2_consist_inode(dip);
 		return -EIO;
 	}
@@ -621,7 +626,7 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
 {
 	u16 cur_rec_len, prev_rec_len;
 
-	if (!cur->de_inum.no_addr) {
+	if (gfs2_dirent_sentinel(cur)) {
 		gfs2_consist_inode(dip);
 		return;
 	}
@@ -633,7 +638,8 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
 	   out the inode number and return.  */
 
 	if (!prev) {
-		cur->de_inum.no_addr = 0;	/* No endianess worries */
+		cur->de_inum.no_addr = 0;
+		cur->de_inum.no_formal_ino = 0;
 		return;
 	}
 
@@ -664,7 +670,7 @@ static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
 	struct gfs2_dirent *ndent;
 	unsigned offset = 0, totlen;
 
-	if (dent->de_inum.no_addr)
+	if (!gfs2_dirent_sentinel(dent))
 		offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
 	totlen = be16_to_cpu(dent->de_rec_len);
 	BUG_ON(offset + name->len > totlen);
@@ -1002,7 +1008,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 		if (dirent_next(dip, obh, &next))
 			next = NULL;
 
-		if (dent->de_inum.no_addr &&
+		if (!gfs2_dirent_sentinel(dent) &&
 		    be32_to_cpu(dent->de_hash) < divider) {
 			struct qstr str;
 			str.name = (char*)(dent+1);
-- 
cgit v1.2.3


From fa2ecfc5e11b12f25b67f9c84ac6b0e74a6a0115 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 20 Nov 2006 10:04:49 -0500
Subject: [GFS2] Fix Kconfig wrt CRC32
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GFS2 requires the CRC32 library function. This was reported by
Toralf Förster.

Cc: Toralf Förster <toralf.foerster@gmx.de>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 8c27de8b956..c0791cbacad 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -2,6 +2,7 @@ config GFS2_FS
 	tristate "GFS2 file system support"
 	depends on EXPERIMENTAL
 	select FS_POSIX_ACL
+	select CRC32
 	help
 	A cluster filesystem.
 
-- 
cgit v1.2.3


From 1a14d3a68f04527546121eb7b45187ff6af63151 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 20 Nov 2006 10:37:45 -0500
Subject: [GFS2] Simplify glops functions

The go_sync callback took two flags, but one of them was set on every
call, so this patch removes once of the flags and makes the previously
conditional operations (on this flag), unconditional.

The go_inval callback took three flags, each of which was set on every
call to it. This patch removes the flags and makes the operations
unconditional, which makes the logic rather more obvious.

Two now unused flags are also removed from incore.h.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c  | 10 +++++-----
 fs/gfs2/glops.c  | 42 +++++++++++-------------------------------
 fs/gfs2/incore.h | 25 +++++++++++--------------
 fs/gfs2/super.c  |  2 +-
 4 files changed, 28 insertions(+), 51 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index edc21c8d7fa..b8ba4d5c1d9 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -847,12 +847,12 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
 
 	if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
 		if (glops->go_inval)
-			glops->go_inval(gl, DIO_METADATA | DIO_DATA);
+			glops->go_inval(gl, DIO_METADATA);
 	} else if (gl->gl_state == LM_ST_DEFERRED) {
 		/* We might not want to do this here.
 		   Look at moving to the inode glops. */
 		if (glops->go_inval)
-			glops->go_inval(gl, DIO_DATA);
+			glops->go_inval(gl, 0);
 	}
 
 	/*  Deal with each possible exit condition  */
@@ -954,7 +954,7 @@ void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags)
 	gfs2_assert_warn(sdp, state != gl->gl_state);
 
 	if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
-		glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE);
+		glops->go_sync(gl);
 
 	gfs2_glock_hold(gl);
 	gl->gl_req_bh = xmote_bh;
@@ -995,7 +995,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
 	state_change(gl, LM_ST_UNLOCKED);
 
 	if (glops->go_inval)
-		glops->go_inval(gl, DIO_METADATA | DIO_DATA);
+		glops->go_inval(gl, DIO_METADATA);
 
 	if (gh) {
 		spin_lock(&gl->gl_spin);
@@ -1041,7 +1041,7 @@ void gfs2_glock_drop_th(struct gfs2_glock *gl)
 	gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
 
 	if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
-		glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE);
+		glops->go_sync(gl);
 
 	gfs2_glock_hold(gl);
 	gl->gl_req_bh = drop_bh;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index b92de0af0bf..60561ca070c 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -173,23 +173,18 @@ static void gfs2_page_writeback(struct gfs2_glock *gl)
 /**
  * meta_go_sync - sync out the metadata for this glock
  * @gl: the glock
- * @flags: DIO_*
  *
  * Called when demoting or unlocking an EX glock.  We must flush
  * to disk all dirty buffers/pages relating to this glock, and must not
  * not return to caller to demote/unlock the glock until I/O is complete.
  */
 
-static void meta_go_sync(struct gfs2_glock *gl, int flags)
+static void meta_go_sync(struct gfs2_glock *gl)
 {
-	if (!(flags & DIO_METADATA))
-		return;
-
 	if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
 		gfs2_log_flush(gl->gl_sbd, gl);
 		gfs2_meta_sync(gl);
-		if (flags & DIO_RELEASE)
-			gfs2_ail_empty_gl(gl);
+		gfs2_ail_empty_gl(gl);
 	}
 
 }
@@ -264,31 +259,18 @@ static void inode_go_drop_th(struct gfs2_glock *gl)
 /**
  * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
  * @gl: the glock protecting the inode
- * @flags:
  *
  */
 
-static void inode_go_sync(struct gfs2_glock *gl, int flags)
+static void inode_go_sync(struct gfs2_glock *gl)
 {
-	int meta = (flags & DIO_METADATA);
-	int data = (flags & DIO_DATA);
-
 	if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
-		if (meta && data) {
-			gfs2_page_writeback(gl);
-			gfs2_log_flush(gl->gl_sbd, gl);
-			gfs2_meta_sync(gl);
-			gfs2_page_wait(gl);
-			clear_bit(GLF_DIRTY, &gl->gl_flags);
-		} else if (meta) {
-			gfs2_log_flush(gl->gl_sbd, gl);
-			gfs2_meta_sync(gl);
-		} else if (data) {
-			gfs2_page_writeback(gl);
-			gfs2_page_wait(gl);
-		}
-		if (flags & DIO_RELEASE)
-			gfs2_ail_empty_gl(gl);
+		gfs2_page_writeback(gl);
+		gfs2_log_flush(gl->gl_sbd, gl);
+		gfs2_meta_sync(gl);
+		gfs2_page_wait(gl);
+		clear_bit(GLF_DIRTY, &gl->gl_flags);
+		gfs2_ail_empty_gl(gl);
 	}
 }
 
@@ -302,15 +284,13 @@ static void inode_go_sync(struct gfs2_glock *gl, int flags)
 static void inode_go_inval(struct gfs2_glock *gl, int flags)
 {
 	int meta = (flags & DIO_METADATA);
-	int data = (flags & DIO_DATA);
 
 	if (meta) {
 		struct gfs2_inode *ip = gl->gl_object;
 		gfs2_meta_inval(gl);
 		set_bit(GIF_INVALID, &ip->i_flags);
 	}
-	if (data)
-		gfs2_page_inval(gl);
+	gfs2_page_inval(gl);
 }
 
 /**
@@ -494,7 +474,7 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl)
 	if (gl->gl_state != LM_ST_UNLOCKED &&
 	    test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
 		gfs2_meta_cache_flush(GFS2_I(sdp->sd_jdesc->jd_inode));
-		j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
+		j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
 
 		error = gfs2_find_jhead(sdp->sd_jdesc, &head);
 		if (error)
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 227a74dc5ec..734421edae8 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -14,8 +14,6 @@
 
 #define DIO_WAIT	0x00000010
 #define DIO_METADATA	0x00000020
-#define DIO_DATA	0x00000040
-#define DIO_RELEASE	0x00000080
 #define DIO_ALL		0x00000100
 
 struct gfs2_log_operations;
@@ -103,18 +101,17 @@ struct gfs2_bufdata {
 };
 
 struct gfs2_glock_operations {
-	void (*go_xmote_th) (struct gfs2_glock * gl, unsigned int state,
-			     int flags);
-	void (*go_xmote_bh) (struct gfs2_glock * gl);
-	void (*go_drop_th) (struct gfs2_glock * gl);
-	void (*go_drop_bh) (struct gfs2_glock * gl);
-	void (*go_sync) (struct gfs2_glock * gl, int flags);
-	void (*go_inval) (struct gfs2_glock * gl, int flags);
-	int (*go_demote_ok) (struct gfs2_glock * gl);
-	int (*go_lock) (struct gfs2_holder * gh);
-	void (*go_unlock) (struct gfs2_holder * gh);
-	void (*go_callback) (struct gfs2_glock * gl, unsigned int state);
-	void (*go_greedy) (struct gfs2_glock * gl);
+	void (*go_xmote_th) (struct gfs2_glock *gl, unsigned int state, int flags);
+	void (*go_xmote_bh) (struct gfs2_glock *gl);
+	void (*go_drop_th) (struct gfs2_glock *gl);
+	void (*go_drop_bh) (struct gfs2_glock *gl);
+	void (*go_sync) (struct gfs2_glock *gl);
+	void (*go_inval) (struct gfs2_glock *gl, int flags);
+	int (*go_demote_ok) (struct gfs2_glock *gl);
+	int (*go_lock) (struct gfs2_holder *gh);
+	void (*go_unlock) (struct gfs2_holder *gh);
+	void (*go_callback) (struct gfs2_glock *gl, unsigned int state);
+	void (*go_greedy) (struct gfs2_glock *gl);
 	const int go_type;
 };
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 0ef83172708..1408c5f3137 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -517,7 +517,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 		return error;
 
 	gfs2_meta_cache_flush(ip);
-	j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
+	j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
 
 	error = gfs2_find_jhead(sdp->sd_jdesc, &head);
 	if (error)
-- 
cgit v1.2.3


From 28626e2078571c4b776a17eaa486bbd2b7dfe2cd Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 22 Nov 2006 11:13:21 -0500
Subject: [GFS2] Fix glock ordering on inode creation

The lock order here should be parent -> child rather than
numeric order.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 31 ++++---------------------------
 1 file changed, 4 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index ce7f8330690..d122074c45e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -870,33 +870,10 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 	if (error)
 		goto fail_gunlock;
 
-	if (inum.no_addr < dip->i_num.no_addr) {
-		gfs2_glock_dq(ghs);
-
-		error = gfs2_glock_nq_num(sdp, inum.no_addr,
-					  &gfs2_inode_glops, LM_ST_EXCLUSIVE,
-					  GL_SKIP, ghs + 1);
-		if (error) {
-			return ERR_PTR(error);
-		}
-
-		gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
-		error = gfs2_glock_nq(ghs);
-		if (error) {
-			gfs2_glock_dq_uninit(ghs + 1);
-			return ERR_PTR(error);
-		}
-
-		error = create_ok(dip, name, mode);
-		if (error)
-			goto fail_gunlock2;
-	} else {
-		error = gfs2_glock_nq_num(sdp, inum.no_addr,
-					  &gfs2_inode_glops, LM_ST_EXCLUSIVE,
-					  GL_SKIP, ghs + 1);
-		if (error)
-			goto fail_gunlock;
-	}
+	error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops,
+				  LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
+	if (error)
+		goto fail_gunlock;
 
 	error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev);
 	if (error)
-- 
cgit v1.2.3


From ae619320b22f8e0b2bbe4a3a5ac2f9ccf08d7ec2 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 22 Nov 2006 11:28:47 -0500
Subject: [GFS2] mark_inode_dirty after write to stuffed file

Writes to stuffed files were not being marked dirty correctly.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_address.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 8676c39d0d7..d8d69a72a10 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -472,8 +472,10 @@ static int gfs2_commit_write(struct file *file, struct page *page,
 
 		SetPageUptodate(page);
 
-		if (inode->i_size < file_size)
+		if (inode->i_size < file_size) {
 			i_size_write(inode, file_size);
+			mark_inode_dirty(inode);
+		}
 	} else {
 		if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED ||
 		    gfs2_is_jdata(ip))
-- 
cgit v1.2.3


From b004157ab5b374a498a5874cda68c389219d23e7 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 23 Nov 2006 10:51:34 -0500
Subject: [GFS2] Fix journal flush problem

This fixes a bug which resulted in poor performance due to flushing
the journal too often. The code path in question was via the inode_go_sync()
function in glops.c. The solution is not to flush the journal immediately
when inodes are ejected from memory, but batch up the work for glockd to
deal with later on. This means that glocks may now live on beyond the end of
the lifetime of their inodes (but not very much longer in the normal case).

Also fixed in this patch is a bug (which was hidden by the bug mentioned above) in
calculation of the number of free journal blocks.

The gfs2_logd process has been altered to be more responsive to the journal
filling up. We now wake it up when the number of uncommitted journal blocks
has reached the threshold level rather than trying to flush directly at the
end of each transaction. This again means doing fewer, but larger, log
flushes in general.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/daemon.c    |  7 ++--
 fs/gfs2/glock.c     | 17 +---------
 fs/gfs2/glock.h     |  1 -
 fs/gfs2/glops.c     | 93 ++++++++++++++---------------------------------------
 fs/gfs2/log.c       | 17 ++++++----
 fs/gfs2/meta_io.c   |  3 ++
 fs/gfs2/ops_super.c |  7 ++--
 7 files changed, 46 insertions(+), 99 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index cab1f68d468..683cb5bda87 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -112,6 +112,7 @@ int gfs2_logd(void *data)
 	struct gfs2_sbd *sdp = data;
 	struct gfs2_holder ji_gh;
 	unsigned long t;
+	int need_flush;
 
 	while (!kthread_should_stop()) {
 		/* Advance the log tail */
@@ -120,8 +121,10 @@ int gfs2_logd(void *data)
 		    gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
 
 		gfs2_ail1_empty(sdp, DIO_ALL);
-
-		if (time_after_eq(jiffies, t)) {
+		gfs2_log_lock(sdp);
+		need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks);
+		gfs2_log_unlock(sdp);
+		if (need_flush || time_after_eq(jiffies, t)) {
 			gfs2_log_flush(sdp, NULL);
 			sdp->sd_log_flush_time = jiffies;
 		}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b8ba4d5c1d9..3c2ff81c84e 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -785,21 +785,6 @@ out:
 		gfs2_holder_put(new_gh);
 }
 
-void gfs2_glock_inode_squish(struct inode *inode)
-{
-	struct gfs2_holder gh;
-	struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
-	gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
-	set_bit(HIF_DEMOTE, &gh.gh_iflags);
-	spin_lock(&gl->gl_spin);
-	gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders));
-	list_add_tail(&gh.gh_list, &gl->gl_waiters2);
-	run_queue(gl);
-	spin_unlock(&gl->gl_spin);
-	wait_for_completion(&gh.gh_wait);
-	gfs2_holder_uninit(&gh);
-}
-
 /**
  * state_change - record that the glock is now in a different state
  * @gl: the glock
@@ -1920,7 +1905,7 @@ out:
 
 static void scan_glock(struct gfs2_glock *gl)
 {
-	if (gl->gl_ops == &gfs2_inode_glops)
+	if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object)
 		return;
 
 	if (gfs2_glmutex_trylock(gl)) {
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index a331bf8175e..fb39108fc05 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -106,7 +106,6 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
 void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
 			     const struct gfs2_glock_operations *glops,
 			     unsigned int state, int flags);
-void gfs2_glock_inode_squish(struct inode *inode);
 
 /**
  * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 60561ca070c..b068d10bcb6 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -106,70 +106,6 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
 	clear_bit(GIF_SW_PAGED, &ip->i_flags);
 }
 
-/**
- * gfs2_page_inval - Invalidate all pages associated with a glock
- * @gl: the glock
- *
- */
-
-static void gfs2_page_inval(struct gfs2_glock *gl)
-{
-	struct gfs2_inode *ip;
-	struct inode *inode;
-
-	ip = gl->gl_object;
-	inode = &ip->i_inode;
-	if (!ip || !S_ISREG(inode->i_mode))
-		return;
-
-	truncate_inode_pages(inode->i_mapping, 0);
-	gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !inode->i_mapping->nrpages);
-	clear_bit(GIF_PAGED, &ip->i_flags);
-}
-
-/**
- * gfs2_page_wait - Wait for writeback of data
- * @gl: the glock
- *
- * Syncs data (not metadata) for a regular file.
- * No-op for all other types.
- */
-
-static void gfs2_page_wait(struct gfs2_glock *gl)
-{
-	struct gfs2_inode *ip = gl->gl_object;
-	struct inode *inode = &ip->i_inode;
-	struct address_space *mapping = inode->i_mapping;
-	int error;
-
-	if (!S_ISREG(inode->i_mode))
-		return;
-
-	error = filemap_fdatawait(mapping);
-
-	/* Put back any errors cleared by filemap_fdatawait()
-	   so they can be caught by someone who can pass them
-	   up to user space. */
-
-	if (error == -ENOSPC)
-		set_bit(AS_ENOSPC, &mapping->flags);
-	else if (error)
-		set_bit(AS_EIO, &mapping->flags);
-
-}
-
-static void gfs2_page_writeback(struct gfs2_glock *gl)
-{
-	struct gfs2_inode *ip = gl->gl_object;
-	struct inode *inode = &ip->i_inode;
-	struct address_space *mapping = inode->i_mapping;
-
-	if (!S_ISREG(inode->i_mode))
-		return;
-
-	filemap_fdatawrite(mapping);
-}
-
 /**
  * meta_go_sync - sync out the metadata for this glock
  * @gl: the glock
@@ -264,11 +200,24 @@ static void inode_go_drop_th(struct gfs2_glock *gl)
 
 static void inode_go_sync(struct gfs2_glock *gl)
 {
+	struct gfs2_inode *ip = gl->gl_object;
+
+	if (ip && !S_ISREG(ip->i_inode.i_mode))
+		ip = NULL;
+
 	if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
-		gfs2_page_writeback(gl);
 		gfs2_log_flush(gl->gl_sbd, gl);
+		if (ip)
+			filemap_fdatawrite(ip->i_inode.i_mapping);
 		gfs2_meta_sync(gl);
-		gfs2_page_wait(gl);
+		if (ip) {
+			struct address_space *mapping = ip->i_inode.i_mapping;
+			int error = filemap_fdatawait(mapping);
+			if (error == -ENOSPC)
+				set_bit(AS_ENOSPC, &mapping->flags);
+			else if (error)
+				set_bit(AS_EIO, &mapping->flags);
+		}
 		clear_bit(GLF_DIRTY, &gl->gl_flags);
 		gfs2_ail_empty_gl(gl);
 	}
@@ -283,14 +232,20 @@ static void inode_go_sync(struct gfs2_glock *gl)
 
 static void inode_go_inval(struct gfs2_glock *gl, int flags)
 {
+	struct gfs2_inode *ip = gl->gl_object;
 	int meta = (flags & DIO_METADATA);
 
 	if (meta) {
-		struct gfs2_inode *ip = gl->gl_object;
 		gfs2_meta_inval(gl);
-		set_bit(GIF_INVALID, &ip->i_flags);
+		if (ip)
+			set_bit(GIF_INVALID, &ip->i_flags);
+	}
+
+	if (ip && S_ISREG(ip->i_inode.i_mode)) {
+		truncate_inode_pages(ip->i_inode.i_mapping, 0);
+		gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !ip->i_inode.i_mapping->nrpages);
+		clear_bit(GIF_PAGED, &ip->i_flags);
 	}
-	gfs2_page_inval(gl);
 }
 
 /**
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 0cace3da9db..6456fc39aac 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -261,6 +261,12 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
  * @sdp: The GFS2 superblock
  * @blks: The number of blocks to reserve
  *
+ * Note that we never give out the last 6 blocks of the journal. Thats
+ * due to the fact that there is are a small number of header blocks
+ * associated with each log flush. The exact number can't be known until
+ * flush time, so we ensure that we have just enough free blocks at all
+ * times to avoid running out during a log flush.
+ *
  * Returns: errno
  */
 
@@ -274,7 +280,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 
 	mutex_lock(&sdp->sd_log_reserve_mutex);
 	gfs2_log_lock(sdp);
-	while(sdp->sd_log_blks_free <= blks) {
+	while(sdp->sd_log_blks_free <= (blks + 6)) {
 		gfs2_log_unlock(sdp);
 		gfs2_ail1_empty(sdp, 0);
 		gfs2_log_flush(sdp, NULL);
@@ -643,12 +649,9 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 	up_read(&sdp->sd_log_flush_lock);
 
 	gfs2_log_lock(sdp);
-	if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) {
-		gfs2_log_unlock(sdp);
-		gfs2_log_flush(sdp, NULL);
-	} else {
-		gfs2_log_unlock(sdp);
-	}
+	if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks))
+		wake_up_process(sdp->sd_logd_process);
+	gfs2_log_unlock(sdp);
 }
 
 /**
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 3912d6a4b1e..939a09f6e88 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -472,6 +472,9 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
 	struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height;
 	int in_cache = 0;
 
+	BUG_ON(!gl);
+	BUG_ON(!sdp);
+
 	spin_lock(&ip->i_spin);
 	if (*bh_slot && (*bh_slot)->b_blocknr == num) {
 		bh = *bh_slot;
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 86351756922..7685b46f934 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -157,7 +157,8 @@ static void gfs2_write_super(struct super_block *sb)
 static int gfs2_sync_fs(struct super_block *sb, int wait)
 {
 	sb->s_dirt = 0;
-	gfs2_log_flush(sb->s_fs_info, NULL);
+	if (wait)
+		gfs2_log_flush(sb->s_fs_info, NULL);
 	return 0;
 }
 
@@ -293,8 +294,6 @@ static void gfs2_clear_inode(struct inode *inode)
 	 */
 	if (inode->i_private) {
 		struct gfs2_inode *ip = GFS2_I(inode);
-		gfs2_glock_inode_squish(inode);
-		gfs2_assert(inode->i_sb->s_fs_info, ip->i_gl->gl_state == LM_ST_UNLOCKED);
 		ip->i_gl->gl_object = NULL;
 		gfs2_glock_schedule_for_reclaim(ip->i_gl);
 		gfs2_glock_put(ip->i_gl);
@@ -395,7 +394,7 @@ static void gfs2_delete_inode(struct inode *inode)
 	if (!inode->i_private)
 		goto out;
 
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &gh);
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh);
 	if (unlikely(error)) {
 		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 		goto out;
-- 
cgit v1.2.3


From a25311c8e0b7071b129ca9a9e49e22eeaf620864 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 23 Nov 2006 11:06:35 -0500
Subject: [GFS2] Move gfs2_meta_syncfs() into log.c

By moving gfs2_meta_syncfs() into log.c, gfs2_ail1_start()
can be made static.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/log.c     | 21 ++++++++++++++++++++-
 fs/gfs2/log.h     |  2 +-
 fs/gfs2/meta_io.c | 17 -----------------
 fs/gfs2/meta_io.h |  1 -
 4 files changed, 21 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 6456fc39aac..7713d591867 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -15,6 +15,7 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
 #include <linux/lm_interface.h>
+#include <linux/delay.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -142,7 +143,7 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
 	return list_empty(&ai->ai_ail1_list);
 }
 
-void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
+static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
 {
 	struct list_head *head = &sdp->sd_ail1_list;
 	u64 sync_gen;
@@ -689,3 +690,21 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
 	up_write(&sdp->sd_log_flush_lock);
 }
 
+
+/**
+ * gfs2_meta_syncfs - sync all the buffers in a filesystem
+ * @sdp: the filesystem
+ *
+ */
+
+void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
+{
+	gfs2_log_flush(sdp, NULL);
+	for (;;) {
+		gfs2_ail1_start(sdp, DIO_ALL);
+		if (gfs2_ail1_empty(sdp, DIO_ALL))
+			break;
+		msleep(10);
+	}
+}
+
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 7f5737d5561..8e7aa0f2910 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -48,7 +48,6 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
 unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
 			    unsigned int ssize);
 
-void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags);
 int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
 
 int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
@@ -61,5 +60,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
 void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
 
 void gfs2_log_shutdown(struct gfs2_sbd *sdp);
+void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
 
 #endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 939a09f6e88..fbeba813c97 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -574,20 +574,3 @@ out:
 	return first_bh;
 }
 
-/**
- * gfs2_meta_syncfs - sync all the buffers in a filesystem
- * @sdp: the filesystem
- *
- */
-
-void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
-{
-	gfs2_log_flush(sdp, NULL);
-	for (;;) {
-		gfs2_ail1_start(sdp, DIO_ALL);
-		if (gfs2_ail1_empty(sdp, DIO_ALL))
-			break;
-		msleep(10);
-	}
-}
-
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 3ec939e20df..e037425bc04 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -67,7 +67,6 @@ static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
 }
 
 struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
-void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
 
 #define buffer_busy(bh) \
 ((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
-- 
cgit v1.2.3


From cb4c03131836a55bf95e1c165409244ac6b4f39f Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 23 Nov 2006 11:16:32 -0500
Subject: [GFS2] Reduce number of arguments to meta_io.c:getbuf()

Since the superblock and the address_space are determined by the
glock, we might as well just pass that as the argument since all
the callers already have that available.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/meta_io.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index fbeba813c97..0e34d991897 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -127,17 +127,17 @@ void gfs2_meta_sync(struct gfs2_glock *gl)
 
 /**
  * getbuf - Get a buffer with a given address space
- * @sdp: the filesystem
- * @aspace: the address space
+ * @gl: the glock
  * @blkno: the block number (filesystem scope)
  * @create: 1 if the buffer should be created
  *
  * Returns: the buffer
  */
 
-static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace,
-				  u64 blkno, int create)
+static struct buffer_head *getbuf(struct gfs2_glock *gl, u64 blkno, int create)
 {
+	struct address_space *mapping = gl->gl_aspace->i_mapping;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct page *page;
 	struct buffer_head *bh;
 	unsigned int shift;
@@ -150,13 +150,13 @@ static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace,
 
 	if (create) {
 		for (;;) {
-			page = grab_cache_page(aspace->i_mapping, index);
+			page = grab_cache_page(mapping, index);
 			if (page)
 				break;
 			yield();
 		}
 	} else {
-		page = find_lock_page(aspace->i_mapping, index);
+		page = find_lock_page(mapping, index);
 		if (!page)
 			return NULL;
 	}
@@ -202,7 +202,7 @@ static void meta_prep_new(struct buffer_head *bh)
 struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
 {
 	struct buffer_head *bh;
-	bh = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
+	bh = getbuf(gl, blkno, CREATE);
 	meta_prep_new(bh);
 	return bh;
 }
@@ -220,7 +220,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
 int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
 		   struct buffer_head **bhp)
 {
-	*bhp = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
+	*bhp = getbuf(gl, blkno, CREATE);
 	if (!buffer_uptodate(*bhp))
 		ll_rw_block(READ_META, 1, bhp);
 	if (flags & DIO_WAIT) {
@@ -379,11 +379,10 @@ void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
 void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct inode *aspace = ip->i_gl->gl_aspace;
 	struct buffer_head *bh;
 
 	while (blen) {
-		bh = getbuf(sdp, aspace, bstart, NO_CREATE);
+		bh = getbuf(ip->i_gl, bstart, NO_CREATE);
 		if (bh) {
 			struct gfs2_bufdata *bd = bh->b_private;
 
@@ -484,7 +483,7 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
 	spin_unlock(&ip->i_spin);
 
 	if (!bh)
-		bh = getbuf(gl->gl_sbd, gl->gl_aspace, num, CREATE);
+		bh = getbuf(gl, num, CREATE);
 
 	if (!bh)
 		return -ENOBUFS;
@@ -535,7 +534,6 @@ err:
 struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
 {
 	struct gfs2_sbd *sdp = gl->gl_sbd;
-	struct inode *aspace = gl->gl_aspace;
 	struct buffer_head *first_bh, *bh;
 	u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
 			  sdp->sd_sb.sb_bsize_shift;
@@ -547,7 +545,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
 	if (extlen > max_ra)
 		extlen = max_ra;
 
-	first_bh = getbuf(sdp, aspace, dblock, CREATE);
+	first_bh = getbuf(gl, dblock, CREATE);
 
 	if (buffer_uptodate(first_bh))
 		goto out;
@@ -558,7 +556,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
 	extlen--;
 
 	while (extlen) {
-		bh = getbuf(sdp, aspace, dblock, CREATE);
+		bh = getbuf(gl, dblock, CREATE);
 
 		if (!buffer_uptodate(bh) && !buffer_locked(bh))
 			ll_rw_block(READA, 1, &bh);
-- 
cgit v1.2.3


From 300c7d75f3a5e8edd3e390ccd56b808f3fb14e33 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 27 Nov 2006 09:55:28 -0500
Subject: [GFS2] Fix recursive locking in gfs2_permission

Since gfs2_permission may be called either from the VFS (in which case
we need to obtain a shared glock) or from GFS2 (in which case we already
have a glock) we need to test to see whether or not a lock is required.
The original test was buggy due to a potential race. This one should
be safe.

This fixes Red Hat bugzilla #217129

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_inode.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index b247f25effb..fd9fee2ceea 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -835,6 +835,10 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
  * @mask:
  * @nd: passed from Linux VFS, ignored by us
  *
+ * This may be called from the VFS directly, or from within GFS2 with the
+ * inode locked, so we look to see if the glock is already locked and only
+ * lock the glock if its not already been done.
+ *
  * Returns: errno
  */
 
@@ -843,15 +847,18 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder i_gh;
 	int error;
+	int unlock = 0;
 
-	if (!test_bit(GIF_INVALID, &ip->i_flags))
-		return generic_permission(inode, mask, gfs2_check_acl);
+	if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) {
+		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+		if (error)
+			return error;
+		unlock = 1;
+	}
 
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-	if (!error) {
-		error = generic_permission(inode, mask, gfs2_check_acl_locked);
+	error = generic_permission(inode, mask, gfs2_check_acl_locked);
+	if (unlock)
 		gfs2_glock_dq_uninit(&i_gh);
-	}
 
 	return error;
 }
-- 
cgit v1.2.3


From dcf3dd852f554bb0016aa23892596717cc123a26 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 27 Nov 2006 10:12:05 -0500
Subject: [GFS2] Fix recursive locking in gfs2_getattr

The readdirplus NFS operation can result in gfs2_getattr being
called with the glock already held. In this case we do not want
to try and grab the lock again.

This fixes Red Hat bugzilla #215727

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_inode.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index fd9fee2ceea..fbe38c1ab54 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -992,6 +992,12 @@ out:
  * @dentry: The dentry to stat
  * @stat: The inode's stats
  *
+ * This may be called from the VFS directly, or from within GFS2 with the
+ * inode locked, so we look to see if the glock is already locked and only
+ * lock the glock if its not already been done. Note that its the NFS
+ * readdirplus operation which causes this to be called (from filldir)
+ * with the glock already held.
+ *
  * Returns: errno
  */
 
@@ -1002,14 +1008,20 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder gh;
 	int error;
+	int unlock = 0;
 
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
-	if (!error) {
-		generic_fillattr(inode, stat);
-		gfs2_glock_dq_uninit(&gh);
+	if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) {
+		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+		if (error)
+			return error;
+		unlock = 1;
 	}
 
-	return error;
+	generic_fillattr(inode, stat);
+	if (unlock);
+		gfs2_glock_dq_uninit(&gh);
+
+	return 0;
 }
 
 static int gfs2_setxattr(struct dentry *dentry, const char *name,
-- 
cgit v1.2.3


From 2896ee37ccc1f9acb244c9b02becb74a43661009 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Mon, 27 Nov 2006 11:31:22 -0600
Subject: [DLM] fix add_requestqueue checking nodes list

Requests that arrive after recovery has started are saved in the
requestqueue and processed after recovery is done.  Some of these requests
are purged during recovery if they are from nodes that have been removed.
We move the purging of the requests (dlm_purge_requestqueue) to later in
the recovery sequence which allows the routine saving requests
(dlm_add_requestqueue) to avoid filtering out requests by nodeid since the
same will be done by the purge.  The current code has add_requestqueue
filtering by nodeid but doesn't hold any locks when accessing the list of
current nodes.  This also means that we need to call the purge routine
when the lockspace is being shut down since the add routine will not be
rejecting requests itself any more.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/lockspace.c    |  2 ++
 fs/dlm/recoverd.c     | 16 ++++++++--------
 fs/dlm/requestqueue.c |  7 ++++---
 3 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index f8842ca443c..791388b25c3 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -22,6 +22,7 @@
 #include "memory.h"
 #include "lock.h"
 #include "recover.h"
+#include "requestqueue.h"
 
 #ifdef CONFIG_DLM_DEBUG
 int dlm_create_debug_file(struct dlm_ls *ls);
@@ -684,6 +685,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
 	 * Free structures on any other lists
 	 */
 
+	dlm_purge_requestqueue(ls);
 	kfree(ls->ls_recover_args);
 	dlm_clear_free_entries(ls);
 	dlm_clear_members(ls);
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 8bb895ffd90..9dc2f9156f1 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -93,14 +93,6 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 		goto fail;
 	}
 
-	/*
-	 * Purge directory-related requests that are saved in requestqueue.
-	 * All dir requests from before recovery are invalid now due to the dir
-	 * rebuild and will be resent by the requesting nodes.
-	 */
-
-	dlm_purge_requestqueue(ls);
-
 	/*
 	 * Wait for all nodes to complete directory rebuild.
 	 */
@@ -181,6 +173,14 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 
 	dlm_release_root_list(ls);
 
+	/*
+	 * Purge directory-related requests that are saved in requestqueue.
+	 * All dir requests from before recovery are invalid now due to the dir
+	 * rebuild and will be resent by the requesting nodes.
+	 */
+
+	dlm_purge_requestqueue(ls);
+
 	dlm_set_recover_status(ls, DLM_RS_DONE);
 	error = dlm_recover_done_wait(ls);
 	if (error) {
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index 0226d2a0a0f..65008d79c96 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -36,9 +36,6 @@ int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
 	int length = hd->h_length;
 	int rv = 0;
 
-	if (dlm_is_removed(ls, nodeid))
-		return 0;
-
 	e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
 	if (!e) {
 		log_print("dlm_add_requestqueue: out of memory\n");
@@ -133,6 +130,10 @@ static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid)
 {
 	uint32_t type = ms->m_type;
 
+	/* the ls is being cleaned up and freed by release_lockspace */
+	if (!ls->ls_count)
+		return 1;
+
 	if (dlm_is_removed(ls, nodeid))
 		return 1;
 
-- 
cgit v1.2.3


From aed3255f2267e2d1d95b9cf7f2995ce24e6c873b Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <ryusuke@osrg.net>
Date: Tue, 28 Nov 2006 02:53:22 +0900
Subject: [GFS2] fs/gfs2/log.c:log_bmap() fix printk format warning

Fix a printk format warning in fs/gfs2/log.c:
fs/gfs2/log.c:322: warning: format '%llu' expects type 'long long unsigned int', but argument 3 has type 'sector_t'

Signed-off-by: Ryusuke Konishi <ryusuke@osrg.net>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/log.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 7713d591867..291415ddfe5 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -326,7 +326,8 @@ static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
 	bh_map.b_size = 1 << inode->i_blkbits;
 	error = gfs2_block_map(inode, lbn, 0, &bh_map);
 	if (error || !bh_map.b_blocknr)
-		printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, bh_map.b_blocknr, lbn);
+		printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error,
+		       (unsigned long long)bh_map.b_blocknr, lbn);
 	gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr);
 
 	return bh_map.b_blocknr;
-- 
cgit v1.2.3


From 1babdb453138f17b8ed3d1d5711089c4e2fa5ace Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Mon, 27 Nov 2006 13:18:41 -0600
Subject: [DLM] fix size of STATUS_REPLY message

When the not_ready routine sends a "fake" status reply with blank status
flags, it needs to use the correct size for a normal STATUS_REPLY by
including the size of the would-be config parameters.  We also fill in the
non-existant config parameters with an invalid lvblen value so it's easier
to notice if these invalid paratmers are ever being used.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/rcom.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 87b12f7d3d2..6ac195cec02 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -370,9 +370,10 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
 {
 	struct dlm_rcom *rc;
+	struct rcom_config *rf;
 	struct dlm_mhandle *mh;
 	char *mb;
-	int mb_len = sizeof(struct dlm_rcom);
+	int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
 
 	mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb);
 	if (!mh)
@@ -391,6 +392,9 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
 	rc->rc_id = rc_in->rc_id;
 	rc->rc_result = -ESRCH;
 
+	rf = (struct rcom_config *) rc->rc_buf;
+	rf->rf_lvblen = -1;
+
 	dlm_rcom_out(rc);
 	dlm_lowcomms_commit_buffer(mh);
 
-- 
cgit v1.2.3


From 98f176fb32f33795b6d0f83856008b932123ab38 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Mon, 27 Nov 2006 13:19:28 -0600
Subject: [DLM] don't accept replies to old recovery messages

We often abort a recovery after sending a status request to a remote node.
We want to ignore any potential status reply we get from the remote node.
If we get one of these unwanted replies, we've often moved on to the next
recovery message and incremented the message sequence counter, so the
reply will be ignored due to the seq number.  In some cases, we've not
moved on to the next message so the seq number of the reply we want to
ignore is still correct, causing the reply to be accepted.  The next
recovery message will then mistake this old reply as a new one.

To fix this, we add the flag RCOM_WAIT to indicate when we can accept a
new reply.  We clear this flag if we abort recovery while waiting for a
reply.  Before the flag is set again (to allow new replies) we know that
any old replies will be rejected due to their sequence number.  We also
initialize the recovery-message sequence number to a random value when a
lockspace is first created.  This makes it clear when messages are being
rejected from an old instance of a lockspace that has since been
recreated.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/dlm_internal.h |  4 +++-
 fs/dlm/lockspace.c    |  2 ++
 fs/dlm/rcom.c         | 44 ++++++++++++++++++++++++++++++++++----------
 3 files changed, 39 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 1e5cd67e1b7..1ee8195e6fc 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -471,6 +471,7 @@ struct dlm_ls {
 	char			*ls_recover_buf;
 	int			ls_recover_nodeid; /* for debugging */
 	uint64_t		ls_rcom_seq;
+	spinlock_t		ls_rcom_spin;
 	struct list_head	ls_recover_list;
 	spinlock_t		ls_recover_list_lock;
 	int			ls_recover_list_count;
@@ -488,7 +489,8 @@ struct dlm_ls {
 #define LSFL_RUNNING		1
 #define LSFL_RECOVERY_STOP	2
 #define LSFL_RCOM_READY		3
-#define LSFL_UEVENT_WAIT	4
+#define LSFL_RCOM_WAIT		4
+#define LSFL_UEVENT_WAIT	5
 
 /* much of this is just saving user space pointers associated with the
    lock that we pass back to the user lib with an ast */
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 791388b25c3..59012b089e8 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -479,6 +479,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	ls->ls_recoverd_task = NULL;
 	mutex_init(&ls->ls_recoverd_active);
 	spin_lock_init(&ls->ls_recover_lock);
+	spin_lock_init(&ls->ls_rcom_spin);
+	get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
 	ls->ls_recover_status = 0;
 	ls->ls_recover_seq = 0;
 	ls->ls_recover_args = NULL;
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 6ac195cec02..c42f2db6c4b 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -90,13 +90,28 @@ static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid)
 	return 0;
 }
 
+static void allow_sync_reply(struct dlm_ls *ls, uint64_t *new_seq)
+{
+	spin_lock(&ls->ls_rcom_spin);
+	*new_seq = ++ls->ls_rcom_seq;
+	set_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
+	spin_unlock(&ls->ls_rcom_spin);
+}
+
+static void disallow_sync_reply(struct dlm_ls *ls)
+{
+	spin_lock(&ls->ls_rcom_spin);
+	clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
+	clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
+	spin_unlock(&ls->ls_rcom_spin);
+}
+
 int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
 {
 	struct dlm_rcom *rc;
 	struct dlm_mhandle *mh;
 	int error = 0;
 
-	memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
 	ls->ls_recover_nodeid = nodeid;
 
 	if (nodeid == dlm_our_nodeid()) {
@@ -108,12 +123,14 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
 	error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 0, &rc, &mh);
 	if (error)
 		goto out;
-	rc->rc_id = ++ls->ls_rcom_seq;
+
+	allow_sync_reply(ls, &rc->rc_id);
+	memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
 
 	send_rcom(ls, mh, rc);
 
 	error = dlm_wait_function(ls, &rcom_response);
-	clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
+	disallow_sync_reply(ls);
 	if (error)
 		goto out;
 
@@ -150,14 +167,20 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 
 static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 {
-	if (rc_in->rc_id != ls->ls_rcom_seq) {
-		log_debug(ls, "reject old reply %d got %llx wanted %llx",
-			  rc_in->rc_type, rc_in->rc_id, ls->ls_rcom_seq);
-		return;
+	spin_lock(&ls->ls_rcom_spin);
+	if (!test_bit(LSFL_RCOM_WAIT, &ls->ls_flags) ||
+	    rc_in->rc_id != ls->ls_rcom_seq) {
+		log_debug(ls, "reject reply %d from %d seq %llx expect %llx",
+			  rc_in->rc_type, rc_in->rc_header.h_nodeid,
+			  rc_in->rc_id, ls->ls_rcom_seq);
+		goto out;
 	}
 	memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length);
 	set_bit(LSFL_RCOM_READY, &ls->ls_flags);
+	clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
 	wake_up(&ls->ls_wait_general);
+ out:
+	spin_unlock(&ls->ls_rcom_spin);
 }
 
 static void receive_rcom_status_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
@@ -171,7 +194,6 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
 	struct dlm_mhandle *mh;
 	int error = 0, len = sizeof(struct dlm_rcom);
 
-	memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
 	ls->ls_recover_nodeid = nodeid;
 
 	if (nodeid == dlm_our_nodeid()) {
@@ -185,12 +207,14 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
 	if (error)
 		goto out;
 	memcpy(rc->rc_buf, last_name, last_len);
-	rc->rc_id = ++ls->ls_rcom_seq;
+
+	allow_sync_reply(ls, &rc->rc_id);
+	memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
 
 	send_rcom(ls, mh, rc);
 
 	error = dlm_wait_function(ls, &rcom_response);
-	clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
+	disallow_sync_reply(ls);
  out:
 	return error;
 }
-- 
cgit v1.2.3


From 0ac230699a0f3f0d15ad4e4ad99446dac5b4a21f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 28 Nov 2006 22:29:19 -0800
Subject: [GFS2] lock function parameter

Fix function parameter typing:
fs/gfs2/glock.c:100: warning: function declaration isn't a prototype

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3c2ff81c84e..f130f9894bd 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -96,7 +96,7 @@ static inline rwlock_t *gl_lock_addr(unsigned int x)
 	return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)];
 }
 #else /* not SMP, so no spinlocks required */
-static inline rwlock_t *gl_lock_addr(x)
+static inline rwlock_t *gl_lock_addr(unsigned int x)
 {
 	return NULL;
 }
-- 
cgit v1.2.3


From 57adf7eede38d315e0e328c52484d6a596e9a238 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <ryusuke@osrg.net>
Date: Wed, 29 Nov 2006 09:33:48 -0500
Subject: [DLM] fix format warnings in rcom.c and recoverd.c

This fixes the following gcc warnings generated on
the architectures where uint64_t != unsigned long long (e.g. ppc64).

fs/dlm/rcom.c:154: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'uint64_t'
fs/dlm/rcom.c:154: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'uint64_t'
fs/dlm/recoverd.c:48: warning: format '%llx' expects type 'long long unsigned int', but argument 3 has type 'uint64_t'
fs/dlm/recoverd.c:202: warning: format '%llx' expects type 'long long unsigned int', but argument 3 has type 'uint64_t'
fs/dlm/recoverd.c:210: warning: format '%llx' expects type 'long long unsigned int', but argument 3 has type 'uint64_t'

Signed-off-by: Ryusuke Konishi <ryusuke@osrg.net>
Signed-off-by: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/rcom.c     | 3 ++-
 fs/dlm/recoverd.c | 8 +++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index c42f2db6c4b..4cc31be9cd9 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -172,7 +172,8 @@ static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	    rc_in->rc_id != ls->ls_rcom_seq) {
 		log_debug(ls, "reject reply %d from %d seq %llx expect %llx",
 			  rc_in->rc_type, rc_in->rc_header.h_nodeid,
-			  rc_in->rc_id, ls->ls_rcom_seq);
+			  (unsigned long long)rc_in->rc_id,
+			  (unsigned long long)ls->ls_rcom_seq);
 		goto out;
 	}
 	memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length);
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 9dc2f9156f1..650536aa513 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -45,7 +45,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 	unsigned long start;
 	int error, neg = 0;
 
-	log_debug(ls, "recover %llx", rv->seq);
+	log_debug(ls, "recover %llx", (unsigned long long)rv->seq);
 
 	mutex_lock(&ls->ls_recoverd_active);
 
@@ -212,7 +212,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 
 	dlm_astd_wake();
 
-	log_debug(ls, "recover %llx done: %u ms", rv->seq,
+	log_debug(ls, "recover %llx done: %u ms",
+		  (unsigned long long)rv->seq,
 		  jiffies_to_msecs(jiffies - start));
 	mutex_unlock(&ls->ls_recoverd_active);
 
@@ -220,7 +221,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 
  fail:
 	dlm_release_root_list(ls);
-	log_debug(ls, "recover %llx error %d", rv->seq, error);
+	log_debug(ls, "recover %llx error %d",
+		  (unsigned long long)rv->seq, error);
 	mutex_unlock(&ls->ls_recoverd_active);
 	return error;
 }
-- 
cgit v1.2.3


From 77386e1f662f104680da7885d32e068e4b11b882 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 29 Nov 2006 10:41:49 -0500
Subject: [GFS2] Remove gfs2_check_acl()

As pointed out by Adrian Bunk, the gfs2_check_acl() function is no
longer used. This patch removes it and renamed gfs2_check_acl_locked()
to gfs2_check_acl() since we only need one variant of that function now.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Adrian Bunk <bunk@stusta.de>
---
 fs/gfs2/acl.c       | 19 ++-----------------
 fs/gfs2/acl.h       |  1 -
 fs/gfs2/ops_inode.c |  2 +-
 3 files changed, 3 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 3908992b27a..6e80844367e 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -145,14 +145,14 @@ out:
 }
 
 /**
- * gfs2_check_acl_locked - Check an ACL to see if we're allowed to do something
+ * gfs2_check_acl - Check an ACL to see if we're allowed to do something
  * @inode: the file we want to do something to
  * @mask: what we want to do
  *
  * Returns: errno
  */
 
-int gfs2_check_acl_locked(struct inode *inode, int mask)
+int gfs2_check_acl(struct inode *inode, int mask)
 {
 	struct posix_acl *acl = NULL;
 	int error;
@@ -170,21 +170,6 @@ int gfs2_check_acl_locked(struct inode *inode, int mask)
 	return -EAGAIN;
 }
 
-int gfs2_check_acl(struct inode *inode, int mask)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder i_gh;
-	int error;
-
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-	if (!error) {
-		error = gfs2_check_acl_locked(inode, mask);
-		gfs2_glock_dq_uninit(&i_gh);
-	}
-
-	return error;
-}
-
 static int munge_mode(struct gfs2_inode *ip, mode_t mode)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index 05c294fe0d7..6751930bfb6 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -31,7 +31,6 @@ int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
 			  struct gfs2_ea_request *er,
 			  int *remove, mode_t *mode);
 int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access);
-int gfs2_check_acl_locked(struct inode *inode, int mask);
 int gfs2_check_acl(struct inode *inode, int mask);
 int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip);
 int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index fbe38c1ab54..636dda4c7d3 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -856,7 +856,7 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
 		unlock = 1;
 	}
 
-	error = generic_permission(inode, mask, gfs2_check_acl_locked);
+	error = generic_permission(inode, mask, gfs2_check_acl);
 	if (unlock)
 		gfs2_glock_dq_uninit(&i_gh);
 
-- 
cgit v1.2.3


From 0da3585e1ef650d3224b4d6f9799558d1d99fa1e Mon Sep 17 00:00:00 2001
From: Srinivasa Ds <srinivasa@in.ibm.com>
Date: Thu, 30 Nov 2006 15:04:55 +0530
Subject: [GFS2] Mount problem with the GFS2 code

  While mounting the gfs2 filesystem,our test team had a problem and we
got this error message.
=======================================================

GFS2: fsid=: Trying to join cluster "lock_nolock", "dasde1"
GFS2: fsid=dasde1.0: Joined cluster. Now mounting FS...
GFS2: not a GFS2 filesystem
GFS2: fsid=dasde1.0: can't read superblock: -22

==========================================================================
On debugging further we found that problem is while reading the super
block(gfs2_read_super) and comparing the magic number in it.
When I  replace the submit_bio() call(present in gfs2_read_super) with
the sb_getblk() and ll_rw_block(), mount operation succeded.
On further analysis we found that before calling submit_bio(),
bio->bi_sector was set to "sector" variable. This "sector" variable has
the same value of bh->b_blocknr(block number). Hence there is a need to
multiply this valuwith (blocksize >> 9)(9 because,sector size
2^9,samething happens in ll_rw_block also, before calling submit_bio()).
So I have developed the patch which solves this problem. Please let me
know your comments.
================================================================

Signed-off-by: Srinivasa DS <srinivasa@in.ibm.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 1408c5f3137..3b227274279 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -199,7 +199,7 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
 		return NULL;
 	}
 
-	bio->bi_sector = sector;
+	bio->bi_sector = sector * (sb->s_blocksize >> 9);
 	bio->bi_bdev = sb->s_bdev;
 	bio_add_page(bio, page, PAGE_SIZE, 0);
 
-- 
cgit v1.2.3


From aac1a3c77a46c2d06f297641760dd740ac2a84af Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 30 Nov 2006 10:02:19 -0500
Subject: [GFS2] Add a comment about reading the super block

The comment explains why we use the bio functions to read
the super block.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Srinivasa Ds <srinivasa@in.ibm.com>
---
 fs/gfs2/super.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'fs')

diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 3b227274279..43a24f2e590 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -180,6 +180,24 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
 	return 0;
 }
 
+/**
+ * gfs2_read_super - Read the gfs2 super block from disk
+ * @sb: The VFS super block
+ * @sector: The location of the super block
+ *
+ * This uses the bio functions to read the super block from disk
+ * because we want to be 100% sure that we never read cached data.
+ * A super block is read twice only during each GFS2 mount and is
+ * never written to by the filesystem. The first time its read no
+ * locks are held, and the only details which are looked at are those
+ * relating to the locking protocol. Once locking is up and working,
+ * the sb is read again under the lock to establish the location of
+ * the master directory (contains pointers to journals etc) and the
+ * root directory.
+ *
+ * Returns: A page containing the sb or NULL
+ */
+
 struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
 {
 	struct page *page;
-- 
cgit v1.2.3


From 33c3de32872ef3c075e4dac04c0de8f86ac39f6f Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 30 Nov 2006 10:14:32 -0500
Subject: [GFS2] Don't flush everything on fdatasync

The gfs2_fsync() function was doing a journal flush on each
and every call. While this is correct, its also a lot of
overhead. This patch means that on fdatasync flushes we
rely on the VFS to flush the data for us and we don't do
a journal flush unless we really need to.

We have to do a journal flush for stuffed files though because
they have the data and the inode metadata in the same block.
Journaled files also need a journal flush too of course.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_file.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index c2be216eb92..7bd971bf7cd 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -22,6 +22,7 @@
 #include <linux/ext2_fs.h>
 #include <linux/crc32.h>
 #include <linux/lm_interface.h>
+#include <linux/writeback.h>
 #include <asm/uaccess.h>
 
 #include "gfs2.h"
@@ -503,16 +504,39 @@ static int gfs2_close(struct inode *inode, struct file *file)
  * @file: the file that points to the dentry (we ignore this)
  * @dentry: the dentry that points to the inode to sync
  *
+ * The VFS will flush "normal" data for us. We only need to worry
+ * about metadata here. For journaled data, we just do a log flush
+ * as we can't avoid it. Otherwise we can just bale out if datasync
+ * is set. For stuffed inodes we must flush the log in order to
+ * ensure that all data is on disk.
+ *
  * Returns: errno
  */
 
 static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
-	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+	struct inode *inode = dentry->d_inode;
+	int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
+	int ret = 0;
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = 0,
+	};
+
+	if (gfs2_is_jdata(GFS2_I(inode))) {
+		gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
+		return 0;
+	}
 
-	gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl);
+	if (sync_state != 0) {
+		if (!datasync)
+			ret = sync_inode(inode, &wbc);
 
-	return 0;
+		if (gfs2_is_stuffed(GFS2_I(inode)))
+			gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
+	}
+
+	return ret;
 }
 
 /**
-- 
cgit v1.2.3


From ad2d7225709b11da47e092634cbdf0591829ae9c Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Fri, 1 Dec 2006 10:40:20 +0100
Subject: [PATCH] block: kill length alignment test in bio_map_user()

The target mode support is mapping in bios using bio_map_user. The
current targets do not need their len to be aligned with a queue limit
so this check is causing some problems. Note: pointers passed into the
kernel are properly aligned by usersapace tgt code so the uaddr check
in bio_map_user is ok.

The major user, blk_bio_map_user checks for the len before mapping
so it is not affected by this patch.

And the semi-newly added user blk_rq_map_user_iov has been failing
out when the len is not aligned properly so maybe people have been
good and not sending misaligned lens or that path is not used very
often and this change will not be very dangerous. st and sg do not
check the length and we have not seen any problem reports from those
wider used paths so this patch should be fairly safe - for mm
and wider testing at least.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/bio.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index f95c8749499..d91cfbf7ebc 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -622,10 +622,9 @@ static struct bio *__bio_map_user_iov(request_queue_t *q,
 
 		nr_pages += end - start;
 		/*
-		 * transfer and buffer must be aligned to at least hardsector
-		 * size for now, in the future we can relax this restriction
+		 * buffer must be aligned to at least hardsector size for now
 		 */
-		if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
+		if (uaddr & queue_dma_alignment(q))
 			return ERR_PTR(-EINVAL);
 	}
 
-- 
cgit v1.2.3


From 0e75f9063f5c55fb0b0b546a7c356f8ec186825e Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Fri, 1 Dec 2006 10:40:55 +0100
Subject: [PATCH] block: support larger block pc requests

This patch modifies blk_rq_map/unmap_user() and the cdrom and scsi_ioctl.c
users so that it supports requests larger than bio by chaining them together.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/bio.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index d91cfbf7ebc..aa4d09bd4e7 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -560,10 +560,8 @@ struct bio *bio_copy_user(request_queue_t *q, unsigned long uaddr,
 			break;
 		}
 
-		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
-			ret = -EINVAL;
+		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
 			break;
-		}
 
 		len -= bytes;
 	}
@@ -750,7 +748,6 @@ struct bio *bio_map_user_iov(request_queue_t *q, struct block_device *bdev,
 			     int write_to_vm)
 {
 	struct bio *bio;
-	int len = 0, i;
 
 	bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm);
 
@@ -765,18 +762,7 @@ struct bio *bio_map_user_iov(request_queue_t *q, struct block_device *bdev,
 	 */
 	bio_get(bio);
 
-	for (i = 0; i < iov_count; i++)
-		len += iov[i].iov_len;
-
-	if (bio->bi_size == len)
-		return bio;
-
-	/*
-	 * don't support partial mappings
-	 */
-	bio_endio(bio, bio->bi_size, 0);
-	bio_unmap_user(bio);
-	return ERR_PTR(-EINVAL);
+	return bio;
 }
 
 static void __bio_unmap_user(struct bio *bio)
-- 
cgit v1.2.3


From 035ed7a49447bc8e15d4d9316fc6a359b2d94333 Mon Sep 17 00:00:00 2001
From: Thomas Maier <balagi@justmail.de>
Date: Sun, 22 Oct 2006 19:17:47 +0200
Subject: sysfs: sysfs_write_file() writes zero terminated data

since most of the files in sysfs are text files,
it would be nice, if the "store" function called
during sysfs_write_file() gets a zero terminated
string / data.
The current implementation seems not to ensure this.
(But only if it is the first time the zeroed buffer
page is allocated.)

So the buffer can be scanned by sscanf() easily,
for example.

This patch simply sets a \0 char behind the
data in buffer->page.

Signed-off-by: Thomas Maier <balagi@justmail.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/file.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 298303b5a71..95c165101c9 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -190,6 +190,9 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
 		count = PAGE_SIZE - 1;
 	error = copy_from_user(buffer->page,buf,count);
 	buffer->needs_read_fill = 1;
+	/* if buf is assumed to contain a string, terminate it by \0,
+	   so e.g. sscanf() can scan the string easily */
+	buffer->page[count] = 0;
 	return error ? -EFAULT : count;
 }
 
-- 
cgit v1.2.3


From 8a82472f86bf693b8e91ed56c9ca4f62fbbdcfa3 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Mon, 20 Nov 2006 17:07:51 +0100
Subject: driver core: Introduce device_move(): move a device to a new parent.

Provide a function device_move() to move a device to a new parent device. Add
auxilliary functions kobject_move() and sysfs_move_dir().
kobject_move() generates a new uevent of type KOBJ_MOVE, containing the
previous path (DEVPATH_OLD) in addition to the usual values. For this, a new
interface kobject_uevent_env() is created that allows to add further
environmental data to the uevent at the kobject layer.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Acked-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/dir.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 3aa3434621c..a5782e8c7f0 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -372,6 +372,51 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
 	return error;
 }
 
+int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent)
+{
+	struct dentry *old_parent_dentry, *new_parent_dentry, *new_dentry;
+	struct sysfs_dirent *new_parent_sd, *sd;
+	int error;
+
+	if (!new_parent)
+		return -EINVAL;
+
+	old_parent_dentry = kobj->parent ?
+		kobj->parent->dentry : sysfs_mount->mnt_sb->s_root;
+	new_parent_dentry = new_parent->dentry;
+
+again:
+	mutex_lock(&old_parent_dentry->d_inode->i_mutex);
+	if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) {
+		mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
+		goto again;
+	}
+
+	new_parent_sd = new_parent_dentry->d_fsdata;
+	sd = kobj->dentry->d_fsdata;
+
+	new_dentry = lookup_one_len(kobj->name, new_parent_dentry,
+				    strlen(kobj->name));
+	if (IS_ERR(new_dentry)) {
+		error = PTR_ERR(new_dentry);
+		goto out;
+	} else
+		error = 0;
+	d_add(new_dentry, NULL);
+	d_move(kobj->dentry, new_dentry);
+	dput(new_dentry);
+
+	/* Remove from old parent's list and insert into new parent's list. */
+	list_del_init(&sd->s_sibling);
+	list_add(&sd->s_sibling, &new_parent_sd->s_children);
+
+out:
+	mutex_unlock(&new_parent_dentry->d_inode->i_mutex);
+	mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
+
+	return error;
+}
+
 static int sysfs_dir_open(struct inode *inode, struct file *file)
 {
 	struct dentry * dentry = file->f_dentry;
-- 
cgit v1.2.3


From da66116eef7da8557762c9b5efdc435bc0afecfa Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 20 Nov 2006 03:24:28 +0100
Subject: [2.6 patch] make ocfs2_create_new_lock() static

This patch makes the needlessly global ocfs2_create_new_lock() static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 8 ++++----
 fs/ocfs2/dlmglue.h | 2 --
 2 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 8801e41afe8..ff8d3dbdeb3 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1063,10 +1063,10 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
 	mlog_exit_void();
 }
 
-int ocfs2_create_new_lock(struct ocfs2_super *osb,
-			  struct ocfs2_lock_res *lockres,
-			  int ex,
-			  int local)
+static int ocfs2_create_new_lock(struct ocfs2_super *osb,
+				 struct ocfs2_lock_res *lockres,
+				 int ex,
+				 int local)
 {
 	int level =  ex ? LKM_EXMODE : LKM_PRMODE;
 	unsigned long flags;
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 4a276938722..75c49ce2be3 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -68,8 +68,6 @@ void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
 				u64 parent, struct inode *inode);
 void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
 int ocfs2_create_new_inode_locks(struct inode *inode);
-int ocfs2_create_new_lock(struct ocfs2_super *osb,
-			  struct ocfs2_lock_res *lockres, int ex, int local);
 int ocfs2_drop_inode_locks(struct inode *inode);
 int ocfs2_data_lock_full(struct inode *inode,
 			 int write,
-- 
cgit v1.2.3


From f5a923d1ba648bfb3cc922c66981fc8e3280f57f Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 28 Nov 2006 15:13:18 -0800
Subject: ocfs2: fix format warnings in dlm_alloc_pagevec()

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmdomain.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 8d1065f8b3b..f6cdab3a2c6 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -68,7 +68,8 @@ static void **dlm_alloc_pagevec(int pages)
 			goto out_free;
 
 	mlog(0, "Allocated DLM hash pagevec; %d pages (%lu expected), %lu buckets per page\n",
-	     pages, DLM_HASH_PAGES, (unsigned long)DLM_BUCKETS_PER_PAGE);
+	     pages, (unsigned long)DLM_HASH_PAGES,
+	     (unsigned long)DLM_BUCKETS_PER_PAGE);
 	return vec;
 out_free:
 	dlm_free_pagevec(vec, i);
-- 
cgit v1.2.3


From 01ddf1e186b3b12b38c9e44912e0fd6a1cbc882b Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 5 Oct 2006 13:54:39 -0700
Subject: ocfs2: remove unused ocfs2_journal_handle field

max_buffs was just being set and not actually used.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/journal.c | 7 +------
 fs/ocfs2/journal.h | 1 -
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index fd9734def55..e26cd0ca517 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -124,7 +124,6 @@ struct ocfs2_journal_handle *ocfs2_alloc_handle(struct ocfs2_super *osb)
 		return NULL;
 	}
 
-	retval->max_buffs = 0;
 	retval->num_locks = 0;
 	retval->k_handle = NULL;
 
@@ -172,8 +171,6 @@ struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
 		goto done_free;
 	}
 
-	handle->max_buffs = max_buffs;
-
 	down_read(&osb->journal->j_trans_barrier);
 
 	/* actually start the transaction now */
@@ -353,9 +350,7 @@ int ocfs2_extend_trans(struct ocfs2_journal_handle *handle,
 			mlog_errno(status);
 			goto bail;
 		}
-		handle->max_buffs = nblocks;
-	} else
-		handle->max_buffs += nblocks;
+	}
 
 	status = 0;
 bail:
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 2f3a6acdac4..35ae835e969 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -144,7 +144,6 @@ struct ocfs2_journal_handle {
 	handle_t            *k_handle; /* kernel handle.                */
 	struct ocfs2_journal        *journal;
 	u32                 flags;     /* see flags below.              */
-	int                 max_buffs; /* Buffs reserved by this handle */
 
 	/* The following two fields are for ocfs2_handle_add_lock */
 	int                 num_locks;
-- 
cgit v1.2.3


From 1fc581467e52546195c7ee8233a34d63c1cc1322 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 5 Oct 2006 14:15:36 -0700
Subject: ocfs2: have ocfs2_extend_trans() take handle_t

No reason to use our wrapper struct in this function, so take the handle_t
directly.

Also fixes a bug where we were incorrectly setting the handle to NULL in
case of a failure from journal_restart()

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/alloc.c   | 2 +-
 fs/ocfs2/file.c    | 2 +-
 fs/ocfs2/journal.c | 9 +++------
 fs/ocfs2/journal.h | 3 +--
 4 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index f43bc5f18a3..74b93f92429 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1074,7 +1074,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
 		/* TODO: Perhaps we can calculate the bulk of the
 		 * credits up front rather than extending like
 		 * this. */
-		status = ocfs2_extend_trans(handle,
+		status = ocfs2_extend_trans(handle->k_handle,
 					    OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
 		if (status < 0) {
 			mlog_errno(status);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 1be74c4e781..d8bd2c32f08 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -566,7 +566,7 @@ restarted_transaction:
 			credits = ocfs2_calc_extend_credits(osb->sb,
 							    fe,
 							    clusters_to_add);
-			status = ocfs2_extend_trans(handle, credits);
+			status = ocfs2_extend_trans(handle->k_handle, credits);
 			if (status < 0) {
 				/* handle still has to be committed at
 				 * this point. */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index e26cd0ca517..7c0c57ad517 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -323,20 +323,18 @@ void ocfs2_commit_trans(struct ocfs2_journal_handle *handle)
  * good because transaction ids haven't yet been recorded on the
  * cluster locks associated with this handle.
  */
-int ocfs2_extend_trans(struct ocfs2_journal_handle *handle,
-		       int nblocks)
+int ocfs2_extend_trans(handle_t *handle, int nblocks)
 {
 	int status;
 
 	BUG_ON(!handle);
-	BUG_ON(!(handle->flags & OCFS2_HANDLE_STARTED));
 	BUG_ON(!nblocks);
 
 	mlog_entry_void();
 
 	mlog(0, "Trying to extend transaction by %d blocks\n", nblocks);
 
-	status = journal_extend(handle->k_handle, nblocks);
+	status = journal_extend(handle, nblocks);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -344,9 +342,8 @@ int ocfs2_extend_trans(struct ocfs2_journal_handle *handle,
 
 	if (status > 0) {
 		mlog(0, "journal_extend failed, trying journal_restart\n");
-		status = journal_restart(handle->k_handle, nblocks);
+		status = journal_restart(handle, nblocks);
 		if (status < 0) {
-			handle->k_handle = NULL;
 			mlog_errno(status);
 			goto bail;
 		}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 35ae835e969..9f3d79dac33 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -264,8 +264,7 @@ struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
 					       struct ocfs2_journal_handle *handle,
 					       int max_buffs);
 void			     ocfs2_commit_trans(struct ocfs2_journal_handle *handle);
-int			     ocfs2_extend_trans(struct ocfs2_journal_handle *handle,
-						int nblocks);
+int			     ocfs2_extend_trans(handle_t *handle, int nblocks);
 
 /*
  * Create access is for when we get a newly created buffer and we're
-- 
cgit v1.2.3


From c161f89be7d57af863e434e9b15afaa863343a7a Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 5 Oct 2006 15:11:36 -0700
Subject: ocfs2: remove ocfs2_journal_handle flags field

Callers can set h_sync directly on the handle_t, whether a transaction has
been started or not can be determined via the existence of the handle_t on
the struct ocfs2_journal_handle.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/journal.c    | 13 +------------
 fs/ocfs2/journal.h    | 17 -----------------
 fs/ocfs2/localalloc.c |  4 ++--
 fs/ocfs2/suballoc.c   |  2 +-
 4 files changed, 4 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 7c0c57ad517..ca6f2094b00 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -190,7 +190,6 @@ struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
 	}
 
 	atomic_inc(&(osb->journal->j_num_trans));
-	handle->flags |= OCFS2_HANDLE_STARTED;
 
 	mlog_exit_ptr(handle);
 	return handle;
@@ -247,8 +246,6 @@ static void ocfs2_commit_unstarted_handle(struct ocfs2_journal_handle *handle)
 {
 	mlog_entry_void();
 
-	BUG_ON(handle->flags & OCFS2_HANDLE_STARTED);
-
 	ocfs2_handle_unlock_inodes(handle);
 	/* You are allowed to add journal locks before the transaction
 	 * has started. */
@@ -269,7 +266,7 @@ void ocfs2_commit_trans(struct ocfs2_journal_handle *handle)
 
 	BUG_ON(!handle);
 
-	if (!(handle->flags & OCFS2_HANDLE_STARTED)) {
+	if (!handle->k_handle) {
 		ocfs2_commit_unstarted_handle(handle);
 		mlog_exit_void();
 		return;
@@ -285,11 +282,6 @@ void ocfs2_commit_trans(struct ocfs2_journal_handle *handle)
 	if (handle->k_handle) {
 		jbd_handle = handle->k_handle;
 
-		if (handle->flags & OCFS2_HANDLE_SYNC)
-			jbd_handle->h_sync = 1;
-		else
-			jbd_handle->h_sync = 0;
-
 		/* actually stop the transaction. if we've set h_sync,
 		 * it'll have been committed when we return */
 		retval = journal_stop(jbd_handle);
@@ -366,7 +358,6 @@ int ocfs2_journal_access(struct ocfs2_journal_handle *handle,
 	BUG_ON(!inode);
 	BUG_ON(!handle);
 	BUG_ON(!bh);
-	BUG_ON(!(handle->flags & OCFS2_HANDLE_STARTED));
 
 	mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %zu\n",
 		   (unsigned long long)bh->b_blocknr, type,
@@ -421,8 +412,6 @@ int ocfs2_journal_dirty(struct ocfs2_journal_handle *handle,
 {
 	int status;
 
-	BUG_ON(!(handle->flags & OCFS2_HANDLE_STARTED));
-
 	mlog_entry("(bh->b_blocknr=%llu)\n",
 		   (unsigned long long)bh->b_blocknr);
 
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 9f3d79dac33..6b5d548ca11 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -143,7 +143,6 @@ struct ocfs2_journal_lock {
 struct ocfs2_journal_handle {
 	handle_t            *k_handle; /* kernel handle.                */
 	struct ocfs2_journal        *journal;
-	u32                 flags;     /* see flags below.              */
 
 	/* The following two fields are for ocfs2_handle_add_lock */
 	int                 num_locks;
@@ -154,22 +153,6 @@ struct ocfs2_journal_handle {
 	struct list_head     inode_list;
 };
 
-#define OCFS2_HANDLE_STARTED			1
-/* should we sync-commit this handle? */
-#define OCFS2_HANDLE_SYNC			2
-static inline int ocfs2_handle_started(struct ocfs2_journal_handle *handle)
-{
-	return handle->flags & OCFS2_HANDLE_STARTED;
-}
-
-static inline void ocfs2_handle_set_sync(struct ocfs2_journal_handle *handle, int sync)
-{
-	if (sync)
-		handle->flags |= OCFS2_HANDLE_SYNC;
-	else
-		handle->flags &= ~OCFS2_HANDLE_SYNC;
-}
-
 /* Exported only for the journal struct init code in super.c. Do not call. */
 void ocfs2_complete_recovery(void *data);
 
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 1f17a4d0828..f2f384dd5ba 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -423,7 +423,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
 	}
 
 	/* we want the bitmap change to be recorded on disk asap */
-	ocfs2_handle_set_sync(handle, 1);
+	handle->k_handle->h_sync = 1;
 
 	status = ocfs2_sync_local_to_main(osb, handle, alloc,
 					  main_bm_inode, main_bm_bh);
@@ -465,7 +465,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 
 	BUG_ON(!passed_handle);
 	BUG_ON(!ac);
-	BUG_ON(passed_handle->flags & OCFS2_HANDLE_STARTED);
+	BUG_ON(passed_handle->k_handle);
 
 	local_alloc_inode =
 		ocfs2_get_system_file_inode(osb,
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 9d91e66f51a..32093409e25 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -414,7 +414,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
 
 	mlog_entry_void();
 
-	BUG_ON(handle->flags & OCFS2_HANDLE_STARTED);
+	BUG_ON(handle->k_handle);
 
 	ocfs2_handle_add_inode(handle, alloc_inode);
 	status = ocfs2_meta_lock(alloc_inode, handle, &bh, 1);
-- 
cgit v1.2.3


From 8898a5a58fb2a2f78a15b046588b5b3adccb82c4 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 5 Oct 2006 15:42:08 -0700
Subject: ocfs2: don't pass handle to ocfs2_meta_lock() in localalloc.c

Take and drop the locks directly.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/localalloc.c | 84 ++++++++++++++++++++++++---------------------------
 1 file changed, 40 insertions(+), 44 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index f2f384dd5ba..75f09f1b4ce 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -196,7 +196,7 @@ bail:
 void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 {
 	int status;
-	struct ocfs2_journal_handle *handle = NULL;
+	struct ocfs2_journal_handle *handle;
 	struct inode *local_alloc_inode = NULL;
 	struct buffer_head *bh = NULL;
 	struct buffer_head *main_bm_bh = NULL;
@@ -207,7 +207,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 	mlog_entry_void();
 
 	if (osb->local_alloc_state == OCFS2_LA_UNUSED)
-		goto bail;
+		goto out;
 
 	local_alloc_inode =
 		ocfs2_get_system_file_inode(osb,
@@ -216,40 +216,34 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 	if (!local_alloc_inode) {
 		status = -ENOENT;
 		mlog_errno(status);
-		goto bail;
+		goto out;
 	}
 
 	osb->local_alloc_state = OCFS2_LA_DISABLED;
 
-	handle = ocfs2_alloc_handle(osb);
-	if (!handle) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto bail;
-	}
-
 	main_bm_inode = ocfs2_get_system_file_inode(osb,
 						    GLOBAL_BITMAP_SYSTEM_INODE,
 						    OCFS2_INVALID_SLOT);
 	if (!main_bm_inode) {
 		status = -EINVAL;
 		mlog_errno(status);
-		goto bail;
+		goto out;
 	}
 
-	ocfs2_handle_add_inode(handle, main_bm_inode);
-	status = ocfs2_meta_lock(main_bm_inode, handle, &main_bm_bh, 1);
+	mutex_lock(&main_bm_inode->i_mutex);
+
+	status = ocfs2_meta_lock(main_bm_inode, NULL, &main_bm_bh, 1);
 	if (status < 0) {
 		mlog_errno(status);
-		goto bail;
+		goto out_mutex;
 	}
 
 	/* WINDOW_MOVE_CREDITS is a bit heavy... */
-	handle = ocfs2_start_trans(osb, handle, OCFS2_WINDOW_MOVE_CREDITS);
+	handle = ocfs2_start_trans(osb, NULL, OCFS2_WINDOW_MOVE_CREDITS);
 	if (IS_ERR(handle)) {
 		mlog_errno(PTR_ERR(handle));
 		handle = NULL;
-		goto bail;
+		goto out_unlock;
 	}
 
 	bh = osb->local_alloc_bh;
@@ -258,7 +252,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 	alloc_copy = kmalloc(bh->b_size, GFP_KERNEL);
 	if (!alloc_copy) {
 		status = -ENOMEM;
-		goto bail;
+		goto out_commit;
 	}
 	memcpy(alloc_copy, alloc, bh->b_size);
 
@@ -266,7 +260,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
-		goto bail;
+		goto out_commit;
 	}
 
 	ocfs2_clear_local_alloc(alloc);
@@ -274,7 +268,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 	status = ocfs2_journal_dirty(handle, bh);
 	if (status < 0) {
 		mlog_errno(status);
-		goto bail;
+		goto out_commit;
 	}
 
 	brelse(bh);
@@ -286,16 +280,20 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 	if (status < 0)
 		mlog_errno(status);
 
-bail:
-	if (handle)
-		ocfs2_commit_trans(handle);
+out_commit:
+	ocfs2_commit_trans(handle);
 
+out_unlock:
 	if (main_bm_bh)
 		brelse(main_bm_bh);
 
-	if (main_bm_inode)
-		iput(main_bm_inode);
+	ocfs2_meta_unlock(main_bm_inode, 1);
 
+out_mutex:
+	mutex_unlock(&main_bm_inode->i_mutex);
+	iput(main_bm_inode);
+
+out:
 	if (local_alloc_inode)
 		iput(local_alloc_inode);
 
@@ -385,41 +383,35 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
 					struct ocfs2_dinode *alloc)
 {
 	int status;
-	struct ocfs2_journal_handle *handle = NULL;
+	struct ocfs2_journal_handle *handle;
 	struct buffer_head *main_bm_bh = NULL;
-	struct inode *main_bm_inode = NULL;
+	struct inode *main_bm_inode;
 
 	mlog_entry_void();
 
-	handle = ocfs2_alloc_handle(osb);
-	if (!handle) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto bail;
-	}
-
 	main_bm_inode = ocfs2_get_system_file_inode(osb,
 						    GLOBAL_BITMAP_SYSTEM_INODE,
 						    OCFS2_INVALID_SLOT);
 	if (!main_bm_inode) {
 		status = -EINVAL;
 		mlog_errno(status);
-		goto bail;
+		goto out;
 	}
 
-	ocfs2_handle_add_inode(handle, main_bm_inode);
-	status = ocfs2_meta_lock(main_bm_inode, handle, &main_bm_bh, 1);
+	mutex_lock(&main_bm_inode->i_mutex);
+
+	status = ocfs2_meta_lock(main_bm_inode, NULL, &main_bm_bh, 1);
 	if (status < 0) {
 		mlog_errno(status);
-		goto bail;
+		goto out_mutex;
 	}
 
-	handle = ocfs2_start_trans(osb, handle, OCFS2_WINDOW_MOVE_CREDITS);
+	handle = ocfs2_start_trans(osb, NULL, OCFS2_WINDOW_MOVE_CREDITS);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
 		mlog_errno(status);
-		goto bail;
+		goto out_unlock;
 	}
 
 	/* we want the bitmap change to be recorded on disk asap */
@@ -430,16 +422,20 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
 	if (status < 0)
 		mlog_errno(status);
 
-bail:
-	if (handle)
-		ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(handle);
+
+out_unlock:
+	ocfs2_meta_unlock(main_bm_inode, 1);
+
+out_mutex:
+	mutex_unlock(&main_bm_inode->i_mutex);
 
 	if (main_bm_bh)
 		brelse(main_bm_bh);
 
-	if (main_bm_inode)
-		iput(main_bm_inode);
+	iput(main_bm_inode);
 
+out:
 	mlog_exit(status);
 	return status;
 }
-- 
cgit v1.2.3


From e08dc8b9808f06d412904db4d67434bf19984752 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 5 Oct 2006 15:58:48 -0700
Subject: ocfs2: don't pass handle to ocfs2_meta_lock() in
 __ocfs2_flush_truncate_log()

Take and drop the locks directly.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/alloc.c | 46 +++++++++++++++++++---------------------------
 1 file changed, 19 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 74b93f92429..c8a4727bae8 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1113,7 +1113,7 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 {
 	int status;
 	unsigned int num_to_flush;
-	struct ocfs2_journal_handle *handle = NULL;
+	struct ocfs2_journal_handle *handle;
 	struct inode *tl_inode = osb->osb_tl_inode;
 	struct inode *data_alloc_inode = NULL;
 	struct buffer_head *tl_bh = osb->osb_tl_bh;
@@ -1130,7 +1130,7 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 	if (!OCFS2_IS_VALID_DINODE(di)) {
 		OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
 		status = -EIO;
-		goto bail;
+		goto out;
 	}
 
 	num_to_flush = le16_to_cpu(tl->tl_used);
@@ -1138,14 +1138,7 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 	     num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno);
 	if (!num_to_flush) {
 		status = 0;
-		goto bail;
-	}
-
-	handle = ocfs2_alloc_handle(osb);
-	if (!handle) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto bail;
+		goto out;
 	}
 
 	data_alloc_inode = ocfs2_get_system_file_inode(osb,
@@ -1154,41 +1147,40 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 	if (!data_alloc_inode) {
 		status = -EINVAL;
 		mlog(ML_ERROR, "Could not get bitmap inode!\n");
-		goto bail;
+		goto out;
 	}
 
-	ocfs2_handle_add_inode(handle, data_alloc_inode);
-	status = ocfs2_meta_lock(data_alloc_inode, handle, &data_alloc_bh, 1);
+	mutex_lock(&data_alloc_inode->i_mutex);
+
+	status = ocfs2_meta_lock(data_alloc_inode, NULL, &data_alloc_bh, 1);
 	if (status < 0) {
 		mlog_errno(status);
-		goto bail;
+		goto out_mutex;
 	}
 
-	handle = ocfs2_start_trans(osb, handle, OCFS2_TRUNCATE_LOG_UPDATE);
+	handle = ocfs2_start_trans(osb, NULL, OCFS2_TRUNCATE_LOG_UPDATE);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
-		handle = NULL;
 		mlog_errno(status);
-		goto bail;
+		goto out_unlock;
 	}
 
 	status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
 					       data_alloc_bh);
-	if (status < 0) {
+	if (status < 0)
 		mlog_errno(status);
-		goto bail;
-	}
 
-bail:
-	if (handle)
-		ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(handle);
 
-	if (data_alloc_inode)
-		iput(data_alloc_inode);
+out_unlock:
+	brelse(data_alloc_bh);
+	ocfs2_meta_unlock(data_alloc_inode, 1);
 
-	if (data_alloc_bh)
-		brelse(data_alloc_bh);
+out_mutex:
+	mutex_unlock(&data_alloc_inode->i_mutex);
+	iput(data_alloc_inode);
 
+out:
 	mlog_exit(status);
 	return status;
 }
-- 
cgit v1.2.3


From e3a821385984e57d5dd1b0e7292761abb0ea4dd9 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 5 Oct 2006 16:04:17 -0700
Subject: ocfs2: don't pass handle to ocfs2_meta_lock() in ocfs2_mknod()

Take and drop the locks directly.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/namei.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index a57b751d4f4..702f0c30cda 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -333,6 +333,13 @@ static int ocfs2_mknod(struct inode *dir,
 	/* get our super block */
 	osb = OCFS2_SB(dir->i_sb);
 
+	status = ocfs2_meta_lock(dir, NULL, &parent_fe_bh, 1);
+	if (status < 0) {
+		if (status != -ENOENT)
+			mlog_errno(status);
+		return status;
+	}
+
 	handle = ocfs2_alloc_handle(osb);
 	if (handle == NULL) {
 		status = -ENOMEM;
@@ -340,13 +347,6 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}
 
-	status = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
-	if (status < 0) {
-		if (status != -ENOENT)
-			mlog_errno(status);
-		goto leave;
-	}
-
 	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
 		status = -EMLINK;
 		goto leave;
@@ -455,6 +455,8 @@ leave:
 	if (handle)
 		ocfs2_commit_trans(handle);
 
+	ocfs2_meta_unlock(dir, 1);
+
 	if (status == -ENOSPC)
 		mlog(0, "Disk is full\n");
 
-- 
cgit v1.2.3


From 123a9643408882620347cc2c82f24e57dab05f44 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 5 Oct 2006 16:48:23 -0700
Subject: ocfs2: don't pass handle to ocfs2_meta_lock() in ocfs2_link()

Take and drop the locks directly.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/namei.c | 54 ++++++++++++++++++++++++++----------------------------
 1 file changed, 26 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 702f0c30cda..79f513001a6 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -655,7 +655,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 		      struct inode *dir,
 		      struct dentry *dentry)
 {
-	struct ocfs2_journal_handle *handle = NULL;
+	struct ocfs2_journal_handle *handle;
 	struct inode *inode = old_dentry->d_inode;
 	int err;
 	struct buffer_head *fe_bh = NULL;
@@ -668,68 +668,60 @@ static int ocfs2_link(struct dentry *old_dentry,
 		   old_dentry->d_name.len, old_dentry->d_name.name,
 		   dentry->d_name.len, dentry->d_name.name);
 
-	if (S_ISDIR(inode->i_mode)) {
-		err = -EPERM;
-		goto bail;
-	}
-
-	handle = ocfs2_alloc_handle(osb);
-	if (handle == NULL) {
-		err = -ENOMEM;
-		goto bail;
-	}
+	if (S_ISDIR(inode->i_mode))
+		return -EPERM;
 
-	err = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
+	err = ocfs2_meta_lock(dir, NULL, &parent_fe_bh, 1);
 	if (err < 0) {
 		if (err != -ENOENT)
 			mlog_errno(err);
-		goto bail;
+		return err;
 	}
 
 	if (!dir->i_nlink) {
 		err = -ENOENT;
-		goto bail;
+		goto out;
 	}
 
 	err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
 					dentry->d_name.len);
 	if (err)
-		goto bail;
+		goto out;
 
 	err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
 					   dentry->d_name.name,
 					   dentry->d_name.len, &de_bh);
 	if (err < 0) {
 		mlog_errno(err);
-		goto bail;
+		goto out;
 	}
 
-	err = ocfs2_meta_lock(inode, handle, &fe_bh, 1);
+	err = ocfs2_meta_lock(inode, NULL, &fe_bh, 1);
 	if (err < 0) {
 		if (err != -ENOENT)
 			mlog_errno(err);
-		goto bail;
+		goto out;
 	}
 
 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
 	if (le16_to_cpu(fe->i_links_count) >= OCFS2_LINK_MAX) {
 		err = -EMLINK;
-		goto bail;
+		goto out_unlock_inode;
 	}
 
-	handle = ocfs2_start_trans(osb, handle, OCFS2_LINK_CREDITS);
+	handle = ocfs2_start_trans(osb, NULL, OCFS2_LINK_CREDITS);
 	if (IS_ERR(handle)) {
 		err = PTR_ERR(handle);
 		handle = NULL;
 		mlog_errno(err);
-		goto bail;
+		goto out_unlock_inode;
 	}
 
 	err = ocfs2_journal_access(handle, inode, fe_bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (err < 0) {
 		mlog_errno(err);
-		goto bail;
+		goto out_commit;
 	}
 
 	inc_nlink(inode);
@@ -743,7 +735,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 		le16_add_cpu(&fe->i_links_count, -1);
 		drop_nlink(inode);
 		mlog_errno(err);
-		goto bail;
+		goto out_commit;
 	}
 
 	err = ocfs2_add_entry(handle, dentry, inode,
@@ -753,21 +745,27 @@ static int ocfs2_link(struct dentry *old_dentry,
 		le16_add_cpu(&fe->i_links_count, -1);
 		drop_nlink(inode);
 		mlog_errno(err);
-		goto bail;
+		goto out_commit;
 	}
 
 	err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
 	if (err) {
 		mlog_errno(err);
-		goto bail;
+		goto out_commit;
 	}
 
 	atomic_inc(&inode->i_count);
 	dentry->d_op = &ocfs2_dentry_ops;
 	d_instantiate(dentry, inode);
-bail:
-	if (handle)
-		ocfs2_commit_trans(handle);
+
+out_commit:
+	ocfs2_commit_trans(handle);
+out_unlock_inode:
+	ocfs2_meta_unlock(inode, 1);
+
+out:
+	ocfs2_meta_unlock(dir, 1);
+
 	if (de_bh)
 		brelse(de_bh);
 	if (fe_bh)
-- 
cgit v1.2.3


From 5098c27bb8cd89e30d909ac9d76e305d334dadaf Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 5 Oct 2006 18:12:57 -0700
Subject: ocfs2: don't pass handle to ocfs2_meta_lock() in orphan dir code

Take and drop the locks directly.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/namei.c | 66 +++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 39 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 79f513001a6..dc2e89a0c4a 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -112,7 +112,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 			     struct inode *inode2);
 
 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
-				    struct ocfs2_journal_handle *handle,
+				    struct inode **ret_orphan_dir,
 				    struct inode *inode,
 				    char *name,
 				    struct buffer_head **de_bh);
@@ -122,7 +122,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 			    struct inode *inode,
 			    struct ocfs2_dinode *fe,
 			    char *name,
-			    struct buffer_head *de_bh);
+			    struct buffer_head *de_bh,
+			    struct inode *orphan_dir_inode);
 
 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
 				     struct ocfs2_journal_handle *handle,
@@ -813,6 +814,7 @@ static int ocfs2_unlink(struct inode *dir,
 {
 	int status;
 	struct inode *inode = dentry->d_inode;
+	struct inode *orphan_dir = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 	u64 blkno;
 	struct ocfs2_dinode *fe = NULL;
@@ -895,7 +897,7 @@ static int ocfs2_unlink(struct inode *dir,
 	}
 
 	if (inode_is_unlinkable(inode)) {
-		status = ocfs2_prepare_orphan_dir(osb, handle, inode,
+		status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode,
 						  orphan_name,
 						  &orphan_entry_bh);
 		if (status < 0) {
@@ -923,7 +925,7 @@ static int ocfs2_unlink(struct inode *dir,
 
 	if (inode_is_unlinkable(inode)) {
 		status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name,
-					  orphan_entry_bh);
+					  orphan_entry_bh, orphan_dir);
 		if (status < 0) {
 			mlog_errno(status);
 			goto leave;
@@ -962,6 +964,13 @@ leave:
 	if (handle)
 		ocfs2_commit_trans(handle);
 
+	if (orphan_dir) {
+		/* This was locked for us in ocfs2_prepare_orphan_dir() */
+		ocfs2_meta_unlock(orphan_dir, 1);
+		mutex_unlock(&orphan_dir->i_mutex);
+		iput(orphan_dir);
+	}
+
 	if (fe_bh)
 		brelse(fe_bh);
 
@@ -1053,6 +1062,7 @@ static int ocfs2_rename(struct inode *old_dir,
 	int status = 0, rename_lock = 0;
 	struct inode *old_inode = old_dentry->d_inode;
 	struct inode *new_inode = new_dentry->d_inode;
+	struct inode *orphan_dir = NULL;
 	struct ocfs2_dinode *newfe = NULL;
 	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
 	struct buffer_head *orphan_entry_bh = NULL;
@@ -1252,7 +1262,7 @@ static int ocfs2_rename(struct inode *old_dir,
 		     (unsigned long long)newfe_bh->b_blocknr : 0ULL);
 
 		if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
-			status = ocfs2_prepare_orphan_dir(osb, handle,
+			status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
 							  new_inode,
 							  orphan_name,
 							  &orphan_entry_bh);
@@ -1307,7 +1317,7 @@ static int ocfs2_rename(struct inode *old_dir,
 		    (newfe->i_links_count == cpu_to_le16(1))){
 			status = ocfs2_orphan_add(osb, handle, new_inode,
 						  newfe, orphan_name,
-						  orphan_entry_bh);
+						  orphan_entry_bh, orphan_dir);
 			if (status < 0) {
 				mlog_errno(status);
 				goto bail;
@@ -1426,6 +1436,13 @@ bail:
 	if (handle)
 		ocfs2_commit_trans(handle);
 
+	if (orphan_dir) {
+		/* This was locked for us in ocfs2_prepare_orphan_dir() */
+		ocfs2_meta_unlock(orphan_dir, 1);
+		mutex_unlock(&orphan_dir->i_mutex);
+		iput(orphan_dir);
+	}
+
 	if (new_inode)
 		sync_mapping_buffers(old_inode->i_mapping);
 
@@ -2085,19 +2102,19 @@ bail:
 }
 
 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
-				    struct ocfs2_journal_handle *handle,
+				    struct inode **ret_orphan_dir,
 				    struct inode *inode,
 				    char *name,
 				    struct buffer_head **de_bh)
 {
-	struct inode *orphan_dir_inode = NULL;
+	struct inode *orphan_dir_inode;
 	struct buffer_head *orphan_dir_bh = NULL;
 	int status = 0;
 
 	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
 	if (status < 0) {
 		mlog_errno(status);
-		goto leave;
+		return status;
 	}
 
 	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
@@ -2106,11 +2123,12 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 	if (!orphan_dir_inode) {
 		status = -ENOENT;
 		mlog_errno(status);
-		goto leave;
+		return status;
 	}
 
-	ocfs2_handle_add_inode(handle, orphan_dir_inode);
-	status = ocfs2_meta_lock(orphan_dir_inode, handle, &orphan_dir_bh, 1);
+	mutex_lock(&orphan_dir_inode->i_mutex);
+
+	status = ocfs2_meta_lock(orphan_dir_inode, NULL, &orphan_dir_bh, 1);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
@@ -2120,13 +2138,19 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 					      orphan_dir_bh, name,
 					      OCFS2_ORPHAN_NAMELEN, de_bh);
 	if (status < 0) {
+		ocfs2_meta_unlock(orphan_dir_inode, 1);
+
 		mlog_errno(status);
 		goto leave;
 	}
 
+	*ret_orphan_dir = orphan_dir_inode;
+
 leave:
-	if (orphan_dir_inode)
+	if (status) {
+		mutex_unlock(&orphan_dir_inode->i_mutex);
 		iput(orphan_dir_inode);
+	}
 
 	if (orphan_dir_bh)
 		brelse(orphan_dir_bh);
@@ -2140,24 +2164,15 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 			    struct inode *inode,
 			    struct ocfs2_dinode *fe,
 			    char *name,
-			    struct buffer_head *de_bh)
+			    struct buffer_head *de_bh,
+			    struct inode *orphan_dir_inode)
 {
-	struct inode *orphan_dir_inode = NULL;
 	struct buffer_head *orphan_dir_bh = NULL;
 	int status = 0;
 	struct ocfs2_dinode *orphan_fe;
 
 	mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
 
-	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
-						       ORPHAN_DIR_SYSTEM_INODE,
-						       osb->slot_num);
-	if (!orphan_dir_inode) {
-		status = -ENOENT;
-		mlog_errno(status);
-		goto leave;
-	}
-
 	status = ocfs2_read_block(osb,
 				  OCFS2_I(orphan_dir_inode)->ip_blkno,
 				  &orphan_dir_bh, OCFS2_BH_CACHED,
@@ -2209,9 +2224,6 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
 
 leave:
-	if (orphan_dir_inode)
-		iput(orphan_dir_inode);
-
 	if (orphan_dir_bh)
 		brelse(orphan_dir_bh);
 
-- 
cgit v1.2.3


From 30a4f5e86bc7bc388ce808117e7722706f739602 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 6 Oct 2006 11:49:45 -0700
Subject: ocfs2: don't pass handle to ocfs2_meta_lock in ocfs2_unlink()

Take and drop the locks directly.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/namei.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index dc2e89a0c4a..f64cff0ceed 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -813,6 +813,7 @@ static int ocfs2_unlink(struct inode *dir,
 			struct dentry *dentry)
 {
 	int status;
+	int child_locked = 0;
 	struct inode *inode = dentry->d_inode;
 	struct inode *orphan_dir = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
@@ -835,22 +836,14 @@ static int ocfs2_unlink(struct inode *dir,
 
 	if (inode == osb->root_inode) {
 		mlog(0, "Cannot delete the root directory\n");
-		status = -EPERM;
-		goto leave;
-	}
-
-	handle = ocfs2_alloc_handle(osb);
-	if (handle == NULL) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto leave;
+		return -EPERM;
 	}
 
-	status = ocfs2_meta_lock(dir, handle, &parent_node_bh, 1);
+	status = ocfs2_meta_lock(dir, NULL, &parent_node_bh, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
-		goto leave;
+		return status;
 	}
 
 	status = ocfs2_find_files_on_disk(dentry->d_name.name,
@@ -871,12 +864,13 @@ static int ocfs2_unlink(struct inode *dir,
 		goto leave;
 	}
 
-	status = ocfs2_meta_lock(inode, handle, &fe_bh, 1);
+	status = ocfs2_meta_lock(inode, NULL, &fe_bh, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
 		goto leave;
 	}
+	child_locked = 1;
 
 	if (S_ISDIR(inode->i_mode)) {
 	       	if (!ocfs2_empty_dir(inode)) {
@@ -906,7 +900,7 @@ static int ocfs2_unlink(struct inode *dir,
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, handle, OCFS2_UNLINK_CREDITS);
+	handle = ocfs2_start_trans(osb, NULL, OCFS2_UNLINK_CREDITS);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -964,6 +958,11 @@ leave:
 	if (handle)
 		ocfs2_commit_trans(handle);
 
+	if (child_locked)
+		ocfs2_meta_unlock(inode, 1);
+
+	ocfs2_meta_unlock(dir, 1);
+
 	if (orphan_dir) {
 		/* This was locked for us in ocfs2_prepare_orphan_dir() */
 		ocfs2_meta_unlock(orphan_dir, 1);
-- 
cgit v1.2.3


From 6d8fc40e63401be065b0aa98dfa4246deca04b8c Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 6 Oct 2006 11:54:33 -0700
Subject: ocfs2: don't pass handle to ocfs2_meta_lock in ocfs2_symlink()

Take and drop the locks directly.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/namei.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f64cff0ceed..7e9da946c77 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1603,19 +1603,12 @@ static int ocfs2_symlink(struct inode *dir,
 
 	credits = ocfs2_calc_symlink_credits(sb);
 
-	handle = ocfs2_alloc_handle(osb);
-	if (handle == NULL) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto bail;
-	}
-
 	/* lock the parent directory */
-	status = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
+	status = ocfs2_meta_lock(dir, NULL, &parent_fe_bh, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
-		goto bail;
+		return status;
 	}
 
 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
@@ -1638,6 +1631,13 @@ static int ocfs2_symlink(struct inode *dir,
 		goto bail;
 	}
 
+	handle = ocfs2_alloc_handle(osb);
+	if (handle == NULL) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto bail;
+	}
+
 	status = ocfs2_reserve_new_inode(osb, handle, &inode_ac);
 	if (status < 0) {
 		if (status != -ENOSPC)
@@ -1734,6 +1734,9 @@ static int ocfs2_symlink(struct inode *dir,
 bail:
 	if (handle)
 		ocfs2_commit_trans(handle);
+
+	ocfs2_meta_unlock(dir, 1);
+
 	if (new_fe_bh)
 		brelse(new_fe_bh);
 	if (parent_fe_bh)
-- 
cgit v1.2.3


From 8d5596c687c49c1d8812c3456946dec15d069139 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 6 Oct 2006 15:04:51 -0700
Subject: ocfs2: don't pass handle to ocfs2_meta_lock in ocfs2_rename()

Take and drop the locks directly.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/namei.c | 56 +++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 7e9da946c77..5bd5f9948a2 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -104,13 +104,6 @@ static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
 			      struct buffer_head *fe_bh,
 			      struct ocfs2_alloc_context *data_ac);
 
-static int ocfs2_double_lock(struct ocfs2_super *osb,
-			     struct ocfs2_journal_handle *handle,
-			     struct buffer_head **bh1,
-			     struct inode *inode1,
-			     struct buffer_head **bh2,
-			     struct inode *inode2);
-
 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 				    struct inode **ret_orphan_dir,
 				    struct inode *inode,
@@ -992,7 +985,6 @@ leave:
  * if they have the same id, then the 1st one is the only one locked.
  */
 static int ocfs2_double_lock(struct ocfs2_super *osb,
-			     struct ocfs2_journal_handle *handle,
 			     struct buffer_head **bh1,
 			     struct inode *inode1,
 			     struct buffer_head **bh2,
@@ -1008,8 +1000,6 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 		   (unsigned long long)oi1->ip_blkno,
 		   (unsigned long long)oi2->ip_blkno);
 
-	BUG_ON(!handle);
-
 	if (*bh1)
 		*bh1 = NULL;
 	if (*bh2)
@@ -1029,25 +1019,41 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 			inode1 = tmpinode;
 		}
 		/* lock id2 */
-		status = ocfs2_meta_lock(inode2, handle, bh2, 1);
+		status = ocfs2_meta_lock(inode2, NULL, bh2, 1);
 		if (status < 0) {
 			if (status != -ENOENT)
 				mlog_errno(status);
 			goto bail;
 		}
 	}
+
 	/* lock id1 */
-	status = ocfs2_meta_lock(inode1, handle, bh1, 1);
+	status = ocfs2_meta_lock(inode1, NULL, bh1, 1);
 	if (status < 0) {
+		/*
+		 * An error return must mean that no cluster locks
+		 * were held on function exit.
+		 */
+		if (oi1->ip_blkno != oi2->ip_blkno)
+			ocfs2_meta_unlock(inode2, 1);
+
 		if (status != -ENOENT)
 			mlog_errno(status);
-		goto bail;
 	}
+
 bail:
 	mlog_exit(status);
 	return status;
 }
 
+static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2)
+{
+	ocfs2_meta_unlock(inode1, 1);
+
+	if (inode1 != inode2)
+		ocfs2_meta_unlock(inode2, 1);
+}
+
 #define PARENT_INO(buffer) \
 	((struct ocfs2_dir_entry *) \
 	 ((char *)buffer + \
@@ -1058,7 +1064,8 @@ static int ocfs2_rename(struct inode *old_dir,
 			struct inode *new_dir,
 			struct dentry *new_dentry)
 {
-	int status = 0, rename_lock = 0;
+	int status = 0, rename_lock = 0, parents_locked = 0;
+	int old_child_locked = 0, new_child_locked = 0;
 	struct inode *old_inode = old_dentry->d_inode;
 	struct inode *new_inode = new_dentry->d_inode;
 	struct inode *orphan_dir = NULL;
@@ -1122,13 +1129,13 @@ static int ocfs2_rename(struct inode *old_dir,
 	}
 
 	/* if old and new are the same, this'll just do one lock. */
-	status = ocfs2_double_lock(osb, handle,
-				  &old_dir_bh, old_dir,
-				  &new_dir_bh, new_dir);
+	status = ocfs2_double_lock(osb, &old_dir_bh, old_dir,
+				   &new_dir_bh, new_dir);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
+	parents_locked = 1;
 
 	/* make sure both dirs have bhs
 	 * get an extra ref on old_dir_bh if old==new */
@@ -1149,12 +1156,13 @@ static int ocfs2_rename(struct inode *old_dir,
 	 * the vote thread on other nodes won't have to concurrently
 	 * downconvert the inode and the dentry locks.
 	 */
-	status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
+	status = ocfs2_meta_lock(old_inode, NULL, NULL, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
 		goto bail;
 	}
+	old_child_locked = 1;
 
 	status = ocfs2_remote_dentry_delete(old_dentry);
 	if (status < 0) {
@@ -1240,12 +1248,13 @@ static int ocfs2_rename(struct inode *old_dir,
 			goto bail;
 		}
 
-		status = ocfs2_meta_lock(new_inode, handle, &newfe_bh, 1);
+		status = ocfs2_meta_lock(new_inode, NULL, &newfe_bh, 1);
 		if (status < 0) {
 			if (status != -ENOENT)
 				mlog_errno(status);
 			goto bail;
 		}
+		new_child_locked = 1;
 
 		status = ocfs2_remote_dentry_delete(new_dentry);
 		if (status < 0) {
@@ -1435,6 +1444,15 @@ bail:
 	if (handle)
 		ocfs2_commit_trans(handle);
 
+	if (parents_locked)
+		ocfs2_double_unlock(old_dir, new_dir);
+
+	if (old_child_locked)
+		ocfs2_meta_unlock(old_inode, 1);
+
+	if (new_child_locked)
+		ocfs2_meta_unlock(new_inode, 1);
+
 	if (orphan_dir) {
 		/* This was locked for us in ocfs2_prepare_orphan_dir() */
 		ocfs2_meta_unlock(orphan_dir, 1);
-- 
cgit v1.2.3


From da5cbf2f9df922cfdafa39351691fa83517f1e25 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 6 Oct 2006 18:34:35 -0700
Subject: ocfs2: don't use handle for locking in allocation functions

Instead we record our state on the allocation context structure which all
callers already know about and lifetime correctly. This means the
reservation functions don't need a handle passed in any more, and we can
also take it off the alloc context.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dir.c        |  14 ++----
 fs/ocfs2/file.c       |  19 ++-----
 fs/ocfs2/localalloc.c |  26 +++-------
 fs/ocfs2/localalloc.h |   1 -
 fs/ocfs2/namei.c      |  26 +++-------
 fs/ocfs2/suballoc.c   | 134 ++++++++++++++++++++------------------------------
 fs/ocfs2/suballoc.h   |   4 --
 7 files changed, 72 insertions(+), 152 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 04e01915b86..d10ccea82fa 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -409,13 +409,6 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 	mlog(0, "extending dir %llu (i_size = %lld)\n",
 	     (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size);
 
-	handle = ocfs2_alloc_handle(osb);
-	if (handle == NULL) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto bail;
-	}
-
 	/* dir->i_size is always block aligned. */
 	spin_lock(&OCFS2_I(dir)->ip_lock);
 	if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
@@ -428,8 +421,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 		}
 
 		if (!num_free_extents) {
-			status = ocfs2_reserve_new_metadata(osb, handle,
-							    fe, &meta_ac);
+			status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac);
 			if (status < 0) {
 				if (status != -ENOSPC)
 					mlog_errno(status);
@@ -437,7 +429,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 			}
 		}
 
-		status = ocfs2_reserve_clusters(osb, handle, 1, &data_ac);
+		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
 		if (status < 0) {
 			if (status != -ENOSPC)
 				mlog_errno(status);
@@ -450,7 +442,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 		credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
 	}
 
-	handle = ocfs2_start_trans(osb, handle, credits);
+	handle = ocfs2_start_trans(osb, NULL, credits);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index d8bd2c32f08..bd944e893bb 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -463,13 +463,6 @@ restart_all:
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
 	     fe->i_clusters, clusters_to_add);
 
-	handle = ocfs2_alloc_handle(osb);
-	if (handle == NULL) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto leave;
-	}
-
 	num_free_extents = ocfs2_num_free_extents(osb,
 						  inode,
 						  fe);
@@ -480,10 +473,7 @@ restart_all:
 	}
 
 	if (!num_free_extents) {
-		status = ocfs2_reserve_new_metadata(osb,
-						    handle,
-						    fe,
-						    &meta_ac);
+		status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac);
 		if (status < 0) {
 			if (status != -ENOSPC)
 				mlog_errno(status);
@@ -491,10 +481,7 @@ restart_all:
 		}
 	}
 
-	status = ocfs2_reserve_clusters(osb,
-					handle,
-					clusters_to_add,
-					&data_ac);
+	status = ocfs2_reserve_clusters(osb, clusters_to_add, &data_ac);
 	if (status < 0) {
 		if (status != -ENOSPC)
 			mlog_errno(status);
@@ -509,7 +496,7 @@ restart_all:
 	drop_alloc_sem = 1;
 
 	credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add);
-	handle = ocfs2_start_trans(osb, handle, credits);
+	handle = ocfs2_start_trans(osb, NULL, credits);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 75f09f1b4ce..869383e7c56 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -64,7 +64,6 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
 				    struct buffer_head *main_bm_bh);
 
 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
-						struct ocfs2_journal_handle *handle,
 						struct ocfs2_alloc_context **ac,
 						struct inode **bitmap_inode,
 						struct buffer_head **bitmap_bh);
@@ -448,7 +447,6 @@ out:
  * our own in order to shift windows.
  */
 int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
-				   struct ocfs2_journal_handle *passed_handle,
 				   u32 bits_wanted,
 				   struct ocfs2_alloc_context *ac)
 {
@@ -459,9 +457,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 
 	mlog_entry_void();
 
-	BUG_ON(!passed_handle);
 	BUG_ON(!ac);
-	BUG_ON(passed_handle->k_handle);
 
 	local_alloc_inode =
 		ocfs2_get_system_file_inode(osb,
@@ -472,7 +468,11 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 		mlog_errno(status);
 		goto bail;
 	}
-	ocfs2_handle_add_inode(passed_handle, local_alloc_inode);
+
+	mutex_lock(&local_alloc_inode->i_mutex);
+
+	ac->ac_inode = local_alloc_inode;
+	ac->ac_which = OCFS2_AC_USE_LOCAL;
 
 	if (osb->local_alloc_state != OCFS2_LA_ENABLED) {
 		status = -ENOSPC;
@@ -511,14 +511,10 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 		}
 	}
 
-	ac->ac_inode = igrab(local_alloc_inode);
 	get_bh(osb->local_alloc_bh);
 	ac->ac_bh = osb->local_alloc_bh;
-	ac->ac_which = OCFS2_AC_USE_LOCAL;
 	status = 0;
 bail:
-	if (local_alloc_inode)
-		iput(local_alloc_inode);
 
 	mlog_exit(status);
 	return status;
@@ -774,7 +770,6 @@ bail:
 }
 
 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
-						struct ocfs2_journal_handle *handle,
 						struct ocfs2_alloc_context **ac,
 						struct inode **bitmap_inode,
 						struct buffer_head **bitmap_bh)
@@ -788,7 +783,6 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	(*ac)->ac_handle = handle;
 	(*ac)->ac_bits_wanted = ocfs2_local_alloc_window_bits(osb);
 
 	status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
@@ -891,16 +885,8 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 
 	mlog_entry_void();
 
-	handle = ocfs2_alloc_handle(osb);
-	if (!handle) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto bail;
-	}
-
 	/* This will lock the main bitmap for us. */
 	status = ocfs2_local_alloc_reserve_for_window(osb,
-						      handle,
 						      &ac,
 						      &main_bm_inode,
 						      &main_bm_bh);
@@ -910,7 +896,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	handle = ocfs2_start_trans(osb, handle, OCFS2_WINDOW_MOVE_CREDITS);
+	handle = ocfs2_start_trans(osb, NULL, OCFS2_WINDOW_MOVE_CREDITS);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index 30f88ce14e4..4bd624ce380 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -42,7 +42,6 @@ int ocfs2_alloc_should_use_local(struct ocfs2_super *osb,
 
 struct ocfs2_alloc_context;
 int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
-				   struct ocfs2_journal_handle *passed_handle,
 				   u32 bits_wanted,
 				   struct ocfs2_alloc_context *ac);
 
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 5bd5f9948a2..ca012446d8e 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -334,13 +334,6 @@ static int ocfs2_mknod(struct inode *dir,
 		return status;
 	}
 
-	handle = ocfs2_alloc_handle(osb);
-	if (handle == NULL) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto leave;
-	}
-
 	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
 		status = -EMLINK;
 		goto leave;
@@ -368,7 +361,7 @@ static int ocfs2_mknod(struct inode *dir,
 	}
 
 	/* reserve an inode spot */
-	status = ocfs2_reserve_new_inode(osb, handle, &inode_ac);
+	status = ocfs2_reserve_new_inode(osb, &inode_ac);
 	if (status < 0) {
 		if (status != -ENOSPC)
 			mlog_errno(status);
@@ -378,7 +371,7 @@ static int ocfs2_mknod(struct inode *dir,
 	/* are we making a directory? If so, reserve a cluster for his
 	 * 1st extent. */
 	if (S_ISDIR(mode)) {
-		status = ocfs2_reserve_clusters(osb, handle, 1, &data_ac);
+		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
 		if (status < 0) {
 			if (status != -ENOSPC)
 				mlog_errno(status);
@@ -386,7 +379,7 @@ static int ocfs2_mknod(struct inode *dir,
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, handle, OCFS2_MKNOD_CREDITS);
+	handle = ocfs2_start_trans(osb, NULL, OCFS2_MKNOD_CREDITS);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -1649,14 +1642,7 @@ static int ocfs2_symlink(struct inode *dir,
 		goto bail;
 	}
 
-	handle = ocfs2_alloc_handle(osb);
-	if (handle == NULL) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto bail;
-	}
-
-	status = ocfs2_reserve_new_inode(osb, handle, &inode_ac);
+	status = ocfs2_reserve_new_inode(osb, &inode_ac);
 	if (status < 0) {
 		if (status != -ENOSPC)
 			mlog_errno(status);
@@ -1665,7 +1651,7 @@ static int ocfs2_symlink(struct inode *dir,
 
 	/* don't reserve bitmap space for fast symlinks. */
 	if (l > ocfs2_fast_symlink_chars(sb)) {
-		status = ocfs2_reserve_clusters(osb, handle, 1, &data_ac);
+		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
 		if (status < 0) {
 			if (status != -ENOSPC)
 				mlog_errno(status);
@@ -1673,7 +1659,7 @@ static int ocfs2_symlink(struct inode *dir,
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, handle, credits);
+	handle = ocfs2_start_trans(osb, NULL, credits);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 32093409e25..31bda54fefe 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -59,9 +59,6 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 				   struct inode *alloc_inode,
 				   struct buffer_head *bh);
 
-static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
-				       struct ocfs2_alloc_context *ac);
-
 static int ocfs2_cluster_group_search(struct inode *inode,
 				      struct buffer_head *group_bh,
 				      u32 bits_wanted, u32 min_bits,
@@ -72,6 +69,7 @@ static int ocfs2_block_group_search(struct inode *inode,
 				    u16 *bit_off, u16 *bits_found);
 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 				     struct ocfs2_alloc_context *ac,
+				     struct ocfs2_journal_handle *handle,
 				     u32 bits_wanted,
 				     u32 min_bits,
 				     u16 *bit_off,
@@ -120,8 +118,16 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
 
 void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
 {
-	if (ac->ac_inode)
-		iput(ac->ac_inode);
+	struct inode *inode = ac->ac_inode;
+
+	if (inode) {
+		if (ac->ac_which != OCFS2_AC_USE_LOCAL)
+			ocfs2_meta_unlock(inode, 1);
+
+		mutex_unlock(&inode->i_mutex);
+
+		iput(inode);
+	}
 	if (ac->ac_bh)
 		brelse(ac->ac_bh);
 	kfree(ac);
@@ -284,16 +290,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
 	mlog_entry_void();
 
-	handle = ocfs2_alloc_handle(osb);
-	if (!handle) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto bail;
-	}
-
 	cl = &fe->id2.i_chain;
 	status = ocfs2_reserve_clusters(osb,
-					handle,
 					le16_to_cpu(cl->cl_cpg),
 					&ac);
 	if (status < 0) {
@@ -402,27 +400,38 @@ bail:
 }
 
 static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
-				       struct ocfs2_alloc_context *ac)
+				       struct ocfs2_alloc_context *ac,
+				       int type,
+				       u32 slot)
 {
 	int status;
 	u32 bits_wanted = ac->ac_bits_wanted;
-	struct inode *alloc_inode = ac->ac_inode;
+	struct inode *alloc_inode;
 	struct buffer_head *bh = NULL;
-	struct ocfs2_journal_handle *handle = ac->ac_handle;
 	struct ocfs2_dinode *fe;
 	u32 free_bits;
 
 	mlog_entry_void();
 
-	BUG_ON(handle->k_handle);
+	alloc_inode = ocfs2_get_system_file_inode(osb, type, slot);
+	if (!alloc_inode) {
+		mlog_errno(-EINVAL);
+		return -EINVAL;
+	}
 
-	ocfs2_handle_add_inode(handle, alloc_inode);
-	status = ocfs2_meta_lock(alloc_inode, handle, &bh, 1);
+	mutex_lock(&alloc_inode->i_mutex);
+
+	status = ocfs2_meta_lock(alloc_inode, NULL, &bh, 1);
 	if (status < 0) {
+		mutex_unlock(&alloc_inode->i_mutex);
+		iput(alloc_inode);
+
 		mlog_errno(status);
-		goto bail;
+		return status;
 	}
 
+	ac->ac_inode = alloc_inode;
+
 	fe = (struct ocfs2_dinode *) bh->b_data;
 	if (!OCFS2_IS_VALID_DINODE(fe)) {
 		OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
@@ -473,12 +482,11 @@ bail:
 }
 
 int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
-			       struct ocfs2_journal_handle *handle,
 			       struct ocfs2_dinode *fe,
 			       struct ocfs2_alloc_context **ac)
 {
 	int status;
-	struct inode *alloc_inode = NULL;
+	u32 slot;
 
 	*ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	if (!(*ac)) {
@@ -488,28 +496,18 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
 	}
 
 	(*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe);
-	(*ac)->ac_handle = handle;
 	(*ac)->ac_which = OCFS2_AC_USE_META;
 
 #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
-	alloc_inode = ocfs2_get_system_file_inode(osb,
-						  EXTENT_ALLOC_SYSTEM_INODE,
-						  0);
+	slot = 0;
 #else
-	alloc_inode = ocfs2_get_system_file_inode(osb,
-						  EXTENT_ALLOC_SYSTEM_INODE,
-						  osb->slot_num);
+	slot = osb->slot_num;
 #endif
-	if (!alloc_inode) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto bail;
-	}
 
-	(*ac)->ac_inode = igrab(alloc_inode);
 	(*ac)->ac_group_search = ocfs2_block_group_search;
 
-	status = ocfs2_reserve_suballoc_bits(osb, (*ac));
+	status = ocfs2_reserve_suballoc_bits(osb, (*ac),
+					     EXTENT_ALLOC_SYSTEM_INODE, slot);
 	if (status < 0) {
 		if (status != -ENOSPC)
 			mlog_errno(status);
@@ -523,19 +521,14 @@ bail:
 		*ac = NULL;
 	}
 
-	if (alloc_inode)
-		iput(alloc_inode);
-
 	mlog_exit(status);
 	return status;
 }
 
 int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
-			    struct ocfs2_journal_handle *handle,
 			    struct ocfs2_alloc_context **ac)
 {
 	int status;
-	struct inode *alloc_inode = NULL;
 
 	*ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	if (!(*ac)) {
@@ -545,22 +538,13 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
 	}
 
 	(*ac)->ac_bits_wanted = 1;
-	(*ac)->ac_handle = handle;
 	(*ac)->ac_which = OCFS2_AC_USE_INODE;
 
-	alloc_inode = ocfs2_get_system_file_inode(osb,
-						  INODE_ALLOC_SYSTEM_INODE,
-						  osb->slot_num);
-	if (!alloc_inode) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto bail;
-	}
-
-	(*ac)->ac_inode = igrab(alloc_inode);
 	(*ac)->ac_group_search = ocfs2_block_group_search;
 
-	status = ocfs2_reserve_suballoc_bits(osb, *ac);
+	status = ocfs2_reserve_suballoc_bits(osb, *ac,
+					     INODE_ALLOC_SYSTEM_INODE,
+					     osb->slot_num);
 	if (status < 0) {
 		if (status != -ENOSPC)
 			mlog_errno(status);
@@ -574,9 +558,6 @@ bail:
 		*ac = NULL;
 	}
 
-	if (alloc_inode)
-		iput(alloc_inode);
-
 	mlog_exit(status);
 	return status;
 }
@@ -588,20 +569,17 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
 {
 	int status;
 
-	ac->ac_inode = ocfs2_get_system_file_inode(osb,
-						   GLOBAL_BITMAP_SYSTEM_INODE,
-						   OCFS2_INVALID_SLOT);
-	if (!ac->ac_inode) {
-		status = -EINVAL;
-		mlog(ML_ERROR, "Could not get bitmap inode!\n");
-		goto bail;
-	}
 	ac->ac_which = OCFS2_AC_USE_MAIN;
 	ac->ac_group_search = ocfs2_cluster_group_search;
 
-	status = ocfs2_reserve_suballoc_bits(osb, ac);
-	if (status < 0 && status != -ENOSPC)
+	status = ocfs2_reserve_suballoc_bits(osb, ac,
+					     GLOBAL_BITMAP_SYSTEM_INODE,
+					     OCFS2_INVALID_SLOT);
+	if (status < 0 && status != -ENOSPC) {
 		mlog_errno(status);
+		goto bail;
+	}
+
 bail:
 	return status;
 }
@@ -610,7 +588,6 @@ bail:
  * use so we figure it out for them, but unfortunately this clutters
  * things a bit. */
 int ocfs2_reserve_clusters(struct ocfs2_super *osb,
-			   struct ocfs2_journal_handle *handle,
 			   u32 bits_wanted,
 			   struct ocfs2_alloc_context **ac)
 {
@@ -618,8 +595,6 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
 
 	mlog_entry_void();
 
-	BUG_ON(!handle);
-
 	*ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	if (!(*ac)) {
 		status = -ENOMEM;
@@ -628,12 +603,10 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
 	}
 
 	(*ac)->ac_bits_wanted = bits_wanted;
-	(*ac)->ac_handle = handle;
 
 	status = -ENOSPC;
 	if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
 		status = ocfs2_reserve_local_alloc_bits(osb,
-							handle,
 							bits_wanted,
 							*ac);
 		if ((status < 0) && (status != -ENOSPC)) {
@@ -1055,6 +1028,7 @@ out:
 }
 
 static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
+				  struct ocfs2_journal_handle *handle,
 				  u32 bits_wanted,
 				  u32 min_bits,
 				  u16 *bit_off,
@@ -1067,7 +1041,6 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
 	struct buffer_head *group_bh = NULL;
 	struct ocfs2_group_desc *gd;
 	struct inode *alloc_inode = ac->ac_inode;
-	struct ocfs2_journal_handle *handle = ac->ac_handle;
 
 	ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno,
 			       &group_bh, OCFS2_BH_CACHED, alloc_inode);
@@ -1115,6 +1088,7 @@ out:
 }
 
 static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
+			      struct ocfs2_journal_handle *handle,
 			      u32 bits_wanted,
 			      u32 min_bits,
 			      u16 *bit_off,
@@ -1126,7 +1100,6 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 	u16 chain, tmp_bits;
 	u32 tmp_used;
 	u64 next_group;
-	struct ocfs2_journal_handle *handle = ac->ac_handle;
 	struct inode *alloc_inode = ac->ac_inode;
 	struct buffer_head *group_bh = NULL;
 	struct buffer_head *prev_group_bh = NULL;
@@ -1272,6 +1245,7 @@ bail:
 /* will give out up to bits_wanted contiguous bits. */
 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 				     struct ocfs2_alloc_context *ac,
+				     struct ocfs2_journal_handle *handle,
 				     u32 bits_wanted,
 				     u32 min_bits,
 				     u16 *bit_off,
@@ -1313,8 +1287,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 		 * by jumping straight to the most recently used
 		 * allocation group. This helps us mantain some
 		 * contiguousness across allocations. */
-		status = ocfs2_search_one_group(ac, bits_wanted, min_bits,
-						bit_off, num_bits,
+		status = ocfs2_search_one_group(ac, handle, bits_wanted,
+						min_bits, bit_off, num_bits,
 						hint_blkno, &bits_left);
 		if (!status) {
 			/* Be careful to update *bg_blkno here as the
@@ -1336,7 +1310,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 	ac->ac_chain = victim;
 	ac->ac_allow_chain_relink = 1;
 
-	status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off,
+	status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off,
 				    num_bits, bg_blkno, &bits_left);
 	if (!status)
 		goto set_hint;
@@ -1360,7 +1334,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 			continue;
 
 		ac->ac_chain = i;
-		status = ocfs2_search_chain(ac, bits_wanted, min_bits,
+		status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
 					    bit_off, num_bits, bg_blkno,
 					    &bits_left);
 		if (!status)
@@ -1401,10 +1375,10 @@ int ocfs2_claim_metadata(struct ocfs2_super *osb,
 	BUG_ON(!ac);
 	BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted));
 	BUG_ON(ac->ac_which != OCFS2_AC_USE_META);
-	BUG_ON(ac->ac_handle != handle);
 
 	status = ocfs2_claim_suballoc_bits(osb,
 					   ac,
+					   handle,
 					   bits_wanted,
 					   1,
 					   suballoc_bit_start,
@@ -1440,10 +1414,10 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb,
 	BUG_ON(ac->ac_bits_given != 0);
 	BUG_ON(ac->ac_bits_wanted != 1);
 	BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
-	BUG_ON(ac->ac_handle != handle);
 
 	status = ocfs2_claim_suballoc_bits(osb,
 					   ac,
+					   handle,
 					   1,
 					   1,
 					   suballoc_bit,
@@ -1546,7 +1520,6 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
 
 	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
 	       && ac->ac_which != OCFS2_AC_USE_MAIN);
-	BUG_ON(ac->ac_handle != handle);
 
 	if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
 		status = ocfs2_claim_local_alloc_bits(osb,
@@ -1572,6 +1545,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
 
 		status = ocfs2_claim_suballoc_bits(osb,
 						   ac,
+						   handle,
 						   bits_wanted,
 						   min_clusters,
 						   &bg_bit_off,
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index c787838d105..e7c4b277c8d 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -43,7 +43,6 @@ struct ocfs2_alloc_context {
 #define OCFS2_AC_USE_INODE 3
 #define OCFS2_AC_USE_META  4
 	u32    ac_which;
-	struct ocfs2_journal_handle *ac_handle;
 
 	/* these are used by the chain search */
 	u16    ac_chain;
@@ -60,14 +59,11 @@ static inline int ocfs2_alloc_context_bits_left(struct ocfs2_alloc_context *ac)
 }
 
 int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
-			       struct ocfs2_journal_handle *handle,
 			       struct ocfs2_dinode *fe,
 			       struct ocfs2_alloc_context **ac);
 int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
-			    struct ocfs2_journal_handle *handle,
 			    struct ocfs2_alloc_context **ac);
 int ocfs2_reserve_clusters(struct ocfs2_super *osb,
-			   struct ocfs2_journal_handle *handle,
 			   u32 bits_wanted,
 			   struct ocfs2_alloc_context **ac);
 
-- 
cgit v1.2.3


From 85b9e783cbc8cf4acc7bfaa76f37ea26b426f514 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 6 Oct 2006 18:40:41 -0700
Subject: ocfs2: Don't allocate handle early in ocfs2_rename()

It isn't used until ocfs2_start_trans() anyway.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/namei.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index ca012446d8e..8d03ef44686 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1114,13 +1114,6 @@ static int ocfs2_rename(struct inode *old_dir,
 		rename_lock = 1;
 	}
 
-	handle = ocfs2_alloc_handle(osb);
-	if (handle == NULL) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto bail;
-	}
-
 	/* if old and new are the same, this'll just do one lock. */
 	status = ocfs2_double_lock(osb, &old_dir_bh, old_dir,
 				   &new_dir_bh, new_dir);
@@ -1291,7 +1284,7 @@ static int ocfs2_rename(struct inode *old_dir,
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, handle, OCFS2_RENAME_CREDITS);
+	handle = ocfs2_start_trans(osb, NULL, OCFS2_RENAME_CREDITS);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
-- 
cgit v1.2.3


From 02928a71ae6da6e3e205d99e1fa1a1f598ddb62d Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 6 Oct 2006 18:51:46 -0700
Subject: ocfs2: remove unused ocfs2_handle_add_inode()

We can also delete the unused infrastructure which was once in place to
support this functionality. ocfs2_inode_private loses ip_handle and
ip_handle_list. ocfs2_journal_handle loses handle_list.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/inode.c   |  6 ------
 fs/ocfs2/inode.h   |  7 -------
 fs/ocfs2/journal.c | 43 -------------------------------------------
 fs/ocfs2/journal.h |  9 ---------
 fs/ocfs2/super.c   |  2 --
 5 files changed, 67 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 16e8e74dc96..20e60e4b1f1 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1067,12 +1067,6 @@ void ocfs2_clear_inode(struct inode *inode)
 	mlog_bug_on_msg(oi->ip_open_count,
 			"Clear inode of %llu has open count %d\n",
 			(unsigned long long)oi->ip_blkno, oi->ip_open_count);
-	mlog_bug_on_msg(!list_empty(&oi->ip_handle_list),
-			"Clear inode of %llu has non empty handle list\n",
-			(unsigned long long)oi->ip_blkno);
-	mlog_bug_on_msg(oi->ip_handle,
-			"Clear inode of %llu has non empty handle pointer\n",
-			(unsigned long long)oi->ip_blkno);
 
 	/* Clear all other flags. */
 	oi->ip_flags = OCFS2_INODE_CACHE_INLINE;
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 9957810fdf8..9b7354545cb 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -48,13 +48,6 @@ struct ocfs2_inode_info
 
 	struct mutex			ip_io_mutex;
 
-	/* Used by the journalling code to attach an inode to a
-	 * handle.  These are protected by ip_io_mutex in order to lock
-	 * out other I/O to the inode until we either commit or
-	 * abort. */
-	struct list_head		ip_handle_list;
-	struct ocfs2_journal_handle	*ip_handle;
-
 	u32				ip_flags; /* see below */
 	u32				ip_attr; /* inode attributes */
 
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index ca6f2094b00..f02af63e5fa 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -128,7 +128,6 @@ struct ocfs2_journal_handle *ocfs2_alloc_handle(struct ocfs2_super *osb)
 	retval->k_handle = NULL;
 
 	INIT_LIST_HEAD(&retval->locks);
-	INIT_LIST_HEAD(&retval->inode_list);
 	retval->journal = osb->journal;
 
 	return retval;
@@ -202,51 +201,12 @@ done_free:
 	return ERR_PTR(ret);
 }
 
-void ocfs2_handle_add_inode(struct ocfs2_journal_handle *handle,
-			    struct inode *inode)
-{
-	BUG_ON(!handle);
-	BUG_ON(!inode);
-
-	atomic_inc(&inode->i_count);
-
-	/* we're obviously changing it... */
-	mutex_lock(&inode->i_mutex);
-
-	/* sanity check */
-	BUG_ON(OCFS2_I(inode)->ip_handle);
-	BUG_ON(!list_empty(&OCFS2_I(inode)->ip_handle_list));
-
-	OCFS2_I(inode)->ip_handle = handle;
-	list_move_tail(&(OCFS2_I(inode)->ip_handle_list), &(handle->inode_list));
-}
-
-static void ocfs2_handle_unlock_inodes(struct ocfs2_journal_handle *handle)
-{
-	struct list_head *p, *n;
-	struct inode *inode;
-	struct ocfs2_inode_info *oi;
-
-	list_for_each_safe(p, n, &handle->inode_list) {
-		oi = list_entry(p, struct ocfs2_inode_info,
-				ip_handle_list);
-		inode = &oi->vfs_inode;
-
-		OCFS2_I(inode)->ip_handle = NULL;
-		list_del_init(&OCFS2_I(inode)->ip_handle_list);
-
-		mutex_unlock(&inode->i_mutex);
-		iput(inode);
-	}
-}
-
 /* This is trivial so we do it out of the main commit
  * paths. Beware, it can be called from start_trans too! */
 static void ocfs2_commit_unstarted_handle(struct ocfs2_journal_handle *handle)
 {
 	mlog_entry_void();
 
-	ocfs2_handle_unlock_inodes(handle);
 	/* You are allowed to add journal locks before the transaction
 	 * has started. */
 	ocfs2_handle_cleanup_locks(handle->journal, handle);
@@ -272,9 +232,6 @@ void ocfs2_commit_trans(struct ocfs2_journal_handle *handle)
 		return;
 	}
 
-	/* release inode semaphores we took during this transaction */
-	ocfs2_handle_unlock_inodes(handle);
-
 	/* ocfs2_extend_trans may have had to call journal_restart
 	 * which will always commit the transaction, but may return
 	 * error for any number of reasons. If this is the case, we
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 6b5d548ca11..3a16d7d88e1 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -149,8 +149,6 @@ struct ocfs2_journal_handle {
 	struct list_head    locks;     /* A bunch of locks to
 					* release on commit. This
 					* should be a list_head */
-
-	struct list_head     inode_list;
 };
 
 /* Exported only for the journal struct init code in super.c. Do not call. */
@@ -236,7 +234,6 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
  *                          ocfs2_handle_add_lock to indicate that a lock needs
  *                          to be released at the end of that handle. Locks
  *                          will be released in the order that they are added.
- *  ocfs2_handle_add_inode - Add a locked inode to a transaction.
  */
 
 /* You must always start_trans with a number of buffs > 0, but it's
@@ -293,12 +290,6 @@ int                  ocfs2_journal_dirty_data(handle_t *handle,
 					      struct buffer_head *bh);
 int                  ocfs2_handle_add_lock(struct ocfs2_journal_handle *handle,
 					   struct inode *inode);
-/*
- * Use this to protect from other processes reading buffer state while
- * it's in flight.
- */
-void                 ocfs2_handle_add_inode(struct ocfs2_journal_handle *handle,
-					    struct inode *inode);
 
 /*
  *  Credit Macros:
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 76b46ebbb10..f3027c2fe2b 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -914,9 +914,7 @@ static void ocfs2_inode_init_once(void *data,
 		oi->ip_open_count = 0;
 		spin_lock_init(&oi->ip_lock);
 		ocfs2_extent_map_init(&oi->vfs_inode);
-		INIT_LIST_HEAD(&oi->ip_handle_list);
 		INIT_LIST_HEAD(&oi->ip_io_markers);
-		oi->ip_handle = NULL;
 		oi->ip_created_trans = 0;
 		oi->ip_last_trans = 0;
 		oi->ip_dir_start_lookup = 0;
-- 
cgit v1.2.3


From daf29e9cdab7219838c6b6e82380aec3466cf379 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 6 Oct 2006 19:05:31 -0700
Subject: ocfs2: remove unused ocfs2_handle_add_lock()

This gets us rid of a slab we no longer need, as well as removing the
majority of what's left on ocfs2_journal_handle.

ocfs2_commit_unstarted_handle() has no more real work to do, so remove that
function too.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c |  6 ----
 fs/ocfs2/journal.c | 80 ++----------------------------------------------------
 fs/ocfs2/journal.h | 20 --------------
 fs/ocfs2/super.c   | 13 ---------
 4 files changed, 2 insertions(+), 117 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index ff8d3dbdeb3..1c29b96b99e 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1668,12 +1668,6 @@ int ocfs2_meta_lock_full(struct inode *inode,
 		}
 	}
 
-	if (handle) {
-		status = ocfs2_handle_add_lock(handle, inode);
-		if (status < 0)
-			mlog_errno(status);
-	}
-
 bail:
 	if (status < 0) {
 		if (ret_bh && (*ret_bh)) {
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f02af63e5fa..3ef1678c893 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -57,9 +57,6 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
 static int __ocfs2_recovery_thread(void *arg);
 static int ocfs2_commit_cache(struct ocfs2_super *osb);
 static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
-static void ocfs2_handle_cleanup_locks(struct ocfs2_journal *journal,
-				       struct ocfs2_journal_handle *handle);
-static void ocfs2_commit_unstarted_handle(struct ocfs2_journal_handle *handle);
 static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
 				      int dirty);
 static int ocfs2_trylock_journal(struct ocfs2_super *osb,
@@ -123,11 +120,8 @@ struct ocfs2_journal_handle *ocfs2_alloc_handle(struct ocfs2_super *osb)
 		     "handle!\n");
 		return NULL;
 	}
-
-	retval->num_locks = 0;
 	retval->k_handle = NULL;
 
-	INIT_LIST_HEAD(&retval->locks);
 	retval->journal = osb->journal;
 
 	return retval;
@@ -195,27 +189,12 @@ struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
 
 done_free:
 	if (handle)
-		ocfs2_commit_unstarted_handle(handle); /* will kfree handle */
+		kfree(handle);
 
 	mlog_exit(ret);
 	return ERR_PTR(ret);
 }
 
-/* This is trivial so we do it out of the main commit
- * paths. Beware, it can be called from start_trans too! */
-static void ocfs2_commit_unstarted_handle(struct ocfs2_journal_handle *handle)
-{
-	mlog_entry_void();
-
-	/* You are allowed to add journal locks before the transaction
-	 * has started. */
-	ocfs2_handle_cleanup_locks(handle->journal, handle);
-
-	kfree(handle);
-
-	mlog_exit_void();
-}
-
 void ocfs2_commit_trans(struct ocfs2_journal_handle *handle)
 {
 	handle_t *jbd_handle;
@@ -227,7 +206,7 @@ void ocfs2_commit_trans(struct ocfs2_journal_handle *handle)
 	BUG_ON(!handle);
 
 	if (!handle->k_handle) {
-		ocfs2_commit_unstarted_handle(handle);
+		kfree(handle);
 		mlog_exit_void();
 		return;
 	}
@@ -251,8 +230,6 @@ void ocfs2_commit_trans(struct ocfs2_journal_handle *handle)
 		handle->k_handle = NULL; /* it's been free'd in journal_stop */
 	}
 
-	ocfs2_handle_cleanup_locks(journal, handle);
-
 	up_read(&journal->j_trans_barrier);
 
 	kfree(handle);
@@ -394,59 +371,6 @@ int ocfs2_journal_dirty_data(handle_t *handle,
 	return err;
 }
 
-/* We always assume you're adding a metadata lock at level 'ex' */
-int ocfs2_handle_add_lock(struct ocfs2_journal_handle *handle,
-			  struct inode *inode)
-{
-	int status;
-	struct ocfs2_journal_lock *lock;
-
-	BUG_ON(!inode);
-
-	lock = kmem_cache_alloc(ocfs2_lock_cache, GFP_NOFS);
-	if (!lock) {
-		status = -ENOMEM;
-		mlog_errno(-ENOMEM);
-		goto bail;
-	}
-
-	if (!igrab(inode))
-		BUG();
-	lock->jl_inode = inode;
-
-	list_add_tail(&(lock->jl_lock_list), &(handle->locks));
-	handle->num_locks++;
-
-	status = 0;
-bail:
-	mlog_exit(status);
-	return status;
-}
-
-static void ocfs2_handle_cleanup_locks(struct ocfs2_journal *journal,
-				       struct ocfs2_journal_handle *handle)
-{
-	struct list_head *p, *n;
-	struct ocfs2_journal_lock *lock;
-	struct inode *inode;
-
-	list_for_each_safe(p, n, &(handle->locks)) {
-		lock = list_entry(p, struct ocfs2_journal_lock,
-				  jl_lock_list);
-		list_del(&lock->jl_lock_list);
-		handle->num_locks--;
-
-		inode = lock->jl_inode;
-		ocfs2_meta_unlock(inode, 1);
-		if (atomic_read(&inode->i_count) == 1)
-			mlog(ML_ERROR,
-			     "Inode %llu, I'm doing a last iput for!",
-			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
-		iput(inode);
-		kmem_cache_free(ocfs2_lock_cache, lock);
-	}
-}
-
 #define OCFS2_DEFAULT_COMMIT_INTERVAL 	(HZ * 5)
 
 void ocfs2_set_journal_params(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3a16d7d88e1..7e9c1303d63 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -133,22 +133,9 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
 	spin_unlock(&trans_inc_lock);
 }
 
-extern kmem_cache_t *ocfs2_lock_cache;
-
-struct ocfs2_journal_lock {
-	struct inode     *jl_inode;
-	struct list_head  jl_lock_list;
-};
-
 struct ocfs2_journal_handle {
 	handle_t            *k_handle; /* kernel handle.                */
 	struct ocfs2_journal        *journal;
-
-	/* The following two fields are for ocfs2_handle_add_lock */
-	int                 num_locks;
-	struct list_head    locks;     /* A bunch of locks to
-					* release on commit. This
-					* should be a list_head */
 };
 
 /* Exported only for the journal struct init code in super.c. Do not call. */
@@ -229,11 +216,6 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
  *  ocfs2_journal_dirty    - Mark a journalled buffer as having dirty data.
  *  ocfs2_journal_dirty_data - Indicate that a data buffer should go out before
  *                             the current handle commits.
- *  ocfs2_handle_add_lock  - Sometimes we need to delay lock release
- *                          until after a transaction has been completed. Use
- *                          ocfs2_handle_add_lock to indicate that a lock needs
- *                          to be released at the end of that handle. Locks
- *                          will be released in the order that they are added.
  */
 
 /* You must always start_trans with a number of buffs > 0, but it's
@@ -288,8 +270,6 @@ int                  ocfs2_journal_dirty(struct ocfs2_journal_handle *handle,
 					 struct buffer_head *bh);
 int                  ocfs2_journal_dirty_data(handle_t *handle,
 					      struct buffer_head *bh);
-int                  ocfs2_handle_add_lock(struct ocfs2_journal_handle *handle,
-					   struct inode *inode);
 
 /*
  *  Credit Macros:
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index f3027c2fe2b..290d531bed6 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -70,8 +70,6 @@
 
 static kmem_cache_t *ocfs2_inode_cachep = NULL;
 
-kmem_cache_t *ocfs2_lock_cache = NULL;
-
 /* OCFS2 needs to schedule several differnt types of work which
  * require cluster locking, disk I/O, recovery waits, etc. Since these
  * types of work tend to be heavy we avoid using the kernel events
@@ -946,14 +944,6 @@ static int ocfs2_initialize_mem_caches(void)
 	if (!ocfs2_inode_cachep)
 		return -ENOMEM;
 
-	ocfs2_lock_cache = kmem_cache_create("ocfs2_lock",
-					     sizeof(struct ocfs2_journal_lock),
-					     0,
-					     SLAB_HWCACHE_ALIGN,
-					     NULL, NULL);
-	if (!ocfs2_lock_cache)
-		return -ENOMEM;
-
 	return 0;
 }
 
@@ -961,11 +951,8 @@ static void ocfs2_free_mem_caches(void)
 {
 	if (ocfs2_inode_cachep)
 		kmem_cache_destroy(ocfs2_inode_cachep);
-	if (ocfs2_lock_cache)
-		kmem_cache_destroy(ocfs2_lock_cache);
 
 	ocfs2_inode_cachep = NULL;
-	ocfs2_lock_cache = NULL;
 }
 
 static int ocfs2_get_sector(struct super_block *sb,
-- 
cgit v1.2.3


From a301a27d715276a71827004549bcbb2b64776c11 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 6 Oct 2006 19:07:43 -0700
Subject: ocfs2: make ocfs2_alloc_handle() static

This is no longer used outside of journal.c

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/journal.c | 2 +-
 fs/ocfs2/journal.h | 6 ------
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 3ef1678c893..f070487a055 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -110,7 +110,7 @@ finally:
 	return status;
 }
 
-struct ocfs2_journal_handle *ocfs2_alloc_handle(struct ocfs2_super *osb)
+static struct ocfs2_journal_handle *ocfs2_alloc_handle(struct ocfs2_super *osb)
 {
 	struct ocfs2_journal_handle *retval = NULL;
 
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 7e9c1303d63..3e64e6b646a 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -198,11 +198,6 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
  *  Transaction Handling:
  *  Manage the lifetime of a transaction handle.
  *
- *  ocfs2_alloc_handle     - Only allocate a handle so we can start putting
- *                          cluster locks on it. To actually change blocks,
- *                          call ocfs2_start_trans with the handle returned
- *                          from this function. You may call ocfs2_commit_trans
- *                           at any time in the lifetime of a handle.
  *  ocfs2_start_trans      - Begin a transaction. Give it an upper estimate of
  *                          the number of blocks that will be changed during
  *                          this handle.
@@ -221,7 +216,6 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
 /* You must always start_trans with a number of buffs > 0, but it's
  * perfectly legal to go through an entire transaction without having
  * dirtied any buffers. */
-struct ocfs2_journal_handle *ocfs2_alloc_handle(struct ocfs2_super *osb);
 struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
 					       struct ocfs2_journal_handle *handle,
 					       int max_buffs);
-- 
cgit v1.2.3


From 4bcec1847ac4f75c2ee6d091b495f34d8d822e6a Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Mon, 9 Oct 2006 16:02:40 -0700
Subject: ocfs2: remove unused handle argument from ocfs2_meta_lock_full()

Now that this is unused and all callers pass NULL, we can safely remove it.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/alloc.c      |  7 ++-----
 fs/ocfs2/aops.c       |  8 ++++----
 fs/ocfs2/dir.c        |  2 +-
 fs/ocfs2/dlmglue.c    |  7 ++-----
 fs/ocfs2/dlmglue.h    |  4 +---
 fs/ocfs2/export.c     |  2 +-
 fs/ocfs2/file.c       |  6 +++---
 fs/ocfs2/inode.c      | 10 +++++-----
 fs/ocfs2/ioctl.c      |  4 ++--
 fs/ocfs2/journal.c    | 10 ++++------
 fs/ocfs2/localalloc.c |  4 ++--
 fs/ocfs2/namei.c      | 24 ++++++++++++------------
 fs/ocfs2/suballoc.c   |  2 +-
 fs/ocfs2/super.c      |  2 +-
 14 files changed, 41 insertions(+), 51 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index c8a4727bae8..22dd60833c9 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1152,7 +1152,7 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 
 	mutex_lock(&data_alloc_inode->i_mutex);
 
-	status = ocfs2_meta_lock(data_alloc_inode, NULL, &data_alloc_bh, 1);
+	status = ocfs2_meta_lock(data_alloc_inode, &data_alloc_bh, 1);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_mutex;
@@ -2003,10 +2003,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
 		mutex_lock(&ext_alloc_inode->i_mutex);
 		(*tc)->tc_ext_alloc_inode = ext_alloc_inode;
 
-		status = ocfs2_meta_lock(ext_alloc_inode,
-					 NULL,
-					 &ext_alloc_bh,
-					 1);
+		status = ocfs2_meta_lock(ext_alloc_inode, &ext_alloc_bh, 1);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3d7c082a8f5..196a71cb976 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -200,7 +200,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
 
 	mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0));
 
-	ret = ocfs2_meta_lock_with_page(inode, NULL, NULL, 0, page);
+	ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page);
 	if (ret != 0) {
 		if (ret == AOP_TRUNCATED_PAGE)
 			unlock = 0;
@@ -305,7 +305,7 @@ static int ocfs2_prepare_write(struct file *file, struct page *page,
 
 	mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
 
-	ret = ocfs2_meta_lock_with_page(inode, NULL, NULL, 0, page);
+	ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page);
 	if (ret != 0) {
 		mlog_errno(ret);
 		goto out;
@@ -412,7 +412,7 @@ static int ocfs2_commit_write(struct file *file, struct page *page,
 	 *    stale inode allocation image (i_size, i_clusters, etc).
 	 */
 
-	ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, 1, page);
+	ret = ocfs2_meta_lock_with_page(inode, &di_bh, 1, page);
 	if (ret != 0) {
 		mlog_errno(ret);
 		goto out;
@@ -490,7 +490,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
 	 * accessed concurrently from multiple nodes.
 	 */
 	if (!INODE_JOURNAL(inode)) {
-		err = ocfs2_meta_lock(inode, NULL, NULL, 0);
+		err = ocfs2_meta_lock(inode, NULL, 0);
 		if (err) {
 			if (err != -ENOENT)
 				mlog_errno(err);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index d10ccea82fa..db46ee50578 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -89,7 +89,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	stored = 0;
 	bh = NULL;
 
-	error = ocfs2_meta_lock(inode, NULL, NULL, 0);
+	error = ocfs2_meta_lock(inode, NULL, 0);
 	if (error < 0) {
 		if (error != -ENOENT)
 			mlog_errno(error);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 1c29b96b99e..7a22118ef04 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1579,7 +1579,6 @@ static int ocfs2_assign_bh(struct inode *inode,
  * the result of the lock will be communicated via the callback.
  */
 int ocfs2_meta_lock_full(struct inode *inode,
-			 struct ocfs2_journal_handle *handle,
 			 struct buffer_head **ret_bh,
 			 int ex,
 			 int arg_flags)
@@ -1707,18 +1706,16 @@ bail:
  * the lock inversion simply.
  */
 int ocfs2_meta_lock_with_page(struct inode *inode,
-			      struct ocfs2_journal_handle *handle,
 			      struct buffer_head **ret_bh,
 			      int ex,
 			      struct page *page)
 {
 	int ret;
 
-	ret = ocfs2_meta_lock_full(inode, handle, ret_bh, ex,
-				   OCFS2_LOCK_NONBLOCK);
+	ret = ocfs2_meta_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
 	if (ret == -EAGAIN) {
 		unlock_page(page);
-		if (ocfs2_meta_lock(inode, handle, ret_bh, ex) == 0)
+		if (ocfs2_meta_lock(inode, ret_bh, ex) == 0)
 			ocfs2_meta_unlock(inode, ex);
 		ret = AOP_TRUNCATED_PAGE;
 	}
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 75c49ce2be3..a1165edef43 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -81,18 +81,16 @@ void ocfs2_data_unlock(struct inode *inode,
 int ocfs2_rw_lock(struct inode *inode, int write);
 void ocfs2_rw_unlock(struct inode *inode, int write);
 int ocfs2_meta_lock_full(struct inode *inode,
-			 struct ocfs2_journal_handle *handle,
 			 struct buffer_head **ret_bh,
 			 int ex,
 			 int arg_flags);
 int ocfs2_meta_lock_with_page(struct inode *inode,
-			      struct ocfs2_journal_handle *handle,
 			      struct buffer_head **ret_bh,
 			      int ex,
 			      struct page *page);
 /* 99% of the time we don't want to supply any additional flags --
  * those are for very specific cases only. */
-#define ocfs2_meta_lock(i, h, b, e) ocfs2_meta_lock_full(i, h, b, e, 0)
+#define ocfs2_meta_lock(i, b, e) ocfs2_meta_lock_full(i, b, e, 0)
 void ocfs2_meta_unlock(struct inode *inode,
 		       int ex);
 int ocfs2_super_lock(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index fb91089a60a..06be6e774cf 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -100,7 +100,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 	mlog(0, "find parent of directory %llu\n",
 	     (unsigned long long)OCFS2_I(dir)->ip_blkno);
 
-	status = ocfs2_meta_lock(dir, NULL, NULL, 0);
+	status = ocfs2_meta_lock(dir, NULL, 0);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index bd944e893bb..bcae2693e95 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -812,7 +812,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 		}
 	}
 
-	status = ocfs2_meta_lock(inode, NULL, &bh, 1);
+	status = ocfs2_meta_lock(inode, &bh, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -1006,7 +1006,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 	 */
 	meta_level = (filp->f_flags & O_APPEND) ? 1 : 0;
 	for(;;) {
-		ret = ocfs2_meta_lock(inode, NULL, NULL, meta_level);
+		ret = ocfs2_meta_lock(inode, NULL, meta_level);
 		if (ret < 0) {
 			meta_level = -1;
 			mlog_errno(ret);
@@ -1174,7 +1174,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 	 * like i_size. This allows the checks down below
 	 * generic_file_aio_read() a chance of actually working. 
 	 */
-	ret = ocfs2_meta_lock(inode, NULL, NULL, 0);
+	ret = ocfs2_meta_lock(inode, NULL, 0);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto bail;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 20e60e4b1f1..dec0ece6267 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -441,7 +441,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 				  generation, inode);
 
 	if (can_lock) {
-		status = ocfs2_meta_lock(inode, NULL, NULL, 0);
+		status = ocfs2_meta_lock(inode, NULL, 0);
 		if (status) {
 			make_bad_inode(inode);
 			mlog_errno(status);
@@ -582,7 +582,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 	}
 
 	mutex_lock(&inode_alloc_inode->i_mutex);
-	status = ocfs2_meta_lock(inode_alloc_inode, NULL, &inode_alloc_bh, 1);
+	status = ocfs2_meta_lock(inode_alloc_inode, &inode_alloc_bh, 1);
 	if (status < 0) {
 		mutex_unlock(&inode_alloc_inode->i_mutex);
 
@@ -705,7 +705,7 @@ static int ocfs2_wipe_inode(struct inode *inode,
 	 * delete_inode operation. We do this now to avoid races with
 	 * recovery completion on other nodes. */
 	mutex_lock(&orphan_dir_inode->i_mutex);
-	status = ocfs2_meta_lock(orphan_dir_inode, NULL, &orphan_dir_bh, 1);
+	status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1);
 	if (status < 0) {
 		mutex_unlock(&orphan_dir_inode->i_mutex);
 
@@ -933,7 +933,7 @@ void ocfs2_delete_inode(struct inode *inode)
 	 * allocation lock here as it won't be needed - nobody will
 	 * have the file open.
 	 */
-	status = ocfs2_meta_lock(inode, NULL, &di_bh, 1);
+	status = ocfs2_meta_lock(inode, &di_bh, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -1180,7 +1180,7 @@ int ocfs2_inode_revalidate(struct dentry *dentry)
 
 	/* Let ocfs2_meta_lock do the work of updating our struct
 	 * inode for us. */
-	status = ocfs2_meta_lock(inode, NULL, NULL, 0);
+	status = ocfs2_meta_lock(inode, NULL, 0);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 3663cef8068..16d8861b281 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -26,7 +26,7 @@ static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
 {
 	int status;
 
-	status = ocfs2_meta_lock(inode, NULL, NULL, 0);
+	status = ocfs2_meta_lock(inode, NULL, 0);
 	if (status < 0) {
 		mlog_errno(status);
 		return status;
@@ -50,7 +50,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
 
 	mutex_lock(&inode->i_mutex);
 
-	status = ocfs2_meta_lock(inode, NULL, &bh, 1);
+	status = ocfs2_meta_lock(inode, &bh, 1);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f070487a055..61db8e7fffa 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -424,8 +424,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
 	/* Skip recovery waits here - journal inode metadata never
 	 * changes in a live cluster so it can be considered an
 	 * exception to the rule. */
-	status = ocfs2_meta_lock_full(inode, NULL, &bh, 1,
-				      OCFS2_META_LOCK_RECOVERY);
+	status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
 	if (status < 0) {
 		if (status != -ERESTARTSYS)
 			mlog(ML_ERROR, "Could not get lock on journal!\n");
@@ -1022,8 +1021,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
 	}
 	SET_INODE_JOURNAL(inode);
 
-	status = ocfs2_meta_lock_full(inode, NULL, &bh, 1,
-				      OCFS2_META_LOCK_RECOVERY);
+	status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
 	if (status < 0) {
 		mlog(0, "status returned from ocfs2_meta_lock=%d\n", status);
 		if (status != -ERESTARTSYS)
@@ -1212,7 +1210,7 @@ static int ocfs2_trylock_journal(struct ocfs2_super *osb,
 	SET_INODE_JOURNAL(inode);
 
 	flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE;
-	status = ocfs2_meta_lock_full(inode, NULL, NULL, 1, flags);
+	status = ocfs2_meta_lock_full(inode, NULL, 1, flags);
 	if (status < 0) {
 		if (status != -EAGAIN)
 			mlog_errno(status);
@@ -1295,7 +1293,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
 	}	
 
 	mutex_lock(&orphan_dir_inode->i_mutex);
-	status = ocfs2_meta_lock(orphan_dir_inode, NULL, NULL, 0);
+	status = ocfs2_meta_lock(orphan_dir_inode, NULL, 0);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out;
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 869383e7c56..4c46c0a81e0 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -231,7 +231,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 
 	mutex_lock(&main_bm_inode->i_mutex);
 
-	status = ocfs2_meta_lock(main_bm_inode, NULL, &main_bm_bh, 1);
+	status = ocfs2_meta_lock(main_bm_inode, &main_bm_bh, 1);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_mutex;
@@ -399,7 +399,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
 
 	mutex_lock(&main_bm_inode->i_mutex);
 
-	status = ocfs2_meta_lock(main_bm_inode, NULL, &main_bm_bh, 1);
+	status = ocfs2_meta_lock(main_bm_inode, &main_bm_bh, 1);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_mutex;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 8d03ef44686..daf70605afa 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -159,7 +159,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 	mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
 	     dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
 
-	status = ocfs2_meta_lock(dir, NULL, NULL, 0);
+	status = ocfs2_meta_lock(dir, NULL, 0);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -327,7 +327,7 @@ static int ocfs2_mknod(struct inode *dir,
 	/* get our super block */
 	osb = OCFS2_SB(dir->i_sb);
 
-	status = ocfs2_meta_lock(dir, NULL, &parent_fe_bh, 1);
+	status = ocfs2_meta_lock(dir, &parent_fe_bh, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -658,7 +658,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
 
-	err = ocfs2_meta_lock(dir, NULL, &parent_fe_bh, 1);
+	err = ocfs2_meta_lock(dir, &parent_fe_bh, 1);
 	if (err < 0) {
 		if (err != -ENOENT)
 			mlog_errno(err);
@@ -683,7 +683,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto out;
 	}
 
-	err = ocfs2_meta_lock(inode, NULL, &fe_bh, 1);
+	err = ocfs2_meta_lock(inode, &fe_bh, 1);
 	if (err < 0) {
 		if (err != -ENOENT)
 			mlog_errno(err);
@@ -825,7 +825,7 @@ static int ocfs2_unlink(struct inode *dir,
 		return -EPERM;
 	}
 
-	status = ocfs2_meta_lock(dir, NULL, &parent_node_bh, 1);
+	status = ocfs2_meta_lock(dir, &parent_node_bh, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -850,7 +850,7 @@ static int ocfs2_unlink(struct inode *dir,
 		goto leave;
 	}
 
-	status = ocfs2_meta_lock(inode, NULL, &fe_bh, 1);
+	status = ocfs2_meta_lock(inode, &fe_bh, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -1012,7 +1012,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 			inode1 = tmpinode;
 		}
 		/* lock id2 */
-		status = ocfs2_meta_lock(inode2, NULL, bh2, 1);
+		status = ocfs2_meta_lock(inode2, bh2, 1);
 		if (status < 0) {
 			if (status != -ENOENT)
 				mlog_errno(status);
@@ -1021,7 +1021,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 	}
 
 	/* lock id1 */
-	status = ocfs2_meta_lock(inode1, NULL, bh1, 1);
+	status = ocfs2_meta_lock(inode1, bh1, 1);
 	if (status < 0) {
 		/*
 		 * An error return must mean that no cluster locks
@@ -1142,7 +1142,7 @@ static int ocfs2_rename(struct inode *old_dir,
 	 * the vote thread on other nodes won't have to concurrently
 	 * downconvert the inode and the dentry locks.
 	 */
-	status = ocfs2_meta_lock(old_inode, NULL, NULL, 1);
+	status = ocfs2_meta_lock(old_inode, NULL, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -1234,7 +1234,7 @@ static int ocfs2_rename(struct inode *old_dir,
 			goto bail;
 		}
 
-		status = ocfs2_meta_lock(new_inode, NULL, &newfe_bh, 1);
+		status = ocfs2_meta_lock(new_inode, &newfe_bh, 1);
 		if (status < 0) {
 			if (status != -ENOENT)
 				mlog_errno(status);
@@ -1608,7 +1608,7 @@ static int ocfs2_symlink(struct inode *dir,
 	credits = ocfs2_calc_symlink_credits(sb);
 
 	/* lock the parent directory */
-	status = ocfs2_meta_lock(dir, NULL, &parent_fe_bh, 1);
+	status = ocfs2_meta_lock(dir, &parent_fe_bh, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -2127,7 +2127,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 
 	mutex_lock(&orphan_dir_inode->i_mutex);
 
-	status = ocfs2_meta_lock(orphan_dir_inode, NULL, &orphan_dir_bh, 1);
+	status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 31bda54fefe..27f8dc4c819 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -421,7 +421,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
 
 	mutex_lock(&alloc_inode->i_mutex);
 
-	status = ocfs2_meta_lock(alloc_inode, NULL, &bh, 1);
+	status = ocfs2_meta_lock(alloc_inode, &bh, 1);
 	if (status < 0) {
 		mutex_unlock(&alloc_inode->i_mutex);
 		iput(alloc_inode);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 290d531bed6..d9b1eb6a3c6 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -865,7 +865,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 		goto bail;
 	}
 
-	status = ocfs2_meta_lock(inode, NULL, &bh, 0);
+	status = ocfs2_meta_lock(inode, &bh, 0);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
-- 
cgit v1.2.3


From 02dc1af44e9fa4b8801169891b3a1ba4047537ad Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Mon, 9 Oct 2006 16:48:10 -0700
Subject: ocfs2: pass ocfs2_super * into ocfs2_commit_trans()

This sets us up to remove handle->journal.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/alloc.c      |  8 ++++----
 fs/ocfs2/aops.c       |  4 ++--
 fs/ocfs2/dir.c        |  2 +-
 fs/ocfs2/file.c       | 12 ++++++------
 fs/ocfs2/inode.c      |  6 +++---
 fs/ocfs2/ioctl.c      |  2 +-
 fs/ocfs2/journal.c    |  5 +++--
 fs/ocfs2/journal.h    |  3 ++-
 fs/ocfs2/localalloc.c |  6 +++---
 fs/ocfs2/namei.c      | 10 +++++-----
 fs/ocfs2/suballoc.c   |  2 +-
 11 files changed, 31 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 22dd60833c9..f3dbd31a85a 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1170,7 +1170,7 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 	if (status < 0)
 		mlog_errno(status);
 
-	ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(osb, handle);
 
 out_unlock:
 	brelse(data_alloc_bh);
@@ -1379,7 +1379,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
 
 		status = ocfs2_truncate_log_append(osb, handle,
 						   start_blk, clusters);
-		ocfs2_commit_trans(handle);
+		ocfs2_commit_trans(osb, handle);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail_up;
@@ -1883,7 +1883,7 @@ start:
 	mutex_unlock(&tl_inode->i_mutex);
 	tl_sem = 0;
 
-	ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(osb, handle);
 	handle = NULL;
 
 	BUG_ON(le32_to_cpu(fe->i_clusters) < target_i_clusters);
@@ -1898,7 +1898,7 @@ bail:
 		mutex_unlock(&tl_inode->i_mutex);
 
 	if (handle)
-		ocfs2_commit_trans(handle);
+		ocfs2_commit_trans(osb, handle);
 
 	if (last_eb_bh)
 		brelse(last_eb_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 196a71cb976..c3693999694 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -382,7 +382,7 @@ struct ocfs2_journal_handle *ocfs2_start_walk_page_trans(struct inode *inode,
 out:
 	if (ret) {
 		if (handle)
-			ocfs2_commit_trans(handle);
+			ocfs2_commit_trans(osb, handle);
 		handle = ERR_PTR(ret);
 	}
 	return handle;
@@ -464,7 +464,7 @@ static int ocfs2_commit_write(struct file *file, struct page *page,
 	}
 
 out_commit:
-	ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 out_unlock_data:
 	ocfs2_data_unlock(inode, 1);
 out_unlock_meta:
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index db46ee50578..2bda5f34521 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -488,7 +488,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 	get_bh(*new_de_bh);
 bail:
 	if (handle)
-		ocfs2_commit_trans(handle);
+		ocfs2_commit_trans(osb, handle);
 
 	if (data_ac)
 		ocfs2_free_alloc_context(data_ac);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index bcae2693e95..9eb60f21968 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -178,7 +178,7 @@ static int ocfs2_simple_size_update(struct inode *inode,
 	if (ret < 0)
 		mlog_errno(ret);
 
-	ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(osb, handle);
 out:
 	return ret;
 }
@@ -207,7 +207,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
 	if (status < 0)
 		mlog_errno(status);
 
-	ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(osb, handle);
 out:
 	mlog_exit(status);
 	return status;
@@ -576,7 +576,7 @@ leave:
 		drop_alloc_sem = 0;
 	}
 	if (handle) {
-		ocfs2_commit_trans(handle);
+		ocfs2_commit_trans(osb, handle);
 		handle = NULL;
 	}
 	if (data_ac) {
@@ -655,7 +655,7 @@ static int ocfs2_write_zero_page(struct inode *inode,
 		ret = 0;
 
 	if (handle)
-		ocfs2_commit_trans(handle);
+		ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 out_unlock:
 	unlock_page(page);
 	page_cache_release(page);
@@ -850,7 +850,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 		mlog_errno(status);
 
 bail_commit:
-	ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(osb, handle);
 bail_unlock:
 	ocfs2_meta_unlock(inode, 1);
 bail_unlock_rw:
@@ -938,7 +938,7 @@ static int ocfs2_write_remove_suid(struct inode *inode)
 out_bh:
 	brelse(bh);
 out_trans:
-	ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(osb, handle);
 out:
 	mlog_exit(ret);
 	return ret;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index dec0ece6267..7708c63a355 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -538,7 +538,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(osb, handle);
 	handle = NULL;
 
 	status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc);
@@ -554,7 +554,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 	}
 bail:
 	if (handle)
-		ocfs2_commit_trans(handle);
+		ocfs2_commit_trans(osb, handle);
 
 	mlog_exit(status);
 	return status;
@@ -629,7 +629,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 		mlog_errno(status);
 
 bail_commit:
-	ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(osb, handle);
 bail_unlock:
 	ocfs2_meta_unlock(inode_alloc_inode, 1);
 	mutex_unlock(&inode_alloc_inode->i_mutex);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 16d8861b281..e927758e8cd 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -96,7 +96,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
 	if (status < 0)
 		mlog_errno(status);
 
-	ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(osb, handle);
 bail_unlock:
 	ocfs2_meta_unlock(inode, 1);
 bail:
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 61db8e7fffa..00402e749fc 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -195,11 +195,12 @@ done_free:
 	return ERR_PTR(ret);
 }
 
-void ocfs2_commit_trans(struct ocfs2_journal_handle *handle)
+void ocfs2_commit_trans(struct ocfs2_super *osb,
+			struct ocfs2_journal_handle *handle)
 {
 	handle_t *jbd_handle;
 	int retval;
-	struct ocfs2_journal *journal = handle->journal;
+	struct ocfs2_journal *journal = osb->journal;
 
 	mlog_entry_void();
 
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3e64e6b646a..d315179d0c0 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -219,7 +219,8 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
 struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
 					       struct ocfs2_journal_handle *handle,
 					       int max_buffs);
-void			     ocfs2_commit_trans(struct ocfs2_journal_handle *handle);
+void			     ocfs2_commit_trans(struct ocfs2_super *osb,
+						struct ocfs2_journal_handle *handle);
 int			     ocfs2_extend_trans(handle_t *handle, int nblocks);
 
 /*
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 4c46c0a81e0..9dd208dc5d7 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -280,7 +280,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 		mlog_errno(status);
 
 out_commit:
-	ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(osb, handle);
 
 out_unlock:
 	if (main_bm_bh)
@@ -421,7 +421,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
 	if (status < 0)
 		mlog_errno(status);
 
-	ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(osb, handle);
 
 out_unlock:
 	ocfs2_meta_unlock(main_bm_inode, 1);
@@ -954,7 +954,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 	status = 0;
 bail:
 	if (handle)
-		ocfs2_commit_trans(handle);
+		ocfs2_commit_trans(osb, handle);
 
 	if (main_bm_bh)
 		brelse(main_bm_bh);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index daf70605afa..9a56bfdf45c 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -440,7 +440,7 @@ static int ocfs2_mknod(struct inode *dir,
 	status = 0;
 leave:
 	if (handle)
-		ocfs2_commit_trans(handle);
+		ocfs2_commit_trans(osb, handle);
 
 	ocfs2_meta_unlock(dir, 1);
 
@@ -746,7 +746,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 	d_instantiate(dentry, inode);
 
 out_commit:
-	ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(osb, handle);
 out_unlock_inode:
 	ocfs2_meta_unlock(inode, 1);
 
@@ -942,7 +942,7 @@ static int ocfs2_unlink(struct inode *dir,
 
 leave:
 	if (handle)
-		ocfs2_commit_trans(handle);
+		ocfs2_commit_trans(osb, handle);
 
 	if (child_locked)
 		ocfs2_meta_unlock(inode, 1);
@@ -1428,7 +1428,7 @@ bail:
 		ocfs2_rename_unlock(osb);
 
 	if (handle)
-		ocfs2_commit_trans(handle);
+		ocfs2_commit_trans(osb, handle);
 
 	if (parents_locked)
 		ocfs2_double_unlock(old_dir, new_dir);
@@ -1730,7 +1730,7 @@ static int ocfs2_symlink(struct inode *dir,
 	d_instantiate(dentry, inode);
 bail:
 	if (handle)
-		ocfs2_commit_trans(handle);
+		ocfs2_commit_trans(osb, handle);
 
 	ocfs2_meta_unlock(dir, 1);
 
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 27f8dc4c819..ea63a519167 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -387,7 +387,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 	status = 0;
 bail:
 	if (handle)
-		ocfs2_commit_trans(handle);
+		ocfs2_commit_trans(osb, handle);
 
 	if (ac)
 		ocfs2_free_alloc_context(ac);
-- 
cgit v1.2.3


From dae85832ffe2879b57b23aea319a0ec17667898d Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Mon, 9 Oct 2006 16:50:32 -0700
Subject: ocfs2: remove ocfs2_journal_handle journal field

It is no longer used.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/journal.c | 2 --
 fs/ocfs2/journal.h | 1 -
 2 files changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 00402e749fc..81fb917475f 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -122,8 +122,6 @@ static struct ocfs2_journal_handle *ocfs2_alloc_handle(struct ocfs2_super *osb)
 	}
 	retval->k_handle = NULL;
 
-	retval->journal = osb->journal;
-
 	return retval;
 }
 
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index d315179d0c0..d96f61abd66 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -135,7 +135,6 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
 
 struct ocfs2_journal_handle {
 	handle_t            *k_handle; /* kernel handle.                */
-	struct ocfs2_journal        *journal;
 };
 
 /* Exported only for the journal struct init code in super.c. Do not call. */
-- 
cgit v1.2.3


From 65eff9ccf86d63eb5c3e9071450a36e4e4fa9564 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Mon, 9 Oct 2006 17:26:22 -0700
Subject: ocfs2: remove handle argument to ocfs2_start_trans()

All callers either pass in NULL directly, or a local variable that is
already set to NULL.

The internals of ocfs2_start_trans() get a nice cleanup as a result.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/alloc.c      |  7 +++----
 fs/ocfs2/aops.c       |  2 +-
 fs/ocfs2/dir.c        |  2 +-
 fs/ocfs2/file.c       | 11 +++++------
 fs/ocfs2/inode.c      |  4 ++--
 fs/ocfs2/ioctl.c      |  2 +-
 fs/ocfs2/journal.c    | 29 ++++++++---------------------
 fs/ocfs2/journal.h    |  1 -
 fs/ocfs2/localalloc.c |  6 +++---
 fs/ocfs2/namei.c      | 10 +++++-----
 fs/ocfs2/suballoc.c   |  2 +-
 11 files changed, 30 insertions(+), 46 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index f3dbd31a85a..354817acf8b 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1158,7 +1158,7 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 		goto out_mutex;
 	}
 
-	handle = ocfs2_start_trans(osb, NULL, OCFS2_TRUNCATE_LOG_UPDATE);
+	handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		mlog_errno(status);
@@ -1365,8 +1365,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
 			}
 		}
 
-		handle = ocfs2_start_trans(osb, NULL,
-					   OCFS2_TRUNCATE_LOG_UPDATE);
+		handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
 		if (IS_ERR(handle)) {
 			status = PTR_ERR(handle);
 			mlog_errno(status);
@@ -1860,7 +1859,7 @@ start:
 
 	credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
 						fe, el);
-	handle = ocfs2_start_trans(osb, NULL, credits);
+	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index c3693999694..fcdcdf6aa35 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -364,7 +364,7 @@ struct ocfs2_journal_handle *ocfs2_start_walk_page_trans(struct inode *inode,
 	struct ocfs2_journal_handle *handle = NULL;
 	int ret = 0;
 
-	handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
 	if (!handle) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 2bda5f34521..036c891c1e1 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -442,7 +442,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 		credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
 	}
 
-	handle = ocfs2_start_trans(osb, NULL, credits);
+	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9eb60f21968..100754de16b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -165,8 +165,7 @@ static int ocfs2_simple_size_update(struct inode *inode,
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_journal_handle *handle = NULL;
 
-	handle = ocfs2_start_trans(osb, NULL,
-				   OCFS2_INODE_UPDATE_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
 	if (handle == NULL) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
@@ -196,7 +195,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
 	/* TODO: This needs to actually orphan the inode in this
 	 * transaction. */
 
-	handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		mlog_errno(status);
@@ -496,7 +495,7 @@ restart_all:
 	drop_alloc_sem = 1;
 
 	credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add);
-	handle = ocfs2_start_trans(osb, NULL, credits);
+	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -832,7 +831,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		mlog_errno(status);
@@ -905,7 +904,7 @@ static int ocfs2_write_remove_suid(struct inode *inode)
 	mlog_entry("(Inode %llu, mode 0%o)\n",
 		   (unsigned long long)oi->ip_blkno, inode->i_mode);
 
-	handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
 	if (handle == NULL) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 7708c63a355..ac78877ba14 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -524,7 +524,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 	if (!fe->i_clusters)
 		goto bail;
 
-	handle = ocfs2_start_trans(osb, handle, OCFS2_INODE_UPDATE_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -590,7 +590,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 		goto bail;
 	}
 
-	handle = ocfs2_start_trans(osb, NULL, OCFS2_DELETE_INODE_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		mlog_errno(status);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index e927758e8cd..752735842b1 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -67,7 +67,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
 	if (!S_ISDIR(inode->i_mode))
 		flags &= ~OCFS2_DIRSYNC_FL;
 
-	handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		mlog_errno(status);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 81fb917475f..6d9658b0c5d 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -129,20 +129,16 @@ static struct ocfs2_journal_handle *ocfs2_alloc_handle(struct ocfs2_super *osb)
  * you pass it a handle however, it may still return error, in which
  * case it has free'd the passed handle for you. */
 struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
-					       struct ocfs2_journal_handle *handle,
 					       int max_buffs)
 {
 	int ret;
 	journal_t *journal = osb->journal->j_journal;
-
-	mlog_entry("(max_buffs = %d)\n", max_buffs);
+	struct ocfs2_journal_handle *handle;
 
 	BUG_ON(!osb || !osb->journal->j_journal);
 
-	if (ocfs2_is_hard_readonly(osb)) {
-		ret = -EROFS;
-		goto done_free;
-	}
+	if (ocfs2_is_hard_readonly(osb))
+		return ERR_PTR(-EROFS);
 
 	BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
 	BUG_ON(max_buffs <= 0);
@@ -153,13 +149,11 @@ struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
 		BUG();
 	}
 
-	if (!handle)
-		handle = ocfs2_alloc_handle(osb);
+	handle = ocfs2_alloc_handle(osb);
 	if (!handle) {
 		ret = -ENOMEM;
-		mlog(ML_ERROR, "Failed to allocate memory for journal "
-		     "handle!\n");
-		goto done_free;
+		mlog_errno(ret);
+		return ERR_PTR(ret);
 	}
 
 	down_read(&osb->journal->j_trans_barrier);
@@ -168,6 +162,7 @@ struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
 	handle->k_handle = journal_start(journal, max_buffs);
 	if (IS_ERR(handle->k_handle)) {
 		up_read(&osb->journal->j_trans_barrier);
+		kfree(handle);
 
 		ret = PTR_ERR(handle->k_handle);
 		handle->k_handle = NULL;
@@ -177,20 +172,12 @@ struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
 			ocfs2_abort(osb->sb, "Detected aborted journal");
 			ret = -EROFS;
 		}
-		goto done_free;
+		return ERR_PTR(ret);
 	}
 
 	atomic_inc(&(osb->journal->j_num_trans));
 
-	mlog_exit_ptr(handle);
 	return handle;
-
-done_free:
-	if (handle)
-		kfree(handle);
-
-	mlog_exit(ret);
-	return ERR_PTR(ret);
 }
 
 void ocfs2_commit_trans(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index d96f61abd66..3c0d15c5e49 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -216,7 +216,6 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
  * perfectly legal to go through an entire transaction without having
  * dirtied any buffers. */
 struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
-					       struct ocfs2_journal_handle *handle,
 					       int max_buffs);
 void			     ocfs2_commit_trans(struct ocfs2_super *osb,
 						struct ocfs2_journal_handle *handle);
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 9dd208dc5d7..2ae567a7042 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -238,7 +238,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 	}
 
 	/* WINDOW_MOVE_CREDITS is a bit heavy... */
-	handle = ocfs2_start_trans(osb, NULL, OCFS2_WINDOW_MOVE_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
 	if (IS_ERR(handle)) {
 		mlog_errno(PTR_ERR(handle));
 		handle = NULL;
@@ -405,7 +405,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
 		goto out_mutex;
 	}
 
-	handle = ocfs2_start_trans(osb, NULL, OCFS2_WINDOW_MOVE_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -896,7 +896,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	handle = ocfs2_start_trans(osb, NULL, OCFS2_WINDOW_MOVE_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 9a56bfdf45c..d63ab7be89a 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -379,7 +379,7 @@ static int ocfs2_mknod(struct inode *dir,
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, NULL, OCFS2_MKNOD_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -696,7 +696,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto out_unlock_inode;
 	}
 
-	handle = ocfs2_start_trans(osb, NULL, OCFS2_LINK_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_LINK_CREDITS);
 	if (IS_ERR(handle)) {
 		err = PTR_ERR(handle);
 		handle = NULL;
@@ -886,7 +886,7 @@ static int ocfs2_unlink(struct inode *dir,
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, NULL, OCFS2_UNLINK_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_UNLINK_CREDITS);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -1284,7 +1284,7 @@ static int ocfs2_rename(struct inode *old_dir,
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, NULL, OCFS2_RENAME_CREDITS);
+	handle = ocfs2_start_trans(osb, OCFS2_RENAME_CREDITS);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -1652,7 +1652,7 @@ static int ocfs2_symlink(struct inode *dir,
 		}
 	}
 
-	handle = ocfs2_start_trans(osb, NULL, credits);
+	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index ea63a519167..186d2c3d524 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -302,7 +302,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
 	credits = ocfs2_calc_group_alloc_credits(osb->sb,
 						 le16_to_cpu(cl->cl_cpg));
-	handle = ocfs2_start_trans(osb, handle, credits);
+	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
-- 
cgit v1.2.3


From 1fabe1481fac9e01bf8bffa60a2307ef379aa5de Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Mon, 9 Oct 2006 18:11:45 -0700
Subject: ocfs2: Remove struct ocfs2_journal_handle in favor of handle_t

This is mostly a search and replace as ocfs2_journal_handle is now no more
than a container for a handle_t pointer.

ocfs2_commit_trans() becomes very straight forward, and we remove some out
of date comments / code.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/alloc.c      | 32 ++++++++---------
 fs/ocfs2/alloc.h      |  2 +-
 fs/ocfs2/aops.c       |  8 ++---
 fs/ocfs2/aops.h       |  2 +-
 fs/ocfs2/dir.c        |  4 +--
 fs/ocfs2/dir.h        |  2 +-
 fs/ocfs2/file.c       | 18 +++++-----
 fs/ocfs2/file.h       |  4 +--
 fs/ocfs2/inode.c      |  6 ++--
 fs/ocfs2/inode.h      |  2 +-
 fs/ocfs2/ioctl.c      |  2 +-
 fs/ocfs2/journal.c    | 95 +++++++++++----------------------------------------
 fs/ocfs2/journal.h    | 21 ++++++------
 fs/ocfs2/localalloc.c | 18 +++++-----
 fs/ocfs2/localalloc.h |  2 +-
 fs/ocfs2/namei.c      | 38 ++++++++++-----------
 fs/ocfs2/namei.h      |  2 +-
 fs/ocfs2/ocfs2.h      |  2 +-
 fs/ocfs2/suballoc.c   | 44 ++++++++++++------------
 fs/ocfs2/suballoc.h   | 12 +++----
 20 files changed, 130 insertions(+), 186 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 354817acf8b..85a048b7a67 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -52,14 +52,14 @@ static int ocfs2_extent_contig(struct inode *inode,
 			       u64 blkno);
 
 static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
-				     struct ocfs2_journal_handle *handle,
+				     handle_t *handle,
 				     struct inode *inode,
 				     int wanted,
 				     struct ocfs2_alloc_context *meta_ac,
 				     struct buffer_head *bhs[]);
 
 static int ocfs2_add_branch(struct ocfs2_super *osb,
-			    struct ocfs2_journal_handle *handle,
+			    handle_t *handle,
 			    struct inode *inode,
 			    struct buffer_head *fe_bh,
 			    struct buffer_head *eb_bh,
@@ -67,14 +67,14 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 			    struct ocfs2_alloc_context *meta_ac);
 
 static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
-				  struct ocfs2_journal_handle *handle,
+				  handle_t *handle,
 				  struct inode *inode,
 				  struct buffer_head *fe_bh,
 				  struct ocfs2_alloc_context *meta_ac,
 				  struct buffer_head **ret_new_eb_bh);
 
 static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
-				  struct ocfs2_journal_handle *handle,
+				  handle_t *handle,
 				  struct inode *inode,
 				  struct buffer_head *fe_bh,
 				  u64 blkno,
@@ -152,7 +152,7 @@ bail:
  * l_count for you
  */
 static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
-				     struct ocfs2_journal_handle *handle,
+				     handle_t *handle,
 				     struct inode *inode,
 				     int wanted,
 				     struct ocfs2_alloc_context *meta_ac,
@@ -253,7 +253,7 @@ bail:
  * contain a single record with e_clusters == 0.
  */
 static int ocfs2_add_branch(struct ocfs2_super *osb,
-			    struct ocfs2_journal_handle *handle,
+			    handle_t *handle,
 			    struct inode *inode,
 			    struct buffer_head *fe_bh,
 			    struct buffer_head *eb_bh,
@@ -418,7 +418,7 @@ bail:
  * after this call.
  */
 static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
-				  struct ocfs2_journal_handle *handle,
+				  handle_t *handle,
 				  struct inode *inode,
 				  struct buffer_head *fe_bh,
 				  struct ocfs2_alloc_context *meta_ac,
@@ -520,7 +520,7 @@ bail:
  * down.
  */
 static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
-				  struct ocfs2_journal_handle *handle,
+				  handle_t *handle,
 				  struct inode *inode,
 				  struct buffer_head *fe_bh,
 				  u64 start_blk,
@@ -809,7 +809,7 @@ bail:
 
 /* the caller needs to update fe->i_clusters */
 int ocfs2_insert_extent(struct ocfs2_super *osb,
-			struct ocfs2_journal_handle *handle,
+			handle_t *handle,
 			struct inode *inode,
 			struct buffer_head *fe_bh,
 			u64 start_blk,
@@ -951,7 +951,7 @@ static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl,
 }
 
 static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
-				     struct ocfs2_journal_handle *handle,
+				     handle_t *handle,
 				     u64 start_blk,
 				     unsigned int num_clusters)
 {
@@ -1034,7 +1034,7 @@ bail:
 }
 
 static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
-					 struct ocfs2_journal_handle *handle,
+					 handle_t *handle,
 					 struct inode *data_alloc_inode,
 					 struct buffer_head *data_alloc_bh)
 {
@@ -1074,7 +1074,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
 		/* TODO: Perhaps we can calculate the bulk of the
 		 * credits up front rather than extending like
 		 * this. */
-		status = ocfs2_extend_trans(handle->k_handle,
+		status = ocfs2_extend_trans(handle,
 					    OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1113,7 +1113,7 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 {
 	int status;
 	unsigned int num_to_flush;
-	struct ocfs2_journal_handle *handle;
+	handle_t *handle;
 	struct inode *tl_inode = osb->osb_tl_inode;
 	struct inode *data_alloc_inode = NULL;
 	struct buffer_head *tl_bh = osb->osb_tl_bh;
@@ -1339,7 +1339,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
 	int i;
 	unsigned int clusters, num_recs, start_cluster;
 	u64 start_blk;
-	struct ocfs2_journal_handle *handle;
+	handle_t *handle;
 	struct inode *tl_inode = osb->osb_tl_inode;
 	struct ocfs2_truncate_log *tl;
 
@@ -1534,7 +1534,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 			     struct inode *inode,
 			     struct buffer_head *fe_bh,
 			     struct buffer_head *old_last_eb_bh,
-			     struct ocfs2_journal_handle *handle,
+			     handle_t *handle,
 			     struct ocfs2_truncate_context *tc)
 {
 	int status, i, depth;
@@ -1773,7 +1773,7 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
 	struct ocfs2_extent_block *eb;
 	struct ocfs2_extent_list *el;
 	struct buffer_head *last_eb_bh;
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 	struct inode *tl_inode = osb->osb_tl_inode;
 
 	mlog_entry_void();
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 12ba897743f..0b82e804432 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -28,7 +28,7 @@
 
 struct ocfs2_alloc_context;
 int ocfs2_insert_extent(struct ocfs2_super *osb,
-			struct ocfs2_journal_handle *handle,
+			handle_t *handle,
 			struct inode *inode,
 			struct buffer_head *fe_bh,
 			u64 blkno,
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index fcdcdf6aa35..2f7268e8152 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -355,13 +355,13 @@ static int walk_page_buffers(	handle_t *handle,
 	return ret;
 }
 
-struct ocfs2_journal_handle *ocfs2_start_walk_page_trans(struct inode *inode,
+handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
 							 struct page *page,
 							 unsigned from,
 							 unsigned to)
 {
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 	int ret = 0;
 
 	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
@@ -372,7 +372,7 @@ struct ocfs2_journal_handle *ocfs2_start_walk_page_trans(struct inode *inode,
 	}
 
 	if (ocfs2_should_order_data(inode)) {
-		ret = walk_page_buffers(handle->k_handle,
+		ret = walk_page_buffers(handle,
 					page_buffers(page),
 					from, to, NULL,
 					ocfs2_journal_dirty_data);
@@ -394,7 +394,7 @@ static int ocfs2_commit_write(struct file *file, struct page *page,
 	int ret;
 	struct buffer_head *di_bh = NULL;
 	struct inode *inode = page->mapping->host;
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 	struct ocfs2_dinode *di;
 
 	mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index e88c3f0b8fa..f446a15eab8 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -25,7 +25,7 @@
 int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
 			       unsigned from, unsigned to);
 
-struct ocfs2_journal_handle *ocfs2_start_walk_page_trans(struct inode *inode,
+handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
 							 struct page *page,
 							 unsigned from,
 							 unsigned to);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 036c891c1e1..5efea44d780 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -340,7 +340,7 @@ int ocfs2_empty_dir(struct inode *inode)
 
 /* returns a bh of the 1st new block in the allocation. */
 int ocfs2_do_extend_dir(struct super_block *sb,
-			struct ocfs2_journal_handle *handle,
+			handle_t *handle,
 			struct inode *dir,
 			struct buffer_head *parent_fe_bh,
 			struct ocfs2_alloc_context *data_ac,
@@ -398,7 +398,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
 	struct ocfs2_alloc_context *data_ac = NULL;
 	struct ocfs2_alloc_context *meta_ac = NULL;
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 	struct buffer_head *new_bh = NULL;
 	struct ocfs2_dir_entry * de;
 	struct super_block *sb = osb->sb;
diff --git a/fs/ocfs2/dir.h b/fs/ocfs2/dir.h
index 5f614ec9649..3f67e146864 100644
--- a/fs/ocfs2/dir.h
+++ b/fs/ocfs2/dir.h
@@ -45,7 +45,7 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
 				 struct buffer_head **ret_de_bh);
 struct ocfs2_alloc_context;
 int ocfs2_do_extend_dir(struct super_block *sb,
-			struct ocfs2_journal_handle *handle,
+			handle_t *handle,
 			struct inode *dir,
 			struct buffer_head *parent_fe_bh,
 			struct ocfs2_alloc_context *data_ac,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 100754de16b..fe6b795b1a4 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -134,7 +134,7 @@ bail:
 	return (err < 0) ? -EIO : 0;
 }
 
-int ocfs2_set_inode_size(struct ocfs2_journal_handle *handle,
+int ocfs2_set_inode_size(handle_t *handle,
 			 struct inode *inode,
 			 struct buffer_head *fe_bh,
 			 u64 new_i_size)
@@ -163,7 +163,7 @@ static int ocfs2_simple_size_update(struct inode *inode,
 {
 	int ret;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 
 	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
 	if (handle == NULL) {
@@ -188,7 +188,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
 				     u64 new_i_size)
 {
 	int status;
-	struct ocfs2_journal_handle *handle;
+	handle_t *handle;
 
 	mlog_entry_void();
 
@@ -327,7 +327,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
 			       struct inode *inode,
 			       u32 clusters_to_add,
 			       struct buffer_head *fe_bh,
-			       struct ocfs2_journal_handle *handle,
+			       handle_t *handle,
 			       struct ocfs2_alloc_context *data_ac,
 			       struct ocfs2_alloc_context *meta_ac,
 			       enum ocfs2_alloc_restarted *reason_ret)
@@ -432,7 +432,7 @@ static int ocfs2_extend_allocation(struct inode *inode,
 	u32 prev_clusters;
 	struct buffer_head *bh = NULL;
 	struct ocfs2_dinode *fe = NULL;
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 	struct ocfs2_alloc_context *data_ac = NULL;
 	struct ocfs2_alloc_context *meta_ac = NULL;
 	enum ocfs2_alloc_restarted why;
@@ -552,7 +552,7 @@ restarted_transaction:
 			credits = ocfs2_calc_extend_credits(osb->sb,
 							    fe,
 							    clusters_to_add);
-			status = ocfs2_extend_trans(handle->k_handle, credits);
+			status = ocfs2_extend_trans(handle, credits);
 			if (status < 0) {
 				/* handle still has to be committed at
 				 * this point. */
@@ -610,7 +610,7 @@ static int ocfs2_write_zero_page(struct inode *inode,
 	struct page *page;
 	unsigned long index;
 	unsigned int offset;
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 	int ret;
 
 	offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
@@ -775,7 +775,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	struct super_block *sb = inode->i_sb;
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 	struct buffer_head *bh = NULL;
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 
 	mlog_entry("(0x%p, '%.*s')\n", dentry,
 	           dentry->d_name.len, dentry->d_name.name);
@@ -897,7 +897,7 @@ static int ocfs2_write_remove_suid(struct inode *inode)
 	int ret;
 	struct buffer_head *bh = NULL;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
-	struct ocfs2_journal_handle *handle;
+	handle_t *handle;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_dinode *di;
 
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 740c9e7ca59..84f8b974dca 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -41,7 +41,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
 			       struct inode *inode,
 			       u32 clusters_to_add,
 			       struct buffer_head *fe_bh,
-			       struct ocfs2_journal_handle *handle,
+			       handle_t *handle,
 			       struct ocfs2_alloc_context *data_ac,
 			       struct ocfs2_alloc_context *meta_ac,
 			       enum ocfs2_alloc_restarted *reason);
@@ -49,7 +49,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
 int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		  struct kstat *stat);
 
-int ocfs2_set_inode_size(struct ocfs2_journal_handle *handle,
+int ocfs2_set_inode_size(handle_t *handle,
 			 struct inode *inode,
 			 struct buffer_head *fe_bh,
 			 u64 new_i_size);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index ac78877ba14..ad4d4a1df77 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -512,7 +512,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 				     struct buffer_head *fe_bh)
 {
 	int status = 0;
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 	struct ocfs2_truncate_context *tc = NULL;
 	struct ocfs2_dinode *fe;
 
@@ -568,7 +568,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 	int status;
 	struct inode *inode_alloc_inode = NULL;
 	struct buffer_head *inode_alloc_bh = NULL;
-	struct ocfs2_journal_handle *handle;
+	handle_t *handle;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
 
@@ -1198,7 +1198,7 @@ bail:
  * struct inode.
  * Only takes ip_lock.
  */
-int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
+int ocfs2_mark_inode_dirty(handle_t *handle,
 			   struct inode *inode,
 			   struct buffer_head *bh)
 {
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 9b7354545cb..46a378fbc40 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -136,7 +136,7 @@ ssize_t ocfs2_rw_direct(int rw, struct file *filp, char *buf,
 void ocfs2_sync_blockdev(struct super_block *sb);
 void ocfs2_refresh_inode(struct inode *inode,
 			 struct ocfs2_dinode *fe);
-int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
+int ocfs2_mark_inode_dirty(handle_t *handle,
 			   struct inode *inode,
 			   struct buffer_head *bh);
 int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 752735842b1..4768be5f308 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -43,7 +43,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
 {
 	struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 	struct buffer_head *bh = NULL;
 	unsigned oldflags;
 	int status;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 6d9658b0c5d..c0ad7cb5952 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -110,30 +110,13 @@ finally:
 	return status;
 }
 
-static struct ocfs2_journal_handle *ocfs2_alloc_handle(struct ocfs2_super *osb)
-{
-	struct ocfs2_journal_handle *retval = NULL;
-
-	retval = kcalloc(1, sizeof(*retval), GFP_NOFS);
-	if (!retval) {
-		mlog(ML_ERROR, "Failed to allocate memory for journal "
-		     "handle!\n");
-		return NULL;
-	}
-	retval->k_handle = NULL;
-
-	return retval;
-}
-
 /* pass it NULL and it will allocate a new handle object for you.  If
  * you pass it a handle however, it may still return error, in which
  * case it has free'd the passed handle for you. */
-struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
-					       int max_buffs)
+handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
 {
-	int ret;
 	journal_t *journal = osb->journal->j_journal;
-	struct ocfs2_journal_handle *handle;
+	handle_t *handle;
 
 	BUG_ON(!osb || !osb->journal->j_journal);
 
@@ -149,77 +132,39 @@ struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
 		BUG();
 	}
 
-	handle = ocfs2_alloc_handle(osb);
-	if (!handle) {
-		ret = -ENOMEM;
-		mlog_errno(ret);
-		return ERR_PTR(ret);
-	}
-
 	down_read(&osb->journal->j_trans_barrier);
 
-	/* actually start the transaction now */
-	handle->k_handle = journal_start(journal, max_buffs);
-	if (IS_ERR(handle->k_handle)) {
+	handle = journal_start(journal, max_buffs);
+	if (IS_ERR(handle)) {
 		up_read(&osb->journal->j_trans_barrier);
-		kfree(handle);
 
-		ret = PTR_ERR(handle->k_handle);
-		handle->k_handle = NULL;
-		mlog_errno(ret);
+		mlog_errno(PTR_ERR(handle));
 
 		if (is_journal_aborted(journal)) {
 			ocfs2_abort(osb->sb, "Detected aborted journal");
-			ret = -EROFS;
+			handle = ERR_PTR(-EROFS);
 		}
-		return ERR_PTR(ret);
-	}
-
-	atomic_inc(&(osb->journal->j_num_trans));
+	} else
+		atomic_inc(&(osb->journal->j_num_trans));
 
 	return handle;
 }
 
-void ocfs2_commit_trans(struct ocfs2_super *osb,
-			struct ocfs2_journal_handle *handle)
+int ocfs2_commit_trans(struct ocfs2_super *osb,
+		       handle_t *handle)
 {
-	handle_t *jbd_handle;
-	int retval;
+	int ret;
 	struct ocfs2_journal *journal = osb->journal;
 
-	mlog_entry_void();
-
 	BUG_ON(!handle);
 
-	if (!handle->k_handle) {
-		kfree(handle);
-		mlog_exit_void();
-		return;
-	}
-
-	/* ocfs2_extend_trans may have had to call journal_restart
-	 * which will always commit the transaction, but may return
-	 * error for any number of reasons. If this is the case, we
-	 * clear k_handle as it's not valid any more. */
-	if (handle->k_handle) {
-		jbd_handle = handle->k_handle;
-
-		/* actually stop the transaction. if we've set h_sync,
-		 * it'll have been committed when we return */
-		retval = journal_stop(jbd_handle);
-		if (retval < 0) {
-			mlog_errno(retval);
-			mlog(ML_ERROR, "Could not commit transaction\n");
-			BUG();
-		}
-
-		handle->k_handle = NULL; /* it's been free'd in journal_stop */
-	}
+	ret = journal_stop(handle);
+	if (ret < 0)
+		mlog_errno(ret);
 
 	up_read(&journal->j_trans_barrier);
 
-	kfree(handle);
-	mlog_exit_void();
+	return ret;
 }
 
 /*
@@ -268,7 +213,7 @@ bail:
 	return status;
 }
 
-int ocfs2_journal_access(struct ocfs2_journal_handle *handle,
+int ocfs2_journal_access(handle_t *handle,
 			 struct inode *inode,
 			 struct buffer_head *bh,
 			 int type)
@@ -306,11 +251,11 @@ int ocfs2_journal_access(struct ocfs2_journal_handle *handle,
 	switch (type) {
 	case OCFS2_JOURNAL_ACCESS_CREATE:
 	case OCFS2_JOURNAL_ACCESS_WRITE:
-		status = journal_get_write_access(handle->k_handle, bh);
+		status = journal_get_write_access(handle, bh);
 		break;
 
 	case OCFS2_JOURNAL_ACCESS_UNDO:
-		status = journal_get_undo_access(handle->k_handle, bh);
+		status = journal_get_undo_access(handle, bh);
 		break;
 
 	default:
@@ -327,7 +272,7 @@ int ocfs2_journal_access(struct ocfs2_journal_handle *handle,
 	return status;
 }
 
-int ocfs2_journal_dirty(struct ocfs2_journal_handle *handle,
+int ocfs2_journal_dirty(handle_t *handle,
 			struct buffer_head *bh)
 {
 	int status;
@@ -335,7 +280,7 @@ int ocfs2_journal_dirty(struct ocfs2_journal_handle *handle,
 	mlog_entry("(bh->b_blocknr=%llu)\n",
 		   (unsigned long long)bh->b_blocknr);
 
-	status = journal_dirty_metadata(handle->k_handle, bh);
+	status = journal_dirty_metadata(handle, bh);
 	if (status < 0)
 		mlog(ML_ERROR, "Could not dirty metadata buffer. "
 		     "(bh->b_blocknr=%llu)\n",
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3c0d15c5e49..d86cb960b7e 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -37,7 +37,6 @@ enum ocfs2_journal_state {
 
 struct ocfs2_super;
 struct ocfs2_dinode;
-struct ocfs2_journal_handle;
 
 struct ocfs2_journal {
 	enum ocfs2_journal_state   j_state;    /* Journals current state   */
@@ -133,10 +132,6 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
 	spin_unlock(&trans_inc_lock);
 }
 
-struct ocfs2_journal_handle {
-	handle_t            *k_handle; /* kernel handle.                */
-};
-
 /* Exported only for the journal struct init code in super.c. Do not call. */
 void ocfs2_complete_recovery(void *data);
 
@@ -200,7 +195,11 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
  *  ocfs2_start_trans      - Begin a transaction. Give it an upper estimate of
  *                          the number of blocks that will be changed during
  *                          this handle.
- *  ocfs2_commit_trans     - Complete a handle.
+ *  ocfs2_commit_trans - Complete a handle. It might return -EIO if
+ *                       the journal was aborted. The majority of paths don't
+ *                       check the return value as an error there comes too
+ *                       late to do anything (and will be picked up in a
+ *                       later transaction).
  *  ocfs2_extend_trans     - Extend a handle by nblocks credits. This may
  *                          commit the handle to disk in the process, but will
  *                          not release any locks taken during the transaction.
@@ -215,10 +214,10 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
 /* You must always start_trans with a number of buffs > 0, but it's
  * perfectly legal to go through an entire transaction without having
  * dirtied any buffers. */
-struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
+handle_t		    *ocfs2_start_trans(struct ocfs2_super *osb,
 					       int max_buffs);
-void			     ocfs2_commit_trans(struct ocfs2_super *osb,
-						struct ocfs2_journal_handle *handle);
+int			     ocfs2_commit_trans(struct ocfs2_super *osb,
+						handle_t *handle);
 int			     ocfs2_extend_trans(handle_t *handle, int nblocks);
 
 /*
@@ -236,7 +235,7 @@ int			     ocfs2_extend_trans(handle_t *handle, int nblocks);
 #define OCFS2_JOURNAL_ACCESS_WRITE  1
 #define OCFS2_JOURNAL_ACCESS_UNDO   2
 
-int                  ocfs2_journal_access(struct ocfs2_journal_handle *handle,
+int                  ocfs2_journal_access(handle_t *handle,
 					  struct inode *inode,
 					  struct buffer_head *bh,
 					  int type);
@@ -259,7 +258,7 @@ int                  ocfs2_journal_access(struct ocfs2_journal_handle *handle,
  *	<modify the bh>
  * 	ocfs2_journal_dirty(handle, bh);
  */
-int                  ocfs2_journal_dirty(struct ocfs2_journal_handle *handle,
+int                  ocfs2_journal_dirty(handle_t *handle,
 					 struct buffer_head *bh);
 int                  ocfs2_journal_dirty_data(handle_t *handle,
 					      struct buffer_head *bh);
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 2ae567a7042..698d79a74ef 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -58,7 +58,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
 
 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
-				    struct ocfs2_journal_handle *handle,
+				    handle_t *handle,
 				    struct ocfs2_dinode *alloc,
 				    struct inode *main_bm_inode,
 				    struct buffer_head *main_bm_bh);
@@ -69,7 +69,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
 						struct buffer_head **bitmap_bh);
 
 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
-					struct ocfs2_journal_handle *handle,
+					handle_t *handle,
 					struct ocfs2_alloc_context *ac);
 
 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
@@ -195,7 +195,7 @@ bail:
 void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 {
 	int status;
-	struct ocfs2_journal_handle *handle;
+	handle_t *handle;
 	struct inode *local_alloc_inode = NULL;
 	struct buffer_head *bh = NULL;
 	struct buffer_head *main_bm_bh = NULL;
@@ -382,7 +382,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
 					struct ocfs2_dinode *alloc)
 {
 	int status;
-	struct ocfs2_journal_handle *handle;
+	handle_t *handle;
 	struct buffer_head *main_bm_bh = NULL;
 	struct inode *main_bm_inode;
 
@@ -414,7 +414,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
 	}
 
 	/* we want the bitmap change to be recorded on disk asap */
-	handle->k_handle->h_sync = 1;
+	handle->h_sync = 1;
 
 	status = ocfs2_sync_local_to_main(osb, handle, alloc,
 					  main_bm_inode, main_bm_bh);
@@ -521,7 +521,7 @@ bail:
 }
 
 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
-				 struct ocfs2_journal_handle *handle,
+				 handle_t *handle,
 				 struct ocfs2_alloc_context *ac,
 				 u32 min_bits,
 				 u32 *bit_off,
@@ -699,7 +699,7 @@ static void ocfs2_verify_zero_bits(unsigned long *bitmap,
  * passed is used for caching.
  */
 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
-				    struct ocfs2_journal_handle *handle,
+				    handle_t *handle,
 				    struct ocfs2_dinode *alloc,
 				    struct inode *main_bm_inode,
 				    struct buffer_head *main_bm_bh)
@@ -811,7 +811,7 @@ bail:
  * pass it the bitmap lock in lock_bh if you have it.
  */
 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
-					struct ocfs2_journal_handle *handle,
+					handle_t *handle,
 					struct ocfs2_alloc_context *ac)
 {
 	int status = 0;
@@ -878,7 +878,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 	int status = 0;
 	struct buffer_head *main_bm_bh = NULL;
 	struct inode *main_bm_inode = NULL;
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 	struct ocfs2_dinode *alloc;
 	struct ocfs2_dinode *alloc_copy = NULL;
 	struct ocfs2_alloc_context *ac = NULL;
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index 4bd624ce380..385a10152f9 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -46,7 +46,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 				   struct ocfs2_alloc_context *ac);
 
 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
-				 struct ocfs2_journal_handle *handle,
+				 handle_t *handle,
 				 struct ocfs2_alloc_context *ac,
 				 u32 min_bits,
 				 u32 *bit_off,
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d63ab7be89a..583bffe9809 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -75,12 +75,12 @@ static int inline ocfs2_search_dirblock(struct buffer_head *bh,
 					unsigned long offset,
 					struct ocfs2_dir_entry **res_dir);
 
-static int ocfs2_delete_entry(struct ocfs2_journal_handle *handle,
+static int ocfs2_delete_entry(handle_t *handle,
 			      struct inode *dir,
 			      struct ocfs2_dir_entry *de_del,
 			      struct buffer_head *bh);
 
-static int __ocfs2_add_entry(struct ocfs2_journal_handle *handle,
+static int __ocfs2_add_entry(handle_t *handle,
 			     struct inode *dir,
 			     const char *name, int namelen,
 			     struct inode *inode, u64 blkno,
@@ -93,12 +93,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 			      dev_t dev,
 			      struct buffer_head **new_fe_bh,
 			      struct buffer_head *parent_fe_bh,
-			      struct ocfs2_journal_handle *handle,
+			      handle_t *handle,
 			      struct inode **ret_inode,
 			      struct ocfs2_alloc_context *inode_ac);
 
 static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
-			      struct ocfs2_journal_handle *handle,
+			      handle_t *handle,
 			      struct inode *parent,
 			      struct inode *inode,
 			      struct buffer_head *fe_bh,
@@ -111,7 +111,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 				    struct buffer_head **de_bh);
 
 static int ocfs2_orphan_add(struct ocfs2_super *osb,
-			    struct ocfs2_journal_handle *handle,
+			    handle_t *handle,
 			    struct inode *inode,
 			    struct ocfs2_dinode *fe,
 			    char *name,
@@ -119,11 +119,11 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 			    struct inode *orphan_dir_inode);
 
 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
-				     struct ocfs2_journal_handle *handle,
+				     handle_t *handle,
 				     struct inode *inode,
 				     const char *symname);
 
-static inline int ocfs2_add_entry(struct ocfs2_journal_handle *handle,
+static inline int ocfs2_add_entry(handle_t *handle,
 				  struct dentry *dentry,
 				  struct inode *inode, u64 blkno,
 				  struct buffer_head *parent_fe_bh,
@@ -236,7 +236,7 @@ bail:
 }
 
 static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
-			      struct ocfs2_journal_handle *handle,
+			      handle_t *handle,
 			      struct inode *parent,
 			      struct inode *inode,
 			      struct buffer_head *fe_bh,
@@ -311,7 +311,7 @@ static int ocfs2_mknod(struct inode *dir,
 {
 	int status = 0;
 	struct buffer_head *parent_fe_bh = NULL;
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 	struct ocfs2_super *osb;
 	struct ocfs2_dinode *dirfe;
 	struct buffer_head *new_fe_bh = NULL;
@@ -476,7 +476,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 			      dev_t dev,
 			      struct buffer_head **new_fe_bh,
 			      struct buffer_head *parent_fe_bh,
-			      struct ocfs2_journal_handle *handle,
+			      handle_t *handle,
 			      struct inode **ret_inode,
 			      struct ocfs2_alloc_context *inode_ac)
 {
@@ -642,7 +642,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 		      struct inode *dir,
 		      struct dentry *dentry)
 {
-	struct ocfs2_journal_handle *handle;
+	handle_t *handle;
 	struct inode *inode = old_dentry->d_inode;
 	int err;
 	struct buffer_head *fe_bh = NULL;
@@ -807,7 +807,7 @@ static int ocfs2_unlink(struct inode *dir,
 	struct ocfs2_dinode *fe = NULL;
 	struct buffer_head *fe_bh = NULL;
 	struct buffer_head *parent_node_bh = NULL;
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 	struct ocfs2_dir_entry *dirent = NULL;
 	struct buffer_head *dirent_bh = NULL;
 	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
@@ -1069,7 +1069,7 @@ static int ocfs2_rename(struct inode *old_dir,
 	struct buffer_head *insert_entry_bh = NULL;
 	struct ocfs2_super *osb = NULL;
 	u64 newfe_blkno;
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 	struct buffer_head *old_dir_bh = NULL;
 	struct buffer_head *new_dir_bh = NULL;
 	struct ocfs2_dir_entry *old_de = NULL, *new_de = NULL; // dirent for old_dentry
@@ -1478,7 +1478,7 @@ bail:
  * data, including the null terminator.
  */
 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
-				     struct ocfs2_journal_handle *handle,
+				     handle_t *handle,
 				     struct inode *inode,
 				     const char *symname)
 {
@@ -1593,7 +1593,7 @@ static int ocfs2_symlink(struct inode *dir,
 	struct buffer_head *parent_fe_bh = NULL;
 	struct ocfs2_dinode *fe = NULL;
 	struct ocfs2_dinode *dirfe;
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 	struct ocfs2_alloc_context *inode_ac = NULL;
 	struct ocfs2_alloc_context *data_ac = NULL;
 
@@ -1784,7 +1784,7 @@ int ocfs2_check_dir_entry(struct inode * dir,
  * If you pass me insert_bh, I'll skip the search of the other dir
  * blocks and put the record in there.
  */
-static int __ocfs2_add_entry(struct ocfs2_journal_handle *handle,
+static int __ocfs2_add_entry(handle_t *handle,
 			     struct inode *dir,
 			     const char *name, int namelen,
 			     struct inode *inode, u64 blkno,
@@ -1870,7 +1870,7 @@ bail:
  * ocfs2_delete_entry deletes a directory entry by merging it with the
  * previous entry
  */
-static int ocfs2_delete_entry(struct ocfs2_journal_handle *handle,
+static int ocfs2_delete_entry(handle_t *handle,
 			      struct inode *dir,
 			      struct ocfs2_dir_entry *de_del,
 			      struct buffer_head *bh)
@@ -2159,7 +2159,7 @@ leave:
 }
 
 static int ocfs2_orphan_add(struct ocfs2_super *osb,
-			    struct ocfs2_journal_handle *handle,
+			    handle_t *handle,
 			    struct inode *inode,
 			    struct ocfs2_dinode *fe,
 			    char *name,
@@ -2232,7 +2232,7 @@ leave:
 
 /* unlike orphan_add, we expect the orphan dir to already be locked here. */
 int ocfs2_orphan_del(struct ocfs2_super *osb,
-		     struct ocfs2_journal_handle *handle,
+		     handle_t *handle,
 		     struct inode *orphan_dir_inode,
 		     struct inode *inode,
 		     struct buffer_head *orphan_dir_bh)
diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h
index deaaa97dbf0..8425944fccc 100644
--- a/fs/ocfs2/namei.h
+++ b/fs/ocfs2/namei.h
@@ -39,7 +39,7 @@ struct buffer_head *ocfs2_find_entry(const char *name,
 				     struct inode *dir,
 				     struct ocfs2_dir_entry **res_dir);
 int ocfs2_orphan_del(struct ocfs2_super *osb,
-		     struct ocfs2_journal_handle *handle,
+		     handle_t *handle,
 		     struct inode *orphan_dir_inode,
 		     struct inode *inode,
 		     struct buffer_head *orphan_dir_bh);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 0462a7f4e21..553e7021b6a 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -34,6 +34,7 @@
 #include <linux/workqueue.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
+#include <linux/jbd.h>
 
 #include "cluster/nodemanager.h"
 #include "cluster/heartbeat.h"
@@ -181,7 +182,6 @@ enum ocfs2_mount_options
 #define OCFS2_OSB_ERROR_FS	0x0004
 
 struct ocfs2_journal;
-struct ocfs2_journal_handle;
 struct ocfs2_super
 {
 	struct task_struct *commit_task;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 186d2c3d524..000d71cca6c 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -49,7 +49,7 @@
 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
-static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
+static int ocfs2_block_group_fill(handle_t *handle,
 				  struct inode *alloc_inode,
 				  struct buffer_head *bg_bh,
 				  u64 group_blkno,
@@ -69,7 +69,7 @@ static int ocfs2_block_group_search(struct inode *inode,
 				    u16 *bit_off, u16 *bits_found);
 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 				     struct ocfs2_alloc_context *ac,
-				     struct ocfs2_journal_handle *handle,
+				     handle_t *handle,
 				     u32 bits_wanted,
 				     u32 min_bits,
 				     u16 *bit_off,
@@ -77,20 +77,20 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 				     u64 *bg_blkno);
 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
 					 int nr);
-static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle,
+static inline int ocfs2_block_group_set_bits(handle_t *handle,
 					     struct inode *alloc_inode,
 					     struct ocfs2_group_desc *bg,
 					     struct buffer_head *group_bh,
 					     unsigned int bit_off,
 					     unsigned int num_bits);
-static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle,
+static inline int ocfs2_block_group_clear_bits(handle_t *handle,
 					       struct inode *alloc_inode,
 					       struct ocfs2_group_desc *bg,
 					       struct buffer_head *group_bh,
 					       unsigned int bit_off,
 					       unsigned int num_bits);
 
-static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle,
+static int ocfs2_relink_block_group(handle_t *handle,
 				    struct inode *alloc_inode,
 				    struct buffer_head *fe_bh,
 				    struct buffer_head *bg_bh,
@@ -98,7 +98,7 @@ static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle,
 				    u16 chain);
 static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
 						     u32 wanted);
-static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
+static int ocfs2_free_suballoc_bits(handle_t *handle,
 				    struct inode *alloc_inode,
 				    struct buffer_head *alloc_bh,
 				    unsigned int start_bit,
@@ -196,7 +196,7 @@ static int ocfs2_check_group_descriptor(struct super_block *sb,
 	return 0;
 }
 
-static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
+static int ocfs2_block_group_fill(handle_t *handle,
 				  struct inode *alloc_inode,
 				  struct buffer_head *bg_bh,
 				  u64 group_blkno,
@@ -279,7 +279,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
 	struct ocfs2_chain_list *cl;
 	struct ocfs2_alloc_context *ac = NULL;
-	struct ocfs2_journal_handle *handle = NULL;
+	handle_t *handle = NULL;
 	u32 bit_off, num_bits;
 	u16 alloc_rec;
 	u64 bg_blkno;
@@ -747,7 +747,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
 	return status;
 }
 
-static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle,
+static inline int ocfs2_block_group_set_bits(handle_t *handle,
 					     struct inode *alloc_inode,
 					     struct ocfs2_group_desc *bg,
 					     struct buffer_head *group_bh,
@@ -818,7 +818,7 @@ static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl)
 	return best;
 }
 
-static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle,
+static int ocfs2_relink_block_group(handle_t *handle,
 				    struct inode *alloc_inode,
 				    struct buffer_head *fe_bh,
 				    struct buffer_head *bg_bh,
@@ -998,7 +998,7 @@ static int ocfs2_block_group_search(struct inode *inode,
 }
 
 static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
-				       struct ocfs2_journal_handle *handle,
+				       handle_t *handle,
 				       struct buffer_head *di_bh,
 				       u32 num_bits,
 				       u16 chain)
@@ -1028,7 +1028,7 @@ out:
 }
 
 static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
-				  struct ocfs2_journal_handle *handle,
+				  handle_t *handle,
 				  u32 bits_wanted,
 				  u32 min_bits,
 				  u16 *bit_off,
@@ -1088,7 +1088,7 @@ out:
 }
 
 static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
-			      struct ocfs2_journal_handle *handle,
+			      handle_t *handle,
 			      u32 bits_wanted,
 			      u32 min_bits,
 			      u16 *bit_off,
@@ -1245,7 +1245,7 @@ bail:
 /* will give out up to bits_wanted contiguous bits. */
 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 				     struct ocfs2_alloc_context *ac,
-				     struct ocfs2_journal_handle *handle,
+				     handle_t *handle,
 				     u32 bits_wanted,
 				     u32 min_bits,
 				     u16 *bit_off,
@@ -1362,7 +1362,7 @@ bail:
 }
 
 int ocfs2_claim_metadata(struct ocfs2_super *osb,
-			 struct ocfs2_journal_handle *handle,
+			 handle_t *handle,
 			 struct ocfs2_alloc_context *ac,
 			 u32 bits_wanted,
 			 u16 *suballoc_bit_start,
@@ -1399,7 +1399,7 @@ bail:
 }
 
 int ocfs2_claim_new_inode(struct ocfs2_super *osb,
-			  struct ocfs2_journal_handle *handle,
+			  handle_t *handle,
 			  struct ocfs2_alloc_context *ac,
 			  u16 *suballoc_bit,
 			  u64 *fe_blkno)
@@ -1502,7 +1502,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
  * of any size.
  */
 int ocfs2_claim_clusters(struct ocfs2_super *osb,
-			 struct ocfs2_journal_handle *handle,
+			 handle_t *handle,
 			 struct ocfs2_alloc_context *ac,
 			 u32 min_clusters,
 			 u32 *cluster_start,
@@ -1572,7 +1572,7 @@ bail:
 	return status;
 }
 
-static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle,
+static inline int ocfs2_block_group_clear_bits(handle_t *handle,
 					       struct inode *alloc_inode,
 					       struct ocfs2_group_desc *bg,
 					       struct buffer_head *group_bh,
@@ -1627,7 +1627,7 @@ bail:
 /*
  * expects the suballoc inode to already be locked.
  */
-static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
+static int ocfs2_free_suballoc_bits(handle_t *handle,
 				    struct inode *alloc_inode,
 				    struct buffer_head *alloc_bh,
 				    unsigned int start_bit,
@@ -1711,7 +1711,7 @@ static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
 	return group;
 }
 
-int ocfs2_free_dinode(struct ocfs2_journal_handle *handle,
+int ocfs2_free_dinode(handle_t *handle,
 		      struct inode *inode_alloc_inode,
 		      struct buffer_head *inode_alloc_bh,
 		      struct ocfs2_dinode *di)
@@ -1724,7 +1724,7 @@ int ocfs2_free_dinode(struct ocfs2_journal_handle *handle,
 					inode_alloc_bh, bit, bg_blkno, 1);
 }
 
-int ocfs2_free_extent_block(struct ocfs2_journal_handle *handle,
+int ocfs2_free_extent_block(handle_t *handle,
 			    struct inode *eb_alloc_inode,
 			    struct buffer_head *eb_alloc_bh,
 			    struct ocfs2_extent_block *eb)
@@ -1737,7 +1737,7 @@ int ocfs2_free_extent_block(struct ocfs2_journal_handle *handle,
 					bit, bg_blkno, 1);
 }
 
-int ocfs2_free_clusters(struct ocfs2_journal_handle *handle,
+int ocfs2_free_clusters(handle_t *handle,
 		       struct inode *bitmap_inode,
 		       struct buffer_head *bitmap_bh,
 		       u64 start_blk,
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index e7c4b277c8d..1a3c94cb925 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -68,33 +68,33 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
 			   struct ocfs2_alloc_context **ac);
 
 int ocfs2_claim_metadata(struct ocfs2_super *osb,
-			 struct ocfs2_journal_handle *handle,
+			 handle_t *handle,
 			 struct ocfs2_alloc_context *ac,
 			 u32 bits_wanted,
 			 u16 *suballoc_bit_start,
 			 u32 *num_bits,
 			 u64 *blkno_start);
 int ocfs2_claim_new_inode(struct ocfs2_super *osb,
-			  struct ocfs2_journal_handle *handle,
+			  handle_t *handle,
 			  struct ocfs2_alloc_context *ac,
 			  u16 *suballoc_bit,
 			  u64 *fe_blkno);
 int ocfs2_claim_clusters(struct ocfs2_super *osb,
-			 struct ocfs2_journal_handle *handle,
+			 handle_t *handle,
 			 struct ocfs2_alloc_context *ac,
 			 u32 min_clusters,
 			 u32 *cluster_start,
 			 u32 *num_clusters);
 
-int ocfs2_free_dinode(struct ocfs2_journal_handle *handle,
+int ocfs2_free_dinode(handle_t *handle,
 		      struct inode *inode_alloc_inode,
 		      struct buffer_head *inode_alloc_bh,
 		      struct ocfs2_dinode *di);
-int ocfs2_free_extent_block(struct ocfs2_journal_handle *handle,
+int ocfs2_free_extent_block(handle_t *handle,
 			    struct inode *eb_alloc_inode,
 			    struct buffer_head *eb_alloc_bh,
 			    struct ocfs2_extent_block *eb);
-int ocfs2_free_clusters(struct ocfs2_journal_handle *handle,
+int ocfs2_free_clusters(handle_t *handle,
 			struct inode *bitmap_inode,
 			struct buffer_head *bitmap_bh,
 			u64 start_blk,
-- 
cgit v1.2.3


From 55ed16029d597622db8121270e687373b5e31722 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 20 Oct 2006 14:55:54 -0700
Subject: configfs: mutex_lock_nested() fix

configfs_unregister_subsystem() nests a pair of inode i_mutex acquisitions,
and thus needs annotation via mutex_lock_nested().

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/configfs/dir.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8a3b6a1a6ad..452cfd1e785 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1176,8 +1176,9 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
 		return;
 	}
 
-	mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);
-	mutex_lock(&dentry->d_inode->i_mutex);
+	mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
+			  I_MUTEX_PARENT);
+	mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
 	if (configfs_detach_prep(dentry)) {
 		printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
 	}
-- 
cgit v1.2.3


From e88d0c9a4180821ad64c1fb421e4c28f8155eb74 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 17 Oct 2006 17:06:53 -0700
Subject: ocfs2: Remove ocfs2_write_should_remove_suid()

Use should_remove_suid() instead.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/file.c | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index fe6b795b1a4..39b2f1653e2 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -943,20 +943,6 @@ out:
 	return ret;
 }
 
-static inline int ocfs2_write_should_remove_suid(struct inode *inode)
-{
-	mode_t mode = inode->i_mode;
-
-	if (!capable(CAP_FSETID)) {
-		if (unlikely(mode & S_ISUID))
-			return 1;
-
-		if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
-			return 1;
-	}
-	return 0;
-}
-
 static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 				    const struct iovec *iov,
 				    unsigned long nr_segs,
@@ -1021,7 +1007,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 		 * inode. There's also the dinode i_size state which
 		 * can be lost via setattr during extending writes (we
 		 * set inode->i_size at the end of a write. */
-		if (ocfs2_write_should_remove_suid(inode)) {
+		if (should_remove_suid(filp->f_dentry)) {
 			if (meta_level == 0) {
 				ocfs2_meta_unlock(inode, meta_level);
 				meta_level = 1;
-- 
cgit v1.2.3


From 8659ac25b434fcc61cf7797f4b69edc3eaaffb55 Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Tue, 17 Oct 2006 18:29:52 -0700
Subject: ocfs2: Add splice support

Add splice read/write support in ocfs2.

ocfs2_file_splice_read/write are very similar to ocfs2_file_aio_read/write.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/file.c | 187 +++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 137 insertions(+), 50 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 39b2f1653e2..b32cdb3bf7c 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -31,6 +31,7 @@
 #include <linux/pagemap.h>
 #include <linux/uio.h>
 #include <linux/sched.h>
+#include <linux/pipe_fs_i.h>
 
 #define MLOG_MASK_PREFIX ML_INODE
 #include <cluster/masklog.h>
@@ -943,53 +944,21 @@ out:
 	return ret;
 }
 
-static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
-				    const struct iovec *iov,
-				    unsigned long nr_segs,
-				    loff_t pos)
+static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
+					 loff_t *ppos,
+					 size_t count,
+					 int appending)
 {
-	int ret, rw_level = -1, meta_level = -1, have_alloc_sem = 0;
+	int ret = 0, meta_level = appending;
+	struct inode *inode = dentry->d_inode;
 	u32 clusters;
-	struct file *filp = iocb->ki_filp;
-	struct inode *inode = filp->f_dentry->d_inode;
 	loff_t newsize, saved_pos;
 
-	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
-		   (unsigned int)nr_segs,
-		   filp->f_dentry->d_name.len,
-		   filp->f_dentry->d_name.name);
-
-	/* happy write of zero bytes */
-	if (iocb->ki_left == 0)
-		return 0;
-
-	if (!inode) {
-		mlog(0, "bad inode\n");
-		return -EIO;
-	}
-
-	mutex_lock(&inode->i_mutex);
-	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
-	if (filp->f_flags & O_DIRECT) {
-		have_alloc_sem = 1;
-		down_read(&inode->i_alloc_sem);
-	}
-
-	/* concurrent O_DIRECT writes are allowed */
-	rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1;
-	ret = ocfs2_rw_lock(inode, rw_level);
-	if (ret < 0) {
-		rw_level = -1;
-		mlog_errno(ret);
-		goto out;
-	}
-
 	/* 
 	 * We sample i_size under a read level meta lock to see if our write
 	 * is extending the file, if it is we back off and get a write level
 	 * meta lock.
 	 */
-	meta_level = (filp->f_flags & O_APPEND) ? 1 : 0;
 	for(;;) {
 		ret = ocfs2_meta_lock(inode, NULL, meta_level);
 		if (ret < 0) {
@@ -1007,7 +976,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 		 * inode. There's also the dinode i_size state which
 		 * can be lost via setattr during extending writes (we
 		 * set inode->i_size at the end of a write. */
-		if (should_remove_suid(filp->f_dentry)) {
+		if (should_remove_suid(dentry)) {
 			if (meta_level == 0) {
 				ocfs2_meta_unlock(inode, meta_level);
 				meta_level = 1;
@@ -1017,19 +986,19 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 			ret = ocfs2_write_remove_suid(inode);
 			if (ret < 0) {
 				mlog_errno(ret);
-				goto out;
+				goto out_unlock;
 			}
 		}
 
 		/* work on a copy of ppos until we're sure that we won't have
 		 * to recalculate it due to relocking. */
-		if (filp->f_flags & O_APPEND) {
+		if (appending) {
 			saved_pos = i_size_read(inode);
 			mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos);
 		} else {
-			saved_pos = iocb->ki_pos;
+			saved_pos = *ppos;
 		}
-		newsize = iocb->ki_left + saved_pos;
+		newsize = count + saved_pos;
 
 		mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
 		     (long long) saved_pos, (long long) newsize,
@@ -1062,19 +1031,66 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 		if (!clusters)
 			break;
 
-		ret = ocfs2_extend_file(inode, NULL, newsize, iocb->ki_left);
+		ret = ocfs2_extend_file(inode, NULL, newsize, count);
 		if (ret < 0) {
 			if (ret != -ENOSPC)
 				mlog_errno(ret);
-			goto out;
+			goto out_unlock;
 		}
 		break;
 	}
 
-	/* ok, we're done with i_size and alloc work */
-	iocb->ki_pos = saved_pos;
+	if (appending)
+		*ppos = saved_pos;
+
+out_unlock:
 	ocfs2_meta_unlock(inode, meta_level);
-	meta_level = -1;
+
+out:
+	return ret;
+}
+
+static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
+				    const struct iovec *iov,
+				    unsigned long nr_segs,
+				    loff_t pos)
+{
+	int ret, rw_level, have_alloc_sem = 0;
+	struct file *filp = iocb->ki_filp;
+	struct inode *inode = filp->f_dentry->d_inode;
+	int appending = filp->f_flags & O_APPEND ? 1 : 0;
+
+	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
+		   (unsigned int)nr_segs,
+		   filp->f_dentry->d_name.len,
+		   filp->f_dentry->d_name.name);
+
+	/* happy write of zero bytes */
+	if (iocb->ki_left == 0)
+		return 0;
+
+	mutex_lock(&inode->i_mutex);
+	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
+	if (filp->f_flags & O_DIRECT) {
+		have_alloc_sem = 1;
+		down_read(&inode->i_alloc_sem);
+	}
+
+	/* concurrent O_DIRECT writes are allowed */
+	rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1;
+	ret = ocfs2_rw_lock(inode, rw_level);
+	if (ret < 0) {
+		rw_level = -1;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_prepare_inode_for_write(filp->f_dentry, &iocb->ki_pos,
+					    iocb->ki_left, appending);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
 
 	/* communicate with ocfs2_dio_end_io */
 	ocfs2_iocb_set_rw_locked(iocb);
@@ -1100,8 +1116,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 	}
 
 out:
-	if (meta_level != -1)
-		ocfs2_meta_unlock(inode, meta_level);
 	if (have_alloc_sem)
 		up_read(&inode->i_alloc_sem);
 	if (rw_level != -1) 
@@ -1112,6 +1126,77 @@ out:
 	return ret;
 }
 
+static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
+				       struct file *out,
+				       loff_t *ppos,
+				       size_t len,
+				       unsigned int flags)
+{
+	int ret;
+	struct inode *inode = out->f_dentry->d_inode;
+
+	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
+		   (unsigned int)len,
+		   out->f_dentry->d_name.len,
+		   out->f_dentry->d_name.name);
+
+	inode_double_lock(inode, pipe->inode);
+
+	ret = ocfs2_rw_lock(inode, 1);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_prepare_inode_for_write(out->f_dentry, ppos, len, 0);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_unlock;
+	}
+
+	/* ok, we're done with i_size and alloc work */
+	ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);
+
+out_unlock:
+	ocfs2_rw_unlock(inode, 1);
+out:
+	inode_double_unlock(inode, pipe->inode);
+
+	mlog_exit(ret);
+	return ret;
+}
+
+static ssize_t ocfs2_file_splice_read(struct file *in,
+				      loff_t *ppos,
+				      struct pipe_inode_info *pipe,
+				      size_t len,
+				      unsigned int flags)
+{
+	int ret = 0;
+	struct inode *inode = in->f_dentry->d_inode;
+
+	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
+		   (unsigned int)len,
+		   in->f_dentry->d_name.len,
+		   in->f_dentry->d_name.name);
+
+	/*
+	 * See the comment in ocfs2_file_aio_read()
+	 */
+	ret = ocfs2_meta_lock(inode, NULL, 0);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto bail;
+	}
+	ocfs2_meta_unlock(inode, 0);
+
+	ret = generic_file_splice_read(in, ppos, pipe, len, flags);
+
+bail:
+	mlog_exit(ret);
+	return ret;
+}
+
 static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 				   const struct iovec *iov,
 				   unsigned long nr_segs,
@@ -1210,6 +1295,8 @@ const struct file_operations ocfs2_fops = {
 	.aio_read	= ocfs2_file_aio_read,
 	.aio_write	= ocfs2_file_aio_write,
 	.ioctl		= ocfs2_ioctl,
+	.splice_read	= ocfs2_file_splice_read,
+	.splice_write	= ocfs2_file_splice_write,
 };
 
 const struct file_operations ocfs2_dops = {
-- 
cgit v1.2.3


From 7f1a37e31f94b4f1c123d32ce9f69205ab2095bd Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Wed, 15 Nov 2006 15:48:42 +0800
Subject: ocfs2: core atime update functions

This patch adds the core routines for updating atime in ocfs2.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 39 +++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/dlmglue.h |  3 +++
 fs/ocfs2/file.c    | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/file.h    |  5 +++++
 fs/ocfs2/inode.c   |  1 -
 fs/ocfs2/ocfs2.h   |  2 ++
 fs/ocfs2/super.c   | 15 +++++++++++++++
 7 files changed, 116 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 7a22118ef04..69fba16efbd 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -49,6 +49,7 @@
 #include "dcache.h"
 #include "dlmglue.h"
 #include "extent_map.h"
+#include "file.h"
 #include "heartbeat.h"
 #include "inode.h"
 #include "journal.h"
@@ -1723,6 +1724,44 @@ int ocfs2_meta_lock_with_page(struct inode *inode,
 	return ret;
 }
 
+int ocfs2_meta_lock_atime(struct inode *inode,
+			  struct vfsmount *vfsmnt,
+			  int *level)
+{
+	int ret;
+
+	mlog_entry_void();
+	ret = ocfs2_meta_lock(inode, NULL, 0);
+	if (ret < 0) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	/*
+	 * If we should update atime, we will get EX lock,
+	 * otherwise we just get PR lock.
+	 */
+	if (ocfs2_should_update_atime(inode, vfsmnt)) {
+		struct buffer_head *bh = NULL;
+
+		ocfs2_meta_unlock(inode, 0);
+		ret = ocfs2_meta_lock(inode, &bh, 1);
+		if (ret < 0) {
+			mlog_errno(ret);
+			return ret;
+		}
+		*level = 1;
+		if (ocfs2_should_update_atime(inode, vfsmnt))
+			ocfs2_update_inode_atime(inode, bh);
+		if (bh)
+			brelse(bh);
+	} else
+		*level = 0;
+
+	mlog_exit(ret);
+	return ret;
+}
+
 void ocfs2_meta_unlock(struct inode *inode,
 		       int ex)
 {
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index a1165edef43..c343fca68cf 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -80,6 +80,9 @@ void ocfs2_data_unlock(struct inode *inode,
 		       int write);
 int ocfs2_rw_lock(struct inode *inode, int write);
 void ocfs2_rw_unlock(struct inode *inode, int write);
+int ocfs2_meta_lock_atime(struct inode *inode,
+			  struct vfsmount *vfsmnt,
+			  int *level);
 int ocfs2_meta_lock_full(struct inode *inode,
 			 struct buffer_head **ret_bh,
 			 int ex,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index b32cdb3bf7c..e82288f7cf2 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -32,6 +32,7 @@
 #include <linux/uio.h>
 #include <linux/sched.h>
 #include <linux/pipe_fs_i.h>
+#include <linux/mount.h>
 
 #define MLOG_MASK_PREFIX ML_INODE
 #include <cluster/masklog.h>
@@ -135,6 +136,57 @@ bail:
 	return (err < 0) ? -EIO : 0;
 }
 
+int ocfs2_should_update_atime(struct inode *inode,
+			      struct vfsmount *vfsmnt)
+{
+	struct timespec now;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
+		return 0;
+
+	if ((inode->i_flags & S_NOATIME) ||
+	    ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
+		return 0;
+
+	if ((vfsmnt->mnt_flags & MNT_NOATIME) ||
+	    ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
+		return 0;
+
+	now = CURRENT_TIME;
+	if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum))
+		return 0;
+	else
+		return 1;
+}
+
+int ocfs2_update_inode_atime(struct inode *inode,
+			     struct buffer_head *bh)
+{
+	int ret;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	handle_t *handle;
+
+	mlog_entry_void();
+
+	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+	if (handle == NULL) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	inode->i_atime = CURRENT_TIME;
+	ret = ocfs2_mark_inode_dirty(handle, inode, bh);
+	if (ret < 0)
+		mlog_errno(ret);
+
+	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+out:
+	mlog_exit(ret);
+	return ret;
+}
+
 int ocfs2_set_inode_size(handle_t *handle,
 			 struct inode *inode,
 			 struct buffer_head *fe_bh,
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 84f8b974dca..475ed8aee8b 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -54,4 +54,9 @@ int ocfs2_set_inode_size(handle_t *handle,
 			 struct buffer_head *fe_bh,
 			 u64 new_i_size);
 
+int ocfs2_should_update_atime(struct inode *inode,
+			      struct vfsmount *vfsmnt);
+int ocfs2_update_inode_atime(struct inode *inode,
+			     struct buffer_head *bh);
+
 #endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index ad4d4a1df77..42e361f3054 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -360,7 +360,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 				  inode);
 
 	ocfs2_set_inode_flags(inode);
-	inode->i_flags |= S_NOATIME;
 
 	status = 0;
 bail:
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 553e7021b6a..078883772bd 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -180,6 +180,7 @@ enum ocfs2_mount_options
 #define OCFS2_OSB_SOFT_RO	0x0001
 #define OCFS2_OSB_HARD_RO	0x0002
 #define OCFS2_OSB_ERROR_FS	0x0004
+#define OCFS2_DEFAULT_ATIME_QUANTUM	60
 
 struct ocfs2_journal;
 struct ocfs2_super
@@ -218,6 +219,7 @@ struct ocfs2_super
 	unsigned long osb_flags;
 
 	unsigned long s_mount_opt;
+	unsigned int s_atime_quantum;
 
 	u16 max_slots;
 	s16 node_num;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index d9b1eb6a3c6..b0992573dee 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -139,6 +139,7 @@ enum {
 	Opt_hb_local,
 	Opt_data_ordered,
 	Opt_data_writeback,
+	Opt_atime_quantum,
 	Opt_err,
 };
 
@@ -152,6 +153,7 @@ static match_table_t tokens = {
 	{Opt_hb_local, OCFS2_HB_LOCAL},
 	{Opt_data_ordered, "data=ordered"},
 	{Opt_data_writeback, "data=writeback"},
+	{Opt_atime_quantum, "atime_quantum=%u"},
 	{Opt_err, NULL}
 };
 
@@ -705,6 +707,7 @@ static int ocfs2_parse_options(struct super_block *sb,
 	while ((p = strsep(&options, ",")) != NULL) {
 		int token, option;
 		substring_t args[MAX_OPT_ARGS];
+		struct ocfs2_super * osb = OCFS2_SB(sb);
 
 		if (!*p)
 			continue;
@@ -745,6 +748,16 @@ static int ocfs2_parse_options(struct super_block *sb,
 		case Opt_data_writeback:
 			*mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK;
 			break;
+		case Opt_atime_quantum:
+			if (match_int(&args[0], &option)) {
+				status = 0;
+				goto bail;
+			}
+			if (option >= 0)
+				osb->s_atime_quantum = option;
+			else
+				osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
+			break;
 		default:
 			mlog(ML_ERROR,
 			     "Unrecognized mount option \"%s\" "
@@ -1265,6 +1278,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	init_waitqueue_head(&osb->checkpoint_event);
 	atomic_set(&osb->needs_checkpoint, 0);
 
+	osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
+
 	osb->node_num = O2NM_INVALID_NODE_NUM;
 	osb->slot_num = OCFS2_INVALID_SLOT;
 
-- 
cgit v1.2.3


From 25899deef46c226c49c53b42c00e0f032379c04b Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Wed, 15 Nov 2006 15:49:02 +0800
Subject: ocfs2: update file system paths to set atime

Conditionally update atime in ocfs2_file_aio_read(), ocfs2_readdir() and
ocfs2_mmap().

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dir.c     | 13 +++++++++++--
 fs/ocfs2/file.c    |  6 +++---
 fs/ocfs2/mmap.c    | 11 ++++++++++-
 fs/ocfs2/symlink.c |  4 ++++
 4 files changed, 28 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 5efea44d780..baad2aa27c1 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -82,6 +82,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct super_block * sb = inode->i_sb;
 	unsigned int ra_sectors = 16;
+	int lock_level = 0;
 
 	mlog_entry("dirino=%llu\n",
 		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -89,7 +90,15 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	stored = 0;
 	bh = NULL;
 
-	error = ocfs2_meta_lock(inode, NULL, 0);
+	error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
+	if (lock_level && error >= 0) {
+		/* We release EX lock which used to update atime
+		 * and get PR lock again to reduce contention
+		 * on commonly accessed directories. */
+		ocfs2_meta_unlock(inode, 1);
+		lock_level = 0;
+		error = ocfs2_meta_lock(inode, NULL, 0);
+	}
 	if (error < 0) {
 		if (error != -ENOENT)
 			mlog_errno(error);
@@ -198,7 +207,7 @@ revalidate:
 
 	stored = 0;
 bail:
-	ocfs2_meta_unlock(inode, 0);
+	ocfs2_meta_unlock(inode, lock_level);
 
 bail_nolock:
 	mlog_exit(stored);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e82288f7cf2..b64ef7c6e2d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1254,7 +1254,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 				   unsigned long nr_segs,
 				   loff_t pos)
 {
-	int ret = 0, rw_level = -1, have_alloc_sem = 0;
+	int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
 	struct file *filp = iocb->ki_filp;
 	struct inode *inode = filp->f_dentry->d_inode;
 
@@ -1296,12 +1296,12 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 	 * like i_size. This allows the checks down below
 	 * generic_file_aio_read() a chance of actually working. 
 	 */
-	ret = ocfs2_meta_lock(inode, NULL, 0);
+	ret = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto bail;
 	}
-	ocfs2_meta_unlock(inode, 0);
+	ocfs2_meta_unlock(inode, lock_level);
 
 	ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
 	if (ret == -EINVAL)
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 83934e33e5b..69f85ae392d 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -82,6 +82,8 @@ static struct vm_operations_struct ocfs2_file_vm_ops = {
 
 int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
 {
+	int ret = 0, lock_level = 0;
+
 	/* We don't want to support shared writable mappings yet. */
 	if (((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE))
 	    && ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
@@ -91,7 +93,14 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
 		return -EINVAL;
 	}
 
-	file_accessed(file);
+	ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode,
+				    file->f_vfsmnt, &lock_level);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+	ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level);
+out:
 	vma->vm_ops = &ocfs2_file_vm_ops;
 	return 0;
 }
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index c0f68aa6c17..957d6878b03 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -126,6 +126,10 @@ static int ocfs2_readlink(struct dentry *dentry,
 		goto out;
 	}
 
+	/*
+	 * Without vfsmount we can't update atime now,
+	 * but we will update atime here ultimately.
+	 */
 	ret = vfs_readlink(dentry, buffer, buflen, link);
 
 	brelse(bh);
-- 
cgit v1.2.3


From 58d206c2fa5cc0b35bf3ca48324d7633aa7d0412 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 20 Nov 2006 03:24:00 +0100
Subject: configfs: make configfs_dirent_exists() static

This patch makes the needlessly global configfs_dirent_exists() static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/configfs/dir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 452cfd1e785..c398861f78a 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -93,8 +93,8 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
  *
  * called with parent inode's i_mutex held
  */
-int configfs_dirent_exists(struct configfs_dirent *parent_sd,
-			   const unsigned char *new)
+static int configfs_dirent_exists(struct configfs_dirent *parent_sd,
+				  const unsigned char *new)
 {
 	struct configfs_dirent * sd;
 
-- 
cgit v1.2.3


From d38eb8db6aa359c060dfb72a29cf8d94a96657d8 Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Mon, 27 Nov 2006 09:59:21 +0800
Subject: ocfs2: implement i_op->permission

Implement .permission() in ocfs2_file_iops, ocfs2_special_file_iops and
ocfs2_dir_iops.

This helps us avoid some multi-node races with mode change and vfs
operations.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/file.c  | 24 ++++++++++++++++++++++++
 fs/ocfs2/file.h  |  2 ++
 fs/ocfs2/namei.c |  1 +
 3 files changed, 27 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index b64ef7c6e2d..8786b3c490a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -945,6 +945,28 @@ bail:
 	return err;
 }
 
+int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	int ret;
+
+	mlog_entry_void();
+
+	ret = ocfs2_meta_lock(inode, NULL, 0);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = generic_permission(inode, mask, NULL);
+	if (ret)
+		mlog_errno(ret);
+
+	ocfs2_meta_unlock(inode, 0);
+out:
+	mlog_exit(ret);
+	return ret;
+}
+
 static int ocfs2_write_remove_suid(struct inode *inode)
 {
 	int ret;
@@ -1329,11 +1351,13 @@ bail:
 struct inode_operations ocfs2_file_iops = {
 	.setattr	= ocfs2_setattr,
 	.getattr	= ocfs2_getattr,
+	.permission	= ocfs2_permission,
 };
 
 struct inode_operations ocfs2_special_file_iops = {
 	.setattr	= ocfs2_setattr,
 	.getattr	= ocfs2_getattr,
+	.permission	= ocfs2_permission,
 };
 
 const struct file_operations ocfs2_fops = {
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 475ed8aee8b..601a453f18a 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -48,6 +48,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
 int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
 int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		  struct kstat *stat);
+int ocfs2_permission(struct inode *inode, int mask,
+		     struct nameidata *nd);
 
 int ocfs2_set_inode_size(handle_t *handle,
 			 struct inode *inode,
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 583bffe9809..21db45ddf14 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2311,4 +2311,5 @@ struct inode_operations ocfs2_dir_iops = {
 	.rename		= ocfs2_rename,
 	.setattr	= ocfs2_setattr,
 	.getattr	= ocfs2_getattr,
+	.permission	= ocfs2_permission,
 };
-- 
cgit v1.2.3


From 5a874db4d9bfd8a4c6324d844a4d1c7cfa5cf2c4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 8 Nov 2006 00:19:38 -0800
Subject: [NET]: ipconfig and nfsroot annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/nfs/nfsroot.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 8dfefe41a8d..75f819dc025 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -98,7 +98,7 @@
 static char nfs_root_name[256] __initdata = "";
 
 /* Address of NFS server */
-static __u32 servaddr __initdata = 0;
+static __be32 servaddr __initdata = 0;
 
 /* Name of directory to mount */
 static char nfs_path[NFS_MAXPATHLEN] __initdata = { 0, };
@@ -327,7 +327,7 @@ static int __init root_nfs_name(char *name)
  */
 static int __init root_nfs_addr(void)
 {
-	if ((servaddr = root_server_addr) == INADDR_NONE) {
+	if ((servaddr = root_server_addr) == htonl(INADDR_NONE)) {
 		printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n");
 		return -1;
 	}
@@ -411,7 +411,7 @@ __setup("nfsroot=", nfs_root_setup);
  *  Construct sockaddr_in from address and port number.
  */
 static inline void
-set_sockaddr(struct sockaddr_in *sin, __u32 addr, __u16 port)
+set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port)
 {
 	sin->sin_family = AF_INET;
 	sin->sin_addr.s_addr = addr;
@@ -468,14 +468,13 @@ static int __init root_nfs_ports(void)
 		dprintk("Root-NFS: Portmapper on server returned %d "
 			"as nfsd port\n", port);
 	}
-	nfs_port = htons(nfs_port);
 
 	if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) {
 		printk(KERN_ERR "Root-NFS: Unable to get mountd port "
 				"number from server, using default\n");
 		port = mountd_port;
 	}
-	mount_port = htons(port);
+	mount_port = port;
 	dprintk("Root-NFS: mountd port is %d\n", port);
 
 	return 0;
@@ -496,7 +495,7 @@ static int __init root_nfs_get_handle(void)
 	int version = (nfs_data.flags & NFS_MOUNT_VER3) ?
 					NFS_MNT3_VERSION : NFS_MNT_VERSION;
 
-	set_sockaddr(&sin, servaddr, mount_port);
+	set_sockaddr(&sin, servaddr, htons(mount_port));
 	status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol);
 	if (status < 0)
 		printk(KERN_ERR "Root-NFS: Server returned error %d "
@@ -519,6 +518,6 @@ void * __init nfs_root_data(void)
 	 || root_nfs_ports() < 0
 	 || root_nfs_get_handle() < 0)
 		return NULL;
-	set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, nfs_port);
+	set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, htons(nfs_port));
 	return (void*)&nfs_data;
 }
-- 
cgit v1.2.3


From 3277c39f8d706afb6fefc02f49563a73bbd405b9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 14 Nov 2006 21:13:53 -0800
Subject: [NET]: Kill direct includes of asm/checksum.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/reiserfs/xattr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 7bdb0ed443e..1e4d6859017 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -41,7 +41,7 @@
 #include <linux/reiserfs_xattr.h>
 #include <linux/reiserfs_acl.h>
 #include <asm/uaccess.h>
-#include <asm/checksum.h>
+#include <net/checksum.h>
 #include <linux/smp_lock.h>
 #include <linux/stat.h>
 #include <asm/semaphore.h>
-- 
cgit v1.2.3


From 914e26379decf1fd984b22e51fd2e4209b7a7f1b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 18 Oct 2006 13:55:46 -0400
Subject: [PATCH] severing fs.h, radix-tree.h -> sched.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/conv.c     |  1 +
 fs/9p/fcall.c    |  1 +
 fs/9p/fid.c      |  1 +
 fs/9p/v9fs.c     |  1 +
 fs/9p/vfs_dir.c  |  1 +
 fs/9p/vfs_file.c |  1 +
 fs/inotify.c     |  1 +
 fs/jffs2/acl.c   |  1 +
 fs/jffs2/wbuf.c  |  1 +
 fs/jfs/ioctl.c   |  1 +
 fs/proc/root.c   |  1 +
 fs/super.c       | 18 ++++++++++++++++++
 fs/sync.c        |  1 +
 fs/utimes.c      |  1 +
 14 files changed, 31 insertions(+)

(limited to 'fs')

diff --git a/fs/9p/conv.c b/fs/9p/conv.c
index 56d88c1a09c..a3ed571eee3 100644
--- a/fs/9p/conv.c
+++ b/fs/9p/conv.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/sched.h>
 #include <linux/idr.h>
 #include <asm/uaccess.h>
 #include "debug.h"
diff --git a/fs/9p/fcall.c b/fs/9p/fcall.c
index 8556097fcda..dc336a67592 100644
--- a/fs/9p/fcall.c
+++ b/fs/9p/fcall.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/sched.h>
 #include <linux/idr.h>
 
 #include "debug.h"
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 70492ccb438..27507201f9e 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -23,6 +23,7 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/sched.h>
 #include <linux/idr.h>
 
 #include "debug.h"
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 0f628041e3f..0b96fae8b47 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -26,6 +26,7 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/sched.h>
 #include <linux/parser.h>
 #include <linux/idr.h>
 
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index e32d5971039..905c882f4e2 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -30,6 +30,7 @@
 #include <linux/stat.h>
 #include <linux/string.h>
 #include <linux/smp_lock.h>
+#include <linux/sched.h>
 #include <linux/inet.h>
 #include <linux/idr.h>
 
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index c3c47eda757..79e6f9cd734 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -26,6 +26,7 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/sched.h>
 #include <linux/file.h>
 #include <linux/stat.h>
 #include <linux/string.h>
diff --git a/fs/inotify.c b/fs/inotify.c
index 723836a1f71..f5099d86fd9 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -27,6 +27,7 @@
 #include <linux/idr.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
+#include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/writeback.h>
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 0ae3cd10702..73f0d60f73a 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
+#include <linux/sched.h>
 #include <linux/time.h>
 #include <linux/crc32.h>
 #include <linux/jffs2.h>
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index b9b700730df..70707309dfa 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -19,6 +19,7 @@
 #include <linux/crc32.h>
 #include <linux/mtd/nand.h>
 #include <linux/jiffies.h>
+#include <linux/sched.h>
 
 #include "nodelist.h"
 
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 37db5248826..ed814b1ff4d 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -9,6 +9,7 @@
 #include <linux/ctype.h>
 #include <linux/capability.h>
 #include <linux/time.h>
+#include <linux/sched.h>
 #include <asm/current.h>
 #include <asm/uaccess.h>
 
diff --git a/fs/proc/root.c b/fs/proc/root.c
index ffe66c38488..64d242b6dcf 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -13,6 +13,7 @@
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 #include <linux/init.h>
+#include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/bitops.h>
 #include <linux/smp_lock.h>
diff --git a/fs/super.c b/fs/super.c
index 47e554c12e7..84c320f6ad7 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -220,6 +220,24 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
 	return 0;
 }
 
+/*
+ * Superblock locking.  We really ought to get rid of these two.
+ */
+void lock_super(struct super_block * sb)
+{
+	get_fs_excl();
+	mutex_lock(&sb->s_lock);
+}
+
+void unlock_super(struct super_block * sb)
+{
+	put_fs_excl();
+	mutex_unlock(&sb->s_lock);
+}
+
+EXPORT_SYMBOL(lock_super);
+EXPORT_SYMBOL(unlock_super);
+
 /*
  * Write out and wait upon all dirty data associated with this
  * superblock.  Filesystem data as well as the underlying block
diff --git a/fs/sync.c b/fs/sync.c
index 1de747b5ddb..865f32be386 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -6,6 +6,7 @@
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/module.h>
+#include <linux/sched.h>
 #include <linux/writeback.h>
 #include <linux/syscalls.h>
 #include <linux/linkage.h>
diff --git a/fs/utimes.c b/fs/utimes.c
index 1bcd852fc4a..99cf2cb11fe 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -2,6 +2,7 @@
 #include <linux/fs.h>
 #include <linux/linkage.h>
 #include <linux/namei.h>
+#include <linux/sched.h>
 #include <linux/utime.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
-- 
cgit v1.2.3


From a1f8e7f7fb9d7e2cbcb53170edca7c0ac4680697 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 19 Oct 2006 16:08:53 -0400
Subject: [PATCH] severing skbuff.h -> highmem.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/compat.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index 8d0a0018a7d..fde52d40c0b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -45,6 +45,7 @@
 #include <linux/personality.h>
 #include <linux/rwsem.h>
 #include <linux/tsacct_kern.h>
+#include <linux/highmem.h>
 #include <linux/mm.h>
 
 #include <net/sock.h>		/* siocdevprivate_ioctl */
-- 
cgit v1.2.3


From bd01f843c3368dcee735c19603251669f23f4477 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 19 Oct 2006 17:23:57 -0400
Subject: [PATCH] severing skbuff.h -> poll.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/compat.c                 | 1 +
 fs/gfs2/locking/dlm/plock.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index fde52d40c0b..06dad665b88 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -46,6 +46,7 @@
 #include <linux/rwsem.h>
 #include <linux/tsacct_kern.h>
 #include <linux/highmem.h>
+#include <linux/poll.h>
 #include <linux/mm.h>
 
 #include <net/sock.h>		/* siocdevprivate_ioctl */
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
index 7365aec9511..3799f19b282 100644
--- a/fs/gfs2/locking/dlm/plock.c
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -8,6 +8,7 @@
 
 #include <linux/miscdevice.h>
 #include <linux/lock_dlm_plock.h>
+#include <linux/poll.h>
 
 #include "lock_dlm.h"
 
-- 
cgit v1.2.3


From bf1ab978be2318c5a564de9aa0f1a217b44170d4 Mon Sep 17 00:00:00 2001
From: Dwayne Grant McConnell <decimal@us.ibm.com>
Date: Thu, 23 Nov 2006 00:46:37 +0100
Subject: [POWERPC] coredump: Add SPU elf notes to coredump.

This patch adds SPU elf notes to the coredump. It creates a separate note
for each of /regs, /fpcr, /lslr, /decr, /decr_status, /mem, /signal1,
/signal1_type, /signal2, /signal2_type, /event_mask, /event_status,
/mbox_info, /ibox_info, /wbox_info, /dma_info, /proxydma_info, /object-id.

A new macro, ARCH_HAVE_EXTRA_NOTES, was created for architectures to
specify they have extra elf core notes.

A new macro, ELF_CORE_EXTRA_NOTES_SIZE, was created so the size of the
additional notes could be calculated and added to the notes phdr entry.

A new macro, ELF_CORE_WRITE_EXTRA_NOTES, was created so the new notes
would be written after the existing notes.

The SPU coredump code resides in spufs. Stub functions are provided in the
kernel which are hooked into the spufs code which does the actual work via
register_arch_coredump_calls().

A new set of __spufs_<file>_read/get() functions was provided to allow the
coredump code to read from the spufs files without having to lock the
SPU context for each file read from.

Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Dwayne Grant McConnell <decimal@us.ibm.com>
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
---
 fs/binfmt_elf.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 79b05a1a436..cc72bb43061 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1582,6 +1582,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 		
 		sz += thread_status_size;
 
+#ifdef ELF_CORE_WRITE_EXTRA_NOTES
+		sz += ELF_CORE_EXTRA_NOTES_SIZE;
+#endif
+
 		fill_elf_note_phdr(&phdr, sz, offset);
 		offset += sz;
 		DUMP_WRITE(&phdr, sizeof(phdr));
@@ -1622,6 +1626,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 		if (!writenote(notes + i, file, &foffset))
 			goto end_coredump;
 
+#ifdef ELF_CORE_WRITE_EXTRA_NOTES
+	ELF_CORE_WRITE_EXTRA_NOTES;
+#endif
+
 	/* write out the thread status notes section */
 	list_for_each(t, &thread_list) {
 		struct elf_thread_status *tmp =
-- 
cgit v1.2.3


From a79f43a54b7df7b12bc1c0fbf4ba76520ba40354 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Mon, 4 Dec 2006 15:45:46 +0000
Subject: [PATCH] Enable RAID autorun on Mac partition tables.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/partitions/mac.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
index c0871002d00..d4a0fad3563 100644
--- a/fs/partitions/mac.c
+++ b/fs/partitions/mac.c
@@ -74,6 +74,8 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev)
 			be32_to_cpu(part->start_block) * (secsize/512),
 			be32_to_cpu(part->block_count) * (secsize/512));
 
+		if (!strnicmp(part->type, "Linux_RAID", 10))
+			state->parts[slot].flags = 1;
 #ifdef CONFIG_PPC_PMAC
 		/*
 		 * If this is the first bootable partition, tell the
-- 
cgit v1.2.3


From 887bc5d00c02b32763845247024e8db5243ef857 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 5 Dec 2006 13:34:17 -0500
Subject: [GFS2] Fix indent in recovery.c

As per comments from Andrew Morton and Jan Engelhardt, this fixes the
indent and removes the "static" from a variable declaration since its
not needed in this case (now allocated on the stack of the function
in question).

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Jan Engelhardt <jengelh@linux01.gwdg.de>
Cc: Andrew Morton <akpm@osdl.org>
---
 fs/gfs2/recovery.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 4acf238e1db..d0c806b85c8 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -136,7 +136,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 {
 	struct buffer_head *bh;
 	struct gfs2_log_header_host lh;
-static const u32 nothing = 0;
+	const u32 nothing = 0;
 	u32 hash;
 	int error;
 
-- 
cgit v1.2.3


From e6b3c4db6fbcd0d33720696f37790d6b8be12313 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 11 Nov 2006 22:18:03 -0500
Subject: Fix a second potential rpc_wakeup race...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8118036cc44..93ac05889cb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -636,7 +636,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 		smp_wmb();
 	} else
 		status = data->rpc_status;
-	rpc_release_task(task);
+	rpc_put_task(task);
 	return status;
 }
 
@@ -742,7 +742,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 		smp_wmb();
 	} else
 		status = data->rpc_status;
-	rpc_release_task(task);
+	rpc_put_task(task);
 	if (status != 0)
 		return status;
 
@@ -3067,7 +3067,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 		if (status == 0)
 			nfs_post_op_update_inode(inode, &data->fattr);
 	}
-	rpc_release_task(task);
+	rpc_put_task(task);
 	return status;
 }
 
@@ -3314,7 +3314,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
 	if (IS_ERR(task))
 		goto out;
 	status = nfs4_wait_for_completion_rpc_task(task);
-	rpc_release_task(task);
+	rpc_put_task(task);
 out:
 	return status;
 }
@@ -3430,7 +3430,7 @@ static void nfs4_lock_release(void *calldata)
 		task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
 				data->arg.lock_seqid);
 		if (!IS_ERR(task))
-			rpc_release_task(task);
+			rpc_put_task(task);
 		dprintk("%s: cancelling lock!\n", __FUNCTION__);
 	} else
 		nfs_free_seqid(data->arg.lock_seqid);
@@ -3472,7 +3472,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 			ret = -EAGAIN;
 	} else
 		data->cancelled = 1;
-	rpc_release_task(task);
+	rpc_put_task(task);
 	dprintk("%s: done, ret = %d!\n", __FUNCTION__, ret);
 	return ret;
 }
-- 
cgit v1.2.3


From 8aca67f0ae2d8811165c22326825a645cc8e1b48 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 13 Nov 2006 16:23:44 -0500
Subject: SUNRPC: Fix a potential race in rpc_wake_up_task()

Use RCU to ensure that we can safely call rpc_finish_wakeup after we've
called __rpc_do_wake_up_task. If not, there is a theoretical race, in which
the rpc_task finishes executing, and gets freed first.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/read.c  |  8 +++++++-
 fs/nfs/write.c | 20 ++++++++++++++++----
 2 files changed, 23 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index c2e49c397a2..8b58bbf6e39 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -65,13 +65,19 @@ struct nfs_read_data *nfs_readdata_alloc(size_t len)
 	return p;
 }
 
-static void nfs_readdata_free(struct nfs_read_data *p)
+static void nfs_readdata_rcu_free(struct rcu_head *head)
 {
+	struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
 	mempool_free(p, nfs_rdata_mempool);
 }
 
+static void nfs_readdata_free(struct nfs_read_data *rdata)
+{
+	call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
+}
+
 void nfs_readdata_release(void *data)
 {
         nfs_readdata_free(data);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 883dd4a1c15..29d88209199 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -102,13 +102,19 @@ struct nfs_write_data *nfs_commit_alloc(void)
 	return p;
 }
 
-void nfs_commit_free(struct nfs_write_data *p)
+void nfs_commit_rcu_free(struct rcu_head *head)
 {
+	struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
 	mempool_free(p, nfs_commit_mempool);
 }
 
+void nfs_commit_free(struct nfs_write_data *wdata)
+{
+	call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free);
+}
+
 struct nfs_write_data *nfs_writedata_alloc(size_t len)
 {
 	unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -131,13 +137,19 @@ struct nfs_write_data *nfs_writedata_alloc(size_t len)
 	return p;
 }
 
-static void nfs_writedata_free(struct nfs_write_data *p)
+static void nfs_writedata_rcu_free(struct rcu_head *head)
 {
+	struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
 	mempool_free(p, nfs_wdata_mempool);
 }
 
+static void nfs_writedata_free(struct nfs_write_data *wdata)
+{
+	call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free);
+}
+
 void nfs_writedata_release(void *wdata)
 {
 	nfs_writedata_free(wdata);
@@ -258,7 +270,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
 io_error:
 	nfs_end_data_update(inode);
 	end_page_writeback(page);
-	nfs_writedata_free(wdata);
+	nfs_writedata_release(wdata);
 	return written ? written : result;
 }
 
@@ -1043,7 +1055,7 @@ out_bad:
 	while (!list_empty(&list)) {
 		data = list_entry(list.next, struct nfs_write_data, pages);
 		list_del(&data->pages);
-		nfs_writedata_free(data);
+		nfs_writedata_release(data);
 	}
 	nfs_mark_request_dirty(req);
 	nfs_clear_page_writeback(req);
-- 
cgit v1.2.3


From 46b9f8e1484352f09f229107ba2a758fe386d7f7 Mon Sep 17 00:00:00 2001
From: Andy Ryan <genanr@allantgroup.com>
Date: Tue, 7 Nov 2006 14:36:26 -0600
Subject: NFS Exclusive open not supported bug

When trying to open a file with the O_EXCL flag over NFS on a server that does
not support exclusive mode, the file does not open.  The reason,
rpc_call_sync returns a errno number, and not the nfs error number.  I fixed
it by changing the status check in nfs3proc.c.  Either this is how it should
be fixed, or rpc_call_sync should be fixed to return the NFS error.

Signed-off-by: Andy Ryan <genanr@allantgroup.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs3proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index e5f128ffc32..9d7ff4f83fa 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -369,7 +369,7 @@ again:
 
 	/* If the server doesn't support the exclusive creation semantics,
 	 * try again with simple 'guarded' mode. */
-	if (status == NFSERR_NOTSUPP) {
+	if (status == -ENOTSUPP) {
 		switch (arg.createmode) {
 			case NFS3_CREATE_EXCLUSIVE:
 				arg.createmode = NFS3_CREATE_GUARDED;
-- 
cgit v1.2.3


From 0b67130149b006628389ff3e8f46be9957af98aa Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 14 Nov 2006 16:12:23 -0500
Subject: NFS: Fix asynchronous read error handling

We must always call ->read_done() before we truncate the page data, or
decide to flag an error. The reasons are that
	in NFSv2, ->read_done() is where the eof flag gets set.
	in NFSv3/v4 ->read_done() handles EJUKEBOX-type errors, and
		  v4 state recovery.

However, we need to mark the pages as uptodate before we deal with short
read errors, since we may need to modify the nfs_read_data arguments.

We therefore split the current nfs_readpage_result() into two parts:
nfs_readpage_result(), which calls ->read_done() etc, and
nfs_readpage_retry(), which subsequently handles short reads.

Note: Removing the code that retries in case of a short read also fixes a
bug in nfs_direct_read_result(), which used to return a corrupted number of
bytes.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/read.c | 140 +++++++++++++++++++++++++++++-----------------------------
 1 file changed, 69 insertions(+), 71 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 8b58bbf6e39..e879ee6e138 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -460,6 +460,55 @@ nfs_pagein_list(struct list_head *head, int rpages)
 	return error;
 }
 
+/*
+ * This is the callback from RPC telling us whether a reply was
+ * received or some error occurred (timeout or socket shutdown).
+ */
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
+{
+	int status;
+
+	dprintk("%s: %4d, (status %d)\n", __FUNCTION__, task->tk_pid,
+			task->tk_status);
+
+	status = NFS_PROTO(data->inode)->read_done(task, data);
+	if (status != 0)
+		return status;
+
+	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
+
+	if (task->tk_status == -ESTALE) {
+		set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
+		nfs_mark_for_revalidate(data->inode);
+	}
+	spin_lock(&data->inode->i_lock);
+	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+	spin_unlock(&data->inode->i_lock);
+	return 0;
+}
+
+static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+{
+	struct nfs_readargs *argp = &data->args;
+	struct nfs_readres *resp = &data->res;
+
+	if (resp->eof || resp->count == argp->count)
+		return 0;
+
+	/* This is a short read! */
+	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
+	/* Has the server at least made some progress? */
+	if (resp->count == 0)
+		return 0;
+
+	/* Yes, so retry the read at the end of the data */
+	argp->offset += resp->count;
+	argp->pgbase += resp->count;
+	argp->count -= resp->count;
+	rpc_restart_call(task);
+	return -EAGAIN;
+}
+
 /*
  * Handle a read reply that fills part of a page.
  */
@@ -469,12 +518,16 @@ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
 	struct nfs_page *req = data->req;
 	struct page *page = req->wb_page;
  
-	if (likely(task->tk_status >= 0))
-		nfs_readpage_truncate_uninitialised_page(data);
-	else
-		SetPageError(page);
 	if (nfs_readpage_result(task, data) != 0)
 		return;
+
+	if (likely(task->tk_status >= 0)) {
+		nfs_readpage_truncate_uninitialised_page(data);
+		if (nfs_readpage_retry(task, data) != 0)
+			return;
+	}
+	if (unlikely(task->tk_status < 0))
+		SetPageError(page);
 	if (atomic_dec_and_test(&req->wb_complete)) {
 		if (!PageError(page))
 			SetPageUptodate(page);
@@ -502,25 +555,13 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
 	count += base;
 	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
 		SetPageUptodate(*pages);
-	if (count != 0)
+	if (count == 0)
+		return;
+	/* Was this a short read? */
+	if (data->res.eof || data->res.count == data->args.count)
 		SetPageUptodate(*pages);
 }
 
-static void nfs_readpage_set_pages_error(struct nfs_read_data *data)
-{
-	unsigned int count = data->args.count;
-	unsigned int base = data->args.pgbase;
-	struct page **pages;
-
-	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
-	base &= ~PAGE_CACHE_MASK;
-	count += base;
-	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
-		SetPageError(*pages);
-	if (count != 0)
-		SetPageError(*pages);
-}
-
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
@@ -529,19 +570,20 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 {
 	struct nfs_read_data *data = calldata;
 
+	if (nfs_readpage_result(task, data) != 0)
+		return;
 	/*
-	 * Note: nfs_readpage_result may change the values of
+	 * Note: nfs_readpage_retry may change the values of
 	 * data->args. In the multi-page case, we therefore need
-	 * to ensure that we call the next nfs_readpage_set_page_uptodate()
-	 * first in the multi-page case.
+	 * to ensure that we call nfs_readpage_set_pages_uptodate()
+	 * first.
 	 */
 	if (likely(task->tk_status >= 0)) {
 		nfs_readpage_truncate_uninitialised_page(data);
 		nfs_readpage_set_pages_uptodate(data);
-	} else
-		nfs_readpage_set_pages_error(data);
-	if (nfs_readpage_result(task, data) != 0)
-		return;
+		if (nfs_readpage_retry(task, data) != 0)
+			return;
+	}
 	while (!list_empty(&data->pages)) {
 		struct nfs_page *req = nfs_list_entry(data->pages.next);
 
@@ -555,50 +597,6 @@ static const struct rpc_call_ops nfs_read_full_ops = {
 	.rpc_release = nfs_readdata_release,
 };
 
-/*
- * This is the callback from RPC telling us whether a reply was
- * received or some error occurred (timeout or socket shutdown).
- */
-int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
-{
-	struct nfs_readargs *argp = &data->args;
-	struct nfs_readres *resp = &data->res;
-	int status;
-
-	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
-		task->tk_pid, task->tk_status);
-
-	status = NFS_PROTO(data->inode)->read_done(task, data);
-	if (status != 0)
-		return status;
-
-	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
-
-	if (task->tk_status < 0) {
-		if (task->tk_status == -ESTALE) {
-			set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
-			nfs_mark_for_revalidate(data->inode);
-		}
-	} else if (resp->count < argp->count && !resp->eof) {
-		/* This is a short read! */
-		nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
-		/* Has the server at least made some progress? */
-		if (resp->count != 0) {
-			/* Yes, so retry the read at the end of the data */
-			argp->offset += resp->count;
-			argp->pgbase += resp->count;
-			argp->count -= resp->count;
-			rpc_restart_call(task);
-			return -EAGAIN;
-		}
-		task->tk_status = -EIO;
-	}
-	spin_lock(&data->inode->i_lock);
-	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
-	spin_unlock(&data->inode->i_lock);
-	return 0;
-}
-
 /*
  * Read a page over NFS.
  * We read the page synchronously in the following case:
-- 
cgit v1.2.3


From cf1308ff7829017dab0dbcc817c63dc9c212923e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 19 Nov 2006 16:44:52 -0500
Subject: NFS: Fix missing page_unlock() in nfs_readpage

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/read.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index e879ee6e138..29094a54508 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -145,12 +145,12 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 {
 	unsigned int	rsize = NFS_SERVER(inode)->rsize;
 	unsigned int	count = PAGE_CACHE_SIZE;
-	int		result;
+	int result = -ENOMEM;
 	struct nfs_read_data *rdata;
 
 	rdata = nfs_readdata_alloc(count);
 	if (!rdata)
-		return -ENOMEM;
+		goto out_unlock;
 
 	memset(rdata, 0, sizeof(*rdata));
 	rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
@@ -218,8 +218,9 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 	result = 0;
 
 io_error:
-	unlock_page(page);
 	nfs_readdata_free(rdata);
+out_unlock:
+	unlock_page(page);
 	return result;
 }
 
@@ -630,9 +631,10 @@ int nfs_readpage(struct file *file, struct page *page)
 		goto out_error;
 
 	if (file == NULL) {
+		error = -EBADF;
 		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 		if (ctx == NULL)
-			return -EBADF;
+			goto out_error;
 	} else
 		ctx = get_nfs_open_context((struct nfs_open_context *)
 				file->private_data);
-- 
cgit v1.2.3


From e8e058e830f46a76f837522e5e2df46d4303111f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 15 Nov 2006 17:31:56 -0500
Subject: NFS: Fix nfs_sync_inode_wait(FLUSH_INVALIDATE)

Currently nfs_sync_inode_wait() will fail to loop correctly when we call
nfs_sync_inode_wait with the FLUSH_INVALIDATE argument.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 29d88209199..9b8eb9373ad 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1525,9 +1525,10 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
 		pages = nfs_scan_dirty(inode, &head, idx_start, npages);
 		if (pages != 0) {
 			spin_unlock(&nfsi->req_lock);
-			if (how & FLUSH_INVALIDATE)
+			if (how & FLUSH_INVALIDATE) {
 				nfs_cancel_dirty_list(&head);
-			else
+				ret = pages;
+			} else
 				ret = nfs_flush_list(inode, &head, pages, how);
 			spin_lock(&nfsi->req_lock);
 			continue;
@@ -1540,6 +1541,7 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
 		if (how & FLUSH_INVALIDATE) {
 			spin_unlock(&nfsi->req_lock);
 			nfs_cancel_commit_list(&head);
+			ret = pages;
 			spin_lock(&nfsi->req_lock);
 			continue;
 		}
-- 
cgit v1.2.3


From a99b71c9c43499bf2312c64f5c1d367aaf559dc4 Mon Sep 17 00:00:00 2001
From: Frank Filz <ffilzlnx@us.ibm.com>
Date: Tue, 17 Oct 2006 10:24:36 -0700
Subject: NFS: Remove use of the Big Kernel Lock around calls to rpc_execute.

Remove use of the Big Kernel Lock around calls to rpc_execute.

Signed-off-by: Frank Filz <ffilz@us.ibm.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 6 ------
 fs/nfs/read.c   | 2 --
 fs/nfs/write.c  | 2 --
 3 files changed, 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bdfabf854a5..784bbb54e6c 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -307,9 +307,7 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
 
 		data->task.tk_cookie = (unsigned long) inode;
 
-		lock_kernel();
 		rpc_execute(&data->task);
-		unlock_kernel();
 
 		dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
 				data->task.tk_pid,
@@ -475,9 +473,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 
 	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
 
-	lock_kernel();
 	rpc_execute(&data->task);
-	unlock_kernel();
 }
 
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
@@ -641,9 +637,7 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
 		data->task.tk_priority = RPC_PRIORITY_NORMAL;
 		data->task.tk_cookie = (unsigned long) inode;
 
-		lock_kernel();
 		rpc_execute(&data->task);
-		unlock_kernel();
 
 		dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
 				data->task.tk_pid,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 29094a54508..cca9fa25999 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -323,9 +323,7 @@ static void nfs_execute_read(struct nfs_read_data *data)
 	sigset_t oldset;
 
 	rpc_clnt_sigmask(clnt, &oldset);
-	lock_kernel();
 	rpc_execute(&data->task);
-	unlock_kernel();
 	rpc_clnt_sigunmask(clnt, &oldset);
 }
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9b8eb9373ad..4948dc1fbd7 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -991,9 +991,7 @@ static void nfs_execute_write(struct nfs_write_data *data)
 	sigset_t oldset;
 
 	rpc_clnt_sigmask(clnt, &oldset);
-	lock_kernel();
 	rpc_execute(&data->task);
-	unlock_kernel();
 	rpc_clnt_sigunmask(clnt, &oldset);
 }
 
-- 
cgit v1.2.3


From cae823c4c089d2c7c2a90f39f20376ccd85f9893 Mon Sep 17 00:00:00 2001
From: Frank Filz <ffilzlnx@us.ibm.com>
Date: Tue, 17 Oct 2006 10:24:38 -0700
Subject: NFS: Remove use of the Big Kernel Lock around calls to rpc_call_sync

Remove use of the Big Kernel Lock around calls to rpc_call_sync.

Signed-off-by: Frank Filz <ffilz@us.ibm.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs3proc.c | 3 ---
 fs/nfs/nfs4proc.c | 2 --
 fs/nfs/proc.c     | 3 ---
 3 files changed, 8 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 9d7ff4f83fa..7fba06306b8 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -690,8 +690,6 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	};
 	int			status;
 
-	lock_kernel();
-
 	if (plus)
 		msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
 
@@ -702,7 +700,6 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_refresh_inode(dir, &dir_attr);
 	dprintk("NFS reply readdir: %d\n", status);
-	unlock_kernel();
 	return status;
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 93ac05889cb..4b5fc714d78 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2223,13 +2223,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 			dentry->d_parent->d_name.name,
 			dentry->d_name.name,
 			(unsigned long long)cookie);
-	lock_kernel();
 	nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
 	res.pgbase = args.pgbase;
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	if (status == 0)
 		memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
-	unlock_kernel();
 	dprintk("%s: returns %d\n", __FUNCTION__, status);
 	return status;
 }
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 4529cc4f3f8..7a9013dd5d0 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -545,13 +545,10 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	};
 	int			status;
 
-	lock_kernel();
-
 	dprintk("NFS call  readdir %d\n", (unsigned int)cookie);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 
 	dprintk("NFS reply readdir: %d\n", status);
-	unlock_kernel();
 	return status;
 }
 
-- 
cgit v1.2.3


From eb5f8545ffff98a11c6656d4d2106341ab69c57d Mon Sep 17 00:00:00 2001
From: Frank Filz <ffilzlnx@us.ibm.com>
Date: Tue, 17 Oct 2006 10:24:42 -0700
Subject: NFS: Remove use of the Big Kernel Lock around nfs calls to readlink

Remove use of the Big Kernel Lock around indirect calls to
nfs3_proc_readlink and nfs4_proc_readlink, both of which
basically call rpc_call_sync.

Signed-off-by: Frank Filz <ffilz@us.ibm.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/symlink.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 600bbe630ab..6c686112cc0 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -33,9 +33,7 @@ static int nfs_symlink_filler(struct inode *inode, struct page *page)
 {
 	int error;
 
-	lock_kernel();
 	error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE);
-	unlock_kernel();
 	if (error < 0)
 		goto error;
 	SetPageUptodate(page);
-- 
cgit v1.2.3


From 28c6925fce3927a9fe3c5b44af5fb266680fdcea Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 16 Sep 2006 13:04:50 -0400
Subject: NFS: Clean up nfs_flush_inode()

Make it take a struct writepages argument, and rename to
nfs_flush_mapping().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 4948dc1fbd7..1d72096c4d2 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -79,8 +79,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
 					    unsigned int, unsigned int);
 static int nfs_wait_on_write_congestion(struct address_space *, int);
 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
-static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
-			   unsigned int npages, int how);
+static int nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how);
 static const struct rpc_call_ops nfs_write_partial_ops;
 static const struct rpc_call_ops nfs_write_full_ops;
 static const struct rpc_call_ops nfs_commit_ops;
@@ -352,7 +351,7 @@ do_it:
 	if (!IS_SYNC(inode) && inode_referenced) {
 		err = nfs_writepage_async(ctx, inode, page, 0, offset);
 		if (!wbc->for_writepages)
-			nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
+			nfs_flush_mapping(page->mapping, wbc, wb_priority(wbc));
 	} else {
 		err = nfs_writepage_sync(ctx, inode, page, 0,
 						offset, priority);
@@ -391,11 +390,10 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 			return 0;
 		nfs_wait_on_write_congestion(mapping, 0);
 	}
-	err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
+	err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc));
 	if (err < 0)
 		goto out;
 	nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
-	wbc->nr_to_write -= err;
 	if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
 		err = nfs_wait_on_requests(inode, 0, 0);
 		if (err < 0)
@@ -1469,20 +1467,22 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i
 }
 #endif
 
-static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
-			   unsigned int npages, int how)
+static int nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_inode *nfsi = NFS_I(mapping->host);
 	LIST_HEAD(head);
+	pgoff_t index = wbc->range_start >> PAGE_CACHE_SHIFT;
+	unsigned long npages = 1 + (wbc->range_end >> PAGE_CACHE_SHIFT) - index;
 	int res;
 
 	spin_lock(&nfsi->req_lock);
-	res = nfs_scan_dirty(inode, &head, idx_start, npages);
+	res = nfs_scan_dirty(mapping->host, &head, index, npages);
 	spin_unlock(&nfsi->req_lock);
 	if (res) {
-		int error = nfs_flush_list(inode, &head, res, how);
+		int error = nfs_flush_list(mapping->host, &head, res, how);
 		if (error < 0)
 			return error;
+		wbc->nr_to_write -= res;
 	}
 	return res;
 }
-- 
cgit v1.2.3


From 3f442547b76bf9fb70d7aecc41cf1980459253c9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 17 Sep 2006 14:46:44 -0400
Subject: NFS: Clean up nfs_scan_dirty()

Pass down struct writeback control.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/pagelist.c | 76 +++++++++++++++++++++++++++++++++++++------------------
 fs/nfs/write.c    | 54 ++++++++++++---------------------------
 2 files changed, 67 insertions(+), 63 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 829af323f28..e046c9b6a9d 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -17,6 +17,7 @@
 #include <linux/nfs_page.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
+#include <linux/writeback.h>
 
 #define NFS_PARANOIA 1
 
@@ -268,11 +269,10 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
 
 #define NFS_SCAN_MAXENTRIES 16
 /**
- * nfs_scan_lock_dirty - Scan the radix tree for dirty requests
- * @nfsi: NFS inode
+ * nfs_scan_dirty - Scan the radix tree for dirty requests
+ * @mapping: pointer to address space
+ * @wbc: writeback_control structure
  * @dst: Destination list
- * @idx_start: lower bound of page->index to scan
- * @npages: idx_start + npages sets the upper bound to scan.
  *
  * Moves elements from one of the inode request lists.
  * If the number of requests is set to 0, the entire address_space
@@ -280,46 +280,72 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
  * The requests are *not* checked to ensure that they form a contiguous set.
  * You must be holding the inode's req_lock when calling this function
  */
-int
-nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst,
-	      unsigned long idx_start, unsigned int npages)
+long nfs_scan_dirty(struct address_space *mapping,
+			struct writeback_control *wbc,
+			struct list_head *dst)
 {
+	struct nfs_inode *nfsi = NFS_I(mapping->host);
 	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
 	struct nfs_page *req;
-	unsigned long idx_end;
+	pgoff_t idx_start, idx_end;
+	long count = wbc->nr_to_write;
+	long res = 0;
 	int found, i;
-	int res;
 
-	res = 0;
-	if (npages == 0)
-		idx_end = ~0;
-	else
-		idx_end = idx_start + npages - 1;
+	if (nfsi->ndirty == 0 || count <= 0)
+		return 0;
+	if (wbc->range_cyclic) {
+		idx_start = 0;
+		idx_end = ULONG_MAX;
+	} else if (wbc->range_end == 0) {
+		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
+		idx_end = ULONG_MAX;
+	} else {
+		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
+		idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
+	}
 
 	for (;;) {
+		unsigned int toscan = NFS_SCAN_MAXENTRIES;
+
+		if (toscan > count)
+			toscan = count;
 		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
-				(void **)&pgvec[0], idx_start, NFS_SCAN_MAXENTRIES,
+				(void **)&pgvec[0], idx_start, toscan,
 				NFS_PAGE_TAG_DIRTY);
+
+		/* Did we make progress? */
 		if (found <= 0)
 			break;
+
 		for (i = 0; i < found; i++) {
 			req = pgvec[i];
-			if (req->wb_index > idx_end)
+			if (!wbc->range_cyclic && req->wb_index > idx_end)
 				goto out;
 
-			idx_start = req->wb_index + 1;
+			/* Try to lock request and mark it for writeback */
+			if (!nfs_set_page_writeback_locked(req))
+				goto next;
+			radix_tree_tag_clear(&nfsi->nfs_page_tree,
+					req->wb_index, NFS_PAGE_TAG_DIRTY);
+			nfsi->ndirty--;
+			nfs_list_remove_request(req);
+			nfs_list_add_request(req, dst);
+			dec_zone_page_state(req->wb_page, NR_FILE_DIRTY);
+			res++;
+			if (res == LONG_MAX)
+				goto out;
+			count--;
+			if (count == 0)
+				goto out;
 
-			if (nfs_set_page_writeback_locked(req)) {
-				radix_tree_tag_clear(&nfsi->nfs_page_tree,
-						req->wb_index, NFS_PAGE_TAG_DIRTY);
-				nfs_list_remove_request(req);
-				nfs_list_add_request(req, dst);
-				dec_zone_page_state(req->wb_page, NR_FILE_DIRTY);
-				res++;
-			}
+next:
+			idx_start = req->wb_index + 1;
 		}
 	}
 out:
+	wbc->nr_to_write = count;
+	WARN_ON ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty));
 	return res;
 }
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1d72096c4d2..dbc89fa7e9d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -79,7 +79,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
 					    unsigned int, unsigned int);
 static int nfs_wait_on_write_congestion(struct address_space *, int);
 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
-static int nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how);
+static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how);
 static const struct rpc_call_ops nfs_write_partial_ops;
 static const struct rpc_call_ops nfs_write_full_ops;
 static const struct rpc_call_ops nfs_commit_ops;
@@ -400,10 +400,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 			goto out;
 	}
 	err = nfs_commit_inode(inode, wb_priority(wbc));
-	if (err > 0) {
-		wbc->nr_to_write -= err;
+	if (err > 0)
 		err = 0;
-	}
 out:
 	clear_bit(BDI_write_congested, &bdi->state);
 	wake_up_all(&nfs_write_congestion);
@@ -607,31 +605,6 @@ static void nfs_cancel_commit_list(struct list_head *head)
 	}
 }
 
-/*
- * nfs_scan_dirty - Scan an inode for dirty requests
- * @inode: NFS inode to scan
- * @dst: destination list
- * @idx_start: lower bound of page->index to scan.
- * @npages: idx_start + npages sets the upper bound to scan.
- *
- * Moves requests from the inode's dirty page list.
- * The requests are *not* checked to ensure that they form a contiguous set.
- */
-static int
-nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-	int res = 0;
-
-	if (nfsi->ndirty != 0) {
-		res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages);
-		nfsi->ndirty -= res;
-		if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
-			printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
-	}
-	return res;
-}
-
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 /*
  * nfs_scan_commit - Scan an inode for commit requests
@@ -1467,22 +1440,19 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i
 }
 #endif
 
-static int nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
+static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
 {
 	struct nfs_inode *nfsi = NFS_I(mapping->host);
 	LIST_HEAD(head);
-	pgoff_t index = wbc->range_start >> PAGE_CACHE_SHIFT;
-	unsigned long npages = 1 + (wbc->range_end >> PAGE_CACHE_SHIFT) - index;
-	int res;
+	long res;
 
 	spin_lock(&nfsi->req_lock);
-	res = nfs_scan_dirty(mapping->host, &head, index, npages);
+	res = nfs_scan_dirty(mapping, wbc, &head);
 	spin_unlock(&nfsi->req_lock);
 	if (res) {
 		int error = nfs_flush_list(mapping->host, &head, res, how);
 		if (error < 0)
 			return error;
-		wbc->nr_to_write -= res;
 	}
 	return res;
 }
@@ -1506,13 +1476,21 @@ int nfs_commit_inode(struct inode *inode, int how)
 }
 #endif
 
-int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
+long nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
 		unsigned int npages, int how)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
+	struct address_space *mapping = inode->i_mapping;
+	struct writeback_control wbc = {
+		.bdi = mapping->backing_dev_info,
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = LONG_MAX,
+		.range_start = ((loff_t)idx_start) << PAGE_CACHE_SHIFT,
+		.range_end = ((loff_t)(idx_start + npages - 1)) << PAGE_CACHE_SHIFT,
+	};
 	LIST_HEAD(head);
 	int nocommit = how & FLUSH_NOCOMMIT;
-	int pages, ret;
+	long pages, ret;
 
 	how &= ~FLUSH_NOCOMMIT;
 	spin_lock(&nfsi->req_lock);
@@ -1520,7 +1498,7 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
 		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
 		if (ret != 0)
 			continue;
-		pages = nfs_scan_dirty(inode, &head, idx_start, npages);
+		pages = nfs_scan_dirty(mapping, &wbc, &head);
 		if (pages != 0) {
 			spin_unlock(&nfsi->req_lock);
 			if (how & FLUSH_INVALIDATE) {
-- 
cgit v1.2.3


From 1c75950b9a2254ef08f986e00ad20266ae9ba7f1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 9 Oct 2006 16:18:38 -0400
Subject: NFS: cleanup of nfs_sync_inode_wait()

Allow callers to directly pass it a struct writeback_control.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c  |  9 ++++--
 fs/nfs/inode.c |  2 +-
 fs/nfs/write.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 88 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index cc93865cea9..d6ee60fc3ba 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -307,11 +307,14 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse
 
 static void nfs_invalidate_page(struct page *page, unsigned long offset)
 {
-	struct inode *inode = page->mapping->host;
+	loff_t range_start, range_end;
 
+	if (offset != 0)
+		return;
 	/* Cancel any unstarted writes on this page */
-	if (offset == 0)
-		nfs_sync_inode_wait(inode, page->index, 1, FLUSH_INVALIDATE);
+	range_start = page_offset(page);
+	range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
+	nfs_sync_mapping_range(page->mapping, range_start, range_end, FLUSH_INVALIDATE);
 }
 
 static int nfs_release_page(struct page *page, gfp_t gfp)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 08cc4c5919a..7c32187f953 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -422,7 +422,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	int err;
 
 	/* Flush out writes to the server in order to update c/mtime */
-	nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT);
+	nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT);
 
 	/*
 	 * We may force a getattr if the user cares about atime.
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index dbc89fa7e9d..310fdeca625 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -80,6 +80,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
 static int nfs_wait_on_write_congestion(struct address_space *, int);
 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
 static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how);
+static int nfs_wb_page_priority(struct inode *inode, struct page *page, int how);
 static const struct rpc_call_ops nfs_write_partial_ops;
 static const struct rpc_call_ops nfs_write_full_ops;
 static const struct rpc_call_ops nfs_commit_ops;
@@ -1476,29 +1477,38 @@ int nfs_commit_inode(struct inode *inode, int how)
 }
 #endif
 
-long nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
-		unsigned int npages, int how)
+long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
 {
+	struct inode *inode = mapping->host;
 	struct nfs_inode *nfsi = NFS_I(inode);
-	struct address_space *mapping = inode->i_mapping;
-	struct writeback_control wbc = {
-		.bdi = mapping->backing_dev_info,
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = LONG_MAX,
-		.range_start = ((loff_t)idx_start) << PAGE_CACHE_SHIFT,
-		.range_end = ((loff_t)(idx_start + npages - 1)) << PAGE_CACHE_SHIFT,
-	};
+	unsigned long idx_start, idx_end;
+	unsigned int npages = 0;
 	LIST_HEAD(head);
 	int nocommit = how & FLUSH_NOCOMMIT;
 	long pages, ret;
 
+	/* FIXME */
+	if (wbc->range_cyclic)
+		idx_start = 0;
+	else {
+		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
+		idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		if (idx_end > idx_start) {
+			unsigned long l_npages = 1 + idx_end - idx_start;
+			npages = l_npages;
+			if (sizeof(npages) != sizeof(l_npages) &&
+					(unsigned long)npages != l_npages)
+				npages = 0;
+		}
+	}
 	how &= ~FLUSH_NOCOMMIT;
 	spin_lock(&nfsi->req_lock);
 	do {
+		wbc->pages_skipped = 0;
 		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
 		if (ret != 0)
 			continue;
-		pages = nfs_scan_dirty(mapping, &wbc, &head);
+		pages = nfs_scan_dirty(mapping, wbc, &head);
 		if (pages != 0) {
 			spin_unlock(&nfsi->req_lock);
 			if (how & FLUSH_INVALIDATE) {
@@ -1509,11 +1519,16 @@ long nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
 			spin_lock(&nfsi->req_lock);
 			continue;
 		}
+		if (wbc->pages_skipped != 0)
+			continue;
 		if (nocommit)
 			break;
 		pages = nfs_scan_commit(inode, &head, idx_start, npages);
-		if (pages == 0)
+		if (pages == 0) {
+			if (wbc->pages_skipped != 0)
+				continue;
 			break;
+		}
 		if (how & FLUSH_INVALIDATE) {
 			spin_unlock(&nfsi->req_lock);
 			nfs_cancel_commit_list(&head);
@@ -1530,6 +1545,60 @@ long nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
 	return ret;
 }
 
+/*
+ * flush the inode to disk.
+ */
+int nfs_wb_all(struct inode *inode)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct writeback_control wbc = {
+		.bdi = mapping->backing_dev_info,
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = LONG_MAX,
+		.range_cyclic = 1,
+	};
+	int ret;
+
+	ret = nfs_sync_mapping_wait(mapping, &wbc, 0);
+	if (ret >= 0)
+		return 0;
+	return ret;
+}
+
+int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how)
+{
+	struct writeback_control wbc = {
+		.bdi = mapping->backing_dev_info,
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = LONG_MAX,
+		.range_start = range_start,
+		.range_end = range_end,
+	};
+	int ret;
+
+	ret = nfs_sync_mapping_wait(mapping, &wbc, how);
+	if (ret >= 0)
+		return 0;
+	return ret;
+}
+
+static int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
+{
+	loff_t range_start = page_offset(page);
+	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
+
+	return nfs_sync_mapping_range(inode->i_mapping, range_start, range_end, how | FLUSH_STABLE);
+}
+
+/*
+ * Write back all requests on one page - we do this before reading it.
+ */
+int nfs_wb_page(struct inode *inode, struct page* page)
+{
+	return nfs_wb_page_priority(inode, page, 0);
+}
+
+
 int __init nfs_init_writepagecache(void)
 {
 	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
-- 
cgit v1.2.3


From 277459d2e2cd40594967757e8fd016c4c7016614 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Dec 2006 00:35:35 -0500
Subject: NFS: Store pointer to the nfs_page in page->private

This will allow fast lookup of the nfs_page from the struct page instead of
having to search the radix tree.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 66 +++++++++++++++++++++++++++++-----------------------------
 1 file changed, 33 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 310fdeca625..6df8319de06 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -155,6 +155,29 @@ void nfs_writedata_release(void *wdata)
 	nfs_writedata_free(wdata);
 }
 
+static struct nfs_page *nfs_page_find_request_locked(struct page *page)
+{
+	struct nfs_page *req = NULL;
+
+	if (PagePrivate(page)) {
+		req = (struct nfs_page *)page_private(page);
+		if (req != NULL)
+			atomic_inc(&req->wb_count);
+	}
+	return req;
+}
+
+static struct nfs_page *nfs_page_find_request(struct page *page)
+{
+	struct nfs_page *req = NULL;
+	spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
+
+	spin_lock(req_lock);
+	req = nfs_page_find_request_locked(page);
+	spin_unlock(req_lock);
+	return req;
+}
+
 /* Adjust the file length if we're writing beyond the end */
 static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
 {
@@ -429,6 +452,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 			nfsi->change_attr++;
 	}
 	SetPagePrivate(req->wb_page);
+	set_page_private(req->wb_page, (unsigned long)req);
 	nfsi->npages++;
 	atomic_inc(&req->wb_count);
 	return 0;
@@ -445,6 +469,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 	BUG_ON (!NFS_WBACK_BUSY(req));
 
 	spin_lock(&nfsi->req_lock);
+	set_page_private(req->wb_page, 0);
 	ClearPagePrivate(req->wb_page);
 	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
 	nfsi->npages--;
@@ -458,33 +483,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 	nfs_release_request(req);
 }
 
-/*
- * Find a request
- */
-static inline struct nfs_page *
-_nfs_find_request(struct inode *inode, unsigned long index)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_page *req;
-
-	req = (struct nfs_page*)radix_tree_lookup(&nfsi->nfs_page_tree, index);
-	if (req)
-		atomic_inc(&req->wb_count);
-	return req;
-}
-
-static struct nfs_page *
-nfs_find_request(struct inode *inode, unsigned long index)
-{
-	struct nfs_page		*req;
-	struct nfs_inode	*nfsi = NFS_I(inode);
-
-	spin_lock(&nfsi->req_lock);
-	req = _nfs_find_request(inode, index);
-	spin_unlock(&nfsi->req_lock);
-	return req;
-}
-
 /*
  * Add a request to the inode's dirty list.
  */
@@ -699,10 +697,11 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 		 * A request for the page we wish to update
 		 */
 		spin_lock(&nfsi->req_lock);
-		req = _nfs_find_request(inode, page->index);
+		req = nfs_page_find_request_locked(page);
 		if (req) {
 			if (!nfs_lock_request_dontget(req)) {
 				int error;
+
 				spin_unlock(&nfsi->req_lock);
 				error = nfs_wait_on_request(req);
 				nfs_release_request(req);
@@ -770,7 +769,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 int nfs_flush_incompatible(struct file *file, struct page *page)
 {
 	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
-	struct inode	*inode = page->mapping->host;
 	struct nfs_page	*req;
 	int		status = 0;
 	/*
@@ -781,11 +779,13 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
 	 * Also do the same if we find a request from an existing
 	 * dropped page.
 	 */
-	req = nfs_find_request(inode, page->index);
-	if (req) {
-		if (req->wb_page != page || ctx != req->wb_context)
-			status = nfs_wb_page(inode, page);
+	req = nfs_page_find_request(page);
+	if (req != NULL) {
+		int do_flush = req->wb_page != page || req->wb_context != ctx;
+
 		nfs_release_request(req);
+		if (do_flush)
+			status = nfs_wb_page(page->mapping->host, page);
 	}
 	return (status < 0) ? status : 0;
 }
-- 
cgit v1.2.3


From 49a70f278658894d2899824cd4037095fb6711fe Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Dec 2006 00:35:38 -0500
Subject: NFS: Cleanup: add common helper nfs_page_length()

Clean up a lot of ad-hoc page length calculations in fs/nfs/write.c

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/internal.h | 18 ++++++++++++++++++
 fs/nfs/read.c     | 21 +++------------------
 fs/nfs/write.c    | 55 ++++++++++---------------------------------------------
 3 files changed, 31 insertions(+), 63 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d205466233f..a28f6ce2e13 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -217,3 +217,21 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
 	if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0)
 		sb->s_maxbytes = MAX_LFS_FILESIZE;
 }
+
+/*
+ * Determine the number of bytes of data the page contains
+ */
+static inline
+unsigned int nfs_page_length(struct page *page)
+{
+	loff_t i_size = i_size_read(page->mapping->host);
+
+	if (i_size > 0) {
+		pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+		if (page->index < end_index)
+			return PAGE_CACHE_SIZE;
+		if (page->index == end_index)
+			return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
+	}
+	return 0;
+}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index cca9fa25999..05cca660997 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,6 +30,7 @@
 
 #include <asm/system.h>
 
+#include "internal.h"
 #include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
@@ -83,22 +84,6 @@ void nfs_readdata_release(void *data)
         nfs_readdata_free(data);
 }
 
-static
-unsigned int nfs_page_length(struct inode *inode, struct page *page)
-{
-	loff_t i_size = i_size_read(inode);
-	unsigned long idx;
-
-	if (i_size <= 0)
-		return 0;
-	idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
-	if (page->index > idx)
-		return 0;
-	if (page->index != idx)
-		return PAGE_CACHE_SIZE;
-	return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
-}
-
 static
 int nfs_return_empty_page(struct page *page)
 {
@@ -231,7 +216,7 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	struct nfs_page	*new;
 	unsigned int len;
 
-	len = nfs_page_length(inode, page);
+	len = nfs_page_length(page);
 	if (len == 0)
 		return nfs_return_empty_page(page);
 	new = nfs_create_request(ctx, inode, page, 0, len);
@@ -667,7 +652,7 @@ readpage_async_filler(void *data, struct page *page)
 	unsigned int len;
 
 	nfs_wb_page(inode, page);
-	len = nfs_page_length(inode, page);
+	len = nfs_page_length(page);
 	if (len == 0)
 		return nfs_return_empty_page(page);
 	new = nfs_create_request(desc->ctx, inode, page, 0, len);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6df8319de06..3f6ca522956 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -63,6 +63,7 @@
 #include <linux/smp_lock.h>
 
 #include "delegation.h"
+#include "internal.h"
 #include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
@@ -199,30 +200,15 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
  */
 static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
 {
-	loff_t end_offs;
-
 	if (PageUptodate(page))
 		return;
 	if (base != 0)
 		return;
-	if (count == PAGE_CACHE_SIZE) {
-		SetPageUptodate(page);
-		return;
-	}
-
-	end_offs = i_size_read(page->mapping->host) - 1;
-	if (end_offs < 0)
-		return;
-	/* Is this the last page? */
-	if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT))
+	if (count != nfs_page_length(page))
 		return;
-	/* This is the last page: set PG_uptodate if we cover the entire
-	 * extent of the data, then zero the rest of the page.
-	 */
-	if (count == (unsigned int)(end_offs & (PAGE_CACHE_SIZE - 1)) + 1) {
+	if (count != PAGE_CACHE_SIZE)
 		memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count);
-		SetPageUptodate(page);
-	}
+	SetPageUptodate(page);
 }
 
 /*
@@ -330,9 +316,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct nfs_open_context *ctx;
 	struct inode *inode = page->mapping->host;
-	unsigned long end_index;
-	unsigned offset = PAGE_CACHE_SIZE;
-	loff_t i_size = i_size_read(inode);
+	unsigned offset;
 	int inode_referenced = 0;
 	int priority = wb_priority(wbc);
 	int err;
@@ -350,22 +334,15 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 	 */
 	if (igrab(inode) != 0)
 		inode_referenced = 1;
-	end_index = i_size >> PAGE_CACHE_SHIFT;
 
 	/* Ensure we've flushed out any previous writes */
 	nfs_wb_page_priority(inode, page, priority);
 
-	/* easy case */
-	if (page->index < end_index)
-		goto do_it;
-	/* things got complicated... */
-	offset = i_size & (PAGE_CACHE_SIZE-1);
-
-	/* OK, are we completely out? */
-	err = 0; /* potential race with truncate - ignore */
-	if (page->index >= end_index+1 || !offset)
+	err = 0;
+	offset = nfs_page_length(page);
+	if (!offset)
 		goto out;
-do_it:
+
 	ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE);
 	if (ctx == NULL) {
 		err = -EBADF;
@@ -826,20 +803,8 @@ int nfs_updatepage(struct file *file, struct page *page,
 	 * fragmenting write requests.
 	 */
 	if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) {
-		loff_t end_offs = i_size_read(inode) - 1;
-		unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT;
-
-		count += offset;
+		count = max(count + offset, nfs_page_length(page));
 		offset = 0;
-		if (unlikely(end_offs < 0)) {
-			/* Do nothing */
-		} else if (page->index == end_index) {
-			unsigned int pglen;
-			pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1;
-			if (count < pglen)
-				count = pglen;
-		} else if (page->index < end_index)
-			count = PAGE_CACHE_SIZE;
 	}
 
 	/*
-- 
cgit v1.2.3


From 87a4ce16082e92b4b6d27596a517b302f3692650 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Dec 2006 00:35:39 -0500
Subject: NFS: Remove call to igrab() from nfs_writepage()

We always ensure that the nfs_open_context holds a reference to the dentry,
so the test in nfs_writepage() for whether or not the inode is referenced
is redundant.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3f6ca522956..74e86601d97 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -317,24 +317,12 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 	struct nfs_open_context *ctx;
 	struct inode *inode = page->mapping->host;
 	unsigned offset;
-	int inode_referenced = 0;
 	int priority = wb_priority(wbc);
 	int err;
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
 	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
 
-	/*
-	 * Note: We need to ensure that we have a reference to the inode
-	 *       if we are to do asynchronous writes. If not, waiting
-	 *       in nfs_wait_on_request() may deadlock with clear_inode().
-	 *
-	 *       If igrab() fails here, then it is in any case safe to
-	 *       call nfs_wb_page(), since there will be no pending writes.
-	 */
-	if (igrab(inode) != 0)
-		inode_referenced = 1;
-
 	/* Ensure we've flushed out any previous writes */
 	nfs_wb_page_priority(inode, page, priority);
 
@@ -349,7 +337,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 		goto out;
 	}
 	lock_kernel();
-	if (!IS_SYNC(inode) && inode_referenced) {
+	if (!IS_SYNC(inode)) {
 		err = nfs_writepage_async(ctx, inode, page, 0, offset);
 		if (!wbc->for_writepages)
 			nfs_flush_mapping(page->mapping, wbc, wb_priority(wbc));
@@ -366,8 +354,6 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 	put_nfs_open_context(ctx);
 out:
 	unlock_page(page);
-	if (inode_referenced)
-		iput(inode);
 	return err; 
 }
 
-- 
cgit v1.2.3


From e21195a740533348e77efa8a2e2cf03bb4092b2b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Dec 2006 00:35:39 -0500
Subject: NFS: More cleanups of fs/nfs/write.c

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 68 +++++++++++++++++++++-------------------------------------
 1 file changed, 24 insertions(+), 44 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 74e86601d97..de9a16a8f7e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -75,7 +75,6 @@
  * Local function declarations
  */
 static struct nfs_page * nfs_update_request(struct nfs_open_context*,
-					    struct inode *,
 					    struct page *,
 					    unsigned int, unsigned int);
 static int nfs_wait_on_write_congestion(struct address_space *, int);
@@ -215,10 +214,10 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int
  * Write a page synchronously.
  * Offset is the data offset within the page.
  */
-static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
-		struct page *page, unsigned int offset, unsigned int count,
-		int how)
+static int nfs_writepage_sync(struct nfs_open_context *ctx, struct page *page,
+		unsigned int offset, unsigned int count, int how)
 {
+	struct inode *inode = page->mapping->host;
 	unsigned int	wsize = NFS_SERVER(inode)->wsize;
 	int		result, written = 0;
 	struct nfs_write_data *wdata;
@@ -283,15 +282,23 @@ io_error:
 	return written ? written : result;
 }
 
-static int nfs_writepage_async(struct nfs_open_context *ctx,
-		struct inode *inode, struct page *page,
+static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
 		unsigned int offset, unsigned int count)
 {
 	struct nfs_page	*req;
+	int ret;
 
-	req = nfs_update_request(ctx, inode, page, offset, count);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
+	for (;;) {
+		req = nfs_update_request(ctx, page, offset, count);
+		if (!IS_ERR(req))
+			break;
+		ret = PTR_ERR(req);
+		if (ret != -EBUSY)
+			return ret;
+		ret = nfs_wb_page(page->mapping->host, page);
+		if (ret != 0)
+			return ret;
+	}
 	/* Update file length */
 	nfs_grow_file(page, offset, count);
 	/* Set the PG_uptodate flag? */
@@ -317,14 +324,13 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 	struct nfs_open_context *ctx;
 	struct inode *inode = page->mapping->host;
 	unsigned offset;
-	int priority = wb_priority(wbc);
 	int err;
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
 	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
 
 	/* Ensure we've flushed out any previous writes */
-	nfs_wb_page_priority(inode, page, priority);
+	nfs_wb_page_priority(inode, page, wb_priority(wbc));
 
 	err = 0;
 	offset = nfs_page_length(page);
@@ -338,12 +344,11 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 	}
 	lock_kernel();
 	if (!IS_SYNC(inode)) {
-		err = nfs_writepage_async(ctx, inode, page, 0, offset);
+		err = nfs_writepage_setup(ctx, page, 0, offset);
 		if (!wbc->for_writepages)
 			nfs_flush_mapping(page->mapping, wbc, wb_priority(wbc));
 	} else {
-		err = nfs_writepage_sync(ctx, inode, page, 0,
-						offset, priority);
+		err = nfs_writepage_sync(ctx, page, 0, offset, wb_priority(wbc));
 		if (err >= 0) {
 			if (err != offset)
 				redirty_page_for_writepage(wbc, page);
@@ -643,17 +648,16 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
  * Note: Should always be called with the Page Lock held!
  */
 static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
-		struct inode *inode, struct page *page,
-		unsigned int offset, unsigned int bytes)
+		struct page *page, unsigned int offset, unsigned int bytes)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
+	struct inode *inode = page->mapping->host;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_page		*req, *new = NULL;
 	unsigned long		rqend, end;
 
 	end = offset + bytes;
 
-	if (nfs_wait_on_write_congestion(page->mapping, server->flags & NFS_MOUNT_INTR))
+	if (nfs_wait_on_write_congestion(page->mapping, NFS_SERVER(inode)->flags & NFS_MOUNT_INTR))
 		return ERR_PTR(-ERESTARTSYS);
 	for (;;) {
 		/* Loop over all inode entries and see if we find
@@ -764,7 +768,6 @@ int nfs_updatepage(struct file *file, struct page *page,
 {
 	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
 	struct inode	*inode = page->mapping->host;
-	struct nfs_page	*req;
 	int		status = 0;
 
 	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
@@ -775,7 +778,7 @@ int nfs_updatepage(struct file *file, struct page *page,
 		(long long)(page_offset(page) +offset));
 
 	if (IS_SYNC(inode)) {
-		status = nfs_writepage_sync(ctx, inode, page, offset, count, 0);
+		status = nfs_writepage_sync(ctx, page, offset, count, 0);
 		if (status > 0) {
 			if (offset == 0 && status == PAGE_CACHE_SIZE)
 				SetPageUptodate(page);
@@ -793,31 +796,8 @@ int nfs_updatepage(struct file *file, struct page *page,
 		offset = 0;
 	}
 
-	/*
-	 * Try to find an NFS request corresponding to this page
-	 * and update it.
-	 * If the existing request cannot be updated, we must flush
-	 * it out now.
-	 */
-	do {
-		req = nfs_update_request(ctx, inode, page, offset, count);
-		status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
-		if (status != -EBUSY)
-			break;
-		/* Request could not be updated. Flush it out and try again */
-		status = nfs_wb_page(inode, page);
-	} while (status >= 0);
-	if (status < 0)
-		goto done;
-
-	status = 0;
+	status = nfs_writepage_setup(ctx, page, offset, count);
 
-	/* Update file length */
-	nfs_grow_file(page, offset, count);
-	/* Set the PG_uptodate flag? */
-	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
-	nfs_unlock_request(req);
-done:
         dprintk("NFS:      nfs_updatepage returns %d (isize %Ld)\n",
 			status, (long long)i_size_read(inode));
 	if (status < 0)
-- 
cgit v1.2.3


From 200baa2112012dd8a13db9da3ee6885403f9c013 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Dec 2006 00:35:40 -0500
Subject: NFS: Remove nfs_writepage_sync()

Maintaining two parallel ways of doing synchronous writes is rather
pointless. This patch gets rid of the legacy nfs_writepage_sync(), and
replaces it with the faster asynchronous writes.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c     |   6 ++++
 fs/nfs/nfs3proc.c |  47 -------------------------
 fs/nfs/nfs4proc.c |  85 ----------------------------------------------
 fs/nfs/proc.c     |  28 ---------------
 fs/nfs/write.c    | 100 +++---------------------------------------------------
 5 files changed, 10 insertions(+), 256 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d6ee60fc3ba..143a19037ce 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -378,6 +378,12 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
 
 	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
 	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
+	/* Return error values for O_SYNC and IS_SYNC() */
+	if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) {
+		int err = nfs_fsync(iocb->ki_filp, dentry, 1);
+		if (err < 0)
+			result = err;
+	}
 out:
 	return result;
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7fba06306b8..510ae524f3f 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -276,51 +276,6 @@ static int nfs3_proc_read(struct nfs_read_data *rdata)
 	return status;
 }
 
-static int nfs3_proc_write(struct nfs_write_data *wdata)
-{
-	int			rpcflags = wdata->flags;
-	struct inode *		inode = wdata->inode;
-	struct nfs_fattr *	fattr = wdata->res.fattr;
-	struct rpc_message	msg = {
-		.rpc_proc	= &nfs3_procedures[NFS3PROC_WRITE],
-		.rpc_argp	= &wdata->args,
-		.rpc_resp	= &wdata->res,
-		.rpc_cred	= wdata->cred,
-	};
-	int			status;
-
-	dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
-			(long long) wdata->args.offset);
-	nfs_fattr_init(fattr);
-	status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags);
-	if (status >= 0)
-		nfs_post_op_update_inode(inode, fattr);
-	dprintk("NFS reply write: %d\n", status);
-	return status < 0? status : wdata->res.count;
-}
-
-static int nfs3_proc_commit(struct nfs_write_data *cdata)
-{
-	struct inode *		inode = cdata->inode;
-	struct nfs_fattr *	fattr = cdata->res.fattr;
-	struct rpc_message	msg = {
-		.rpc_proc	= &nfs3_procedures[NFS3PROC_COMMIT],
-		.rpc_argp	= &cdata->args,
-		.rpc_resp	= &cdata->res,
-		.rpc_cred	= cdata->cred,
-	};
-	int			status;
-
-	dprintk("NFS call  commit %d @ %Ld\n", cdata->args.count,
-			(long long) cdata->args.offset);
-	nfs_fattr_init(fattr);
-	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
-	if (status >= 0)
-		nfs_post_op_update_inode(inode, fattr);
-	dprintk("NFS reply commit: %d\n", status);
-	return status;
-}
-
 /*
  * Create a regular file.
  * For now, we don't implement O_EXCL.
@@ -901,8 +856,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.access		= nfs3_proc_access,
 	.readlink	= nfs3_proc_readlink,
 	.read		= nfs3_proc_read,
-	.write		= nfs3_proc_write,
-	.commit		= nfs3_proc_commit,
 	.create		= nfs3_proc_create,
 	.remove		= nfs3_proc_remove,
 	.unlink_setup	= nfs3_proc_unlink_setup,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4b5fc714d78..ee458aeab24 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1775,89 +1775,6 @@ static int nfs4_proc_read(struct nfs_read_data *rdata)
 	return err;
 }
 
-static int _nfs4_proc_write(struct nfs_write_data *wdata)
-{
-	int rpcflags = wdata->flags;
-	struct inode *inode = wdata->inode;
-	struct nfs_fattr *fattr = wdata->res.fattr;
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct rpc_message msg = {
-		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_WRITE],
-		.rpc_argp	= &wdata->args,
-		.rpc_resp	= &wdata->res,
-		.rpc_cred	= wdata->cred,
-	};
-	int status;
-
-	dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
-			(long long) wdata->args.offset);
-
-	wdata->args.bitmask = server->attr_bitmask;
-	wdata->res.server = server;
-	wdata->timestamp = jiffies;
-	nfs_fattr_init(fattr);
-	status = rpc_call_sync(server->client, &msg, rpcflags);
-	dprintk("NFS reply write: %d\n", status);
-	if (status < 0)
-		return status;
-	renew_lease(server, wdata->timestamp);
-	nfs_post_op_update_inode(inode, fattr);
-	return wdata->res.count;
-}
-
-static int nfs4_proc_write(struct nfs_write_data *wdata)
-{
-	struct nfs4_exception exception = { };
-	int err;
-	do {
-		err = nfs4_handle_exception(NFS_SERVER(wdata->inode),
-				_nfs4_proc_write(wdata),
-				&exception);
-	} while (exception.retry);
-	return err;
-}
-
-static int _nfs4_proc_commit(struct nfs_write_data *cdata)
-{
-	struct inode *inode = cdata->inode;
-	struct nfs_fattr *fattr = cdata->res.fattr;
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct rpc_message msg = {
-		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
-		.rpc_argp	= &cdata->args,
-		.rpc_resp	= &cdata->res,
-		.rpc_cred	= cdata->cred,
-	};
-	int status;
-
-	dprintk("NFS call  commit %d @ %Ld\n", cdata->args.count,
-			(long long) cdata->args.offset);
-
-	cdata->args.bitmask = server->attr_bitmask;
-	cdata->res.server = server;
-	cdata->timestamp = jiffies;
-	nfs_fattr_init(fattr);
-	status = rpc_call_sync(server->client, &msg, 0);
-	if (status >= 0)
-		renew_lease(server, cdata->timestamp);
-	dprintk("NFS reply commit: %d\n", status);
-	if (status >= 0)
-		nfs_post_op_update_inode(inode, fattr);
-	return status;
-}
-
-static int nfs4_proc_commit(struct nfs_write_data *cdata)
-{
-	struct nfs4_exception exception = { };
-	int err;
-	do {
-		err = nfs4_handle_exception(NFS_SERVER(cdata->inode),
-				_nfs4_proc_commit(cdata),
-				&exception);
-	} while (exception.retry);
-	return err;
-}
-
 /*
  * Got race?
  * We will need to arrange for the VFS layer to provide an atomic open.
@@ -3730,8 +3647,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.access		= nfs4_proc_access,
 	.readlink	= nfs4_proc_readlink,
 	.read		= nfs4_proc_read,
-	.write		= nfs4_proc_write,
-	.commit		= nfs4_proc_commit,
 	.create		= nfs4_proc_create,
 	.remove		= nfs4_proc_remove,
 	.unlink_setup	= nfs4_proc_unlink_setup,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 7a9013dd5d0..10f5e80ca15 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -215,32 +215,6 @@ static int nfs_proc_read(struct nfs_read_data *rdata)
 	return status;
 }
 
-static int nfs_proc_write(struct nfs_write_data *wdata)
-{
-	int			flags = wdata->flags;
-	struct inode *		inode = wdata->inode;
-	struct nfs_fattr *	fattr = wdata->res.fattr;
-	struct rpc_message	msg = {
-		.rpc_proc	= &nfs_procedures[NFSPROC_WRITE],
-		.rpc_argp	= &wdata->args,
-		.rpc_resp	= &wdata->res,
-		.rpc_cred	= wdata->cred,
-	};
-	int			status;
-
-	dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
-			(long long) wdata->args.offset);
-	nfs_fattr_init(fattr);
-	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
-	if (status >= 0) {
-		nfs_post_op_update_inode(inode, fattr);
-		wdata->res.count = wdata->args.count;
-		wdata->verf.committed = NFS_FILE_SYNC;
-	}
-	dprintk("NFS reply write: %d\n", status);
-	return status < 0? status : wdata->res.count;
-}
-
 static int
 nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		int flags, struct nameidata *nd)
@@ -693,8 +667,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.access		= NULL,		       /* access */
 	.readlink	= nfs_proc_readlink,
 	.read		= nfs_proc_read,
-	.write		= nfs_proc_write,
-	.commit		= NULL,		       /* commit */
 	.create		= nfs_proc_create,
 	.remove		= nfs_proc_remove,
 	.unlink_setup	= nfs_proc_unlink_setup,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index de9a16a8f7e..f0720b544b1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -210,78 +210,6 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int
 	SetPageUptodate(page);
 }
 
-/*
- * Write a page synchronously.
- * Offset is the data offset within the page.
- */
-static int nfs_writepage_sync(struct nfs_open_context *ctx, struct page *page,
-		unsigned int offset, unsigned int count, int how)
-{
-	struct inode *inode = page->mapping->host;
-	unsigned int	wsize = NFS_SERVER(inode)->wsize;
-	int		result, written = 0;
-	struct nfs_write_data *wdata;
-
-	wdata = nfs_writedata_alloc(wsize);
-	if (!wdata)
-		return -ENOMEM;
-
-	wdata->flags = how;
-	wdata->cred = ctx->cred;
-	wdata->inode = inode;
-	wdata->args.fh = NFS_FH(inode);
-	wdata->args.context = ctx;
-	wdata->args.pages = &page;
-	wdata->args.stable = NFS_FILE_SYNC;
-	wdata->args.pgbase = offset;
-	wdata->args.count = wsize;
-	wdata->res.fattr = &wdata->fattr;
-	wdata->res.verf = &wdata->verf;
-
-	dprintk("NFS:      nfs_writepage_sync(%s/%Ld %d@%Ld)\n",
-		inode->i_sb->s_id,
-		(long long)NFS_FILEID(inode),
-		count, (long long)(page_offset(page) + offset));
-
-	set_page_writeback(page);
-	nfs_begin_data_update(inode);
-	do {
-		if (count < wsize)
-			wdata->args.count = count;
-		wdata->args.offset = page_offset(page) + wdata->args.pgbase;
-
-		result = NFS_PROTO(inode)->write(wdata);
-
-		if (result < 0) {
-			/* Must mark the page invalid after I/O error */
-			ClearPageUptodate(page);
-			goto io_error;
-		}
-		if (result < wdata->args.count)
-			printk(KERN_WARNING "NFS: short write, count=%u, result=%d\n",
-					wdata->args.count, result);
-
-		wdata->args.offset += result;
-	        wdata->args.pgbase += result;
-		written += result;
-		count -= result;
-		nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
-	} while (count);
-	/* Update file length */
-	nfs_grow_file(page, offset, written);
-	/* Set the PG_uptodate flag? */
-	nfs_mark_uptodate(page, offset, written);
-
-	if (PageError(page))
-		ClearPageError(page);
-
-io_error:
-	nfs_end_data_update(inode);
-	end_page_writeback(page);
-	nfs_writedata_release(wdata);
-	return written ? written : result;
-}
-
 static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
 		unsigned int offset, unsigned int count)
 {
@@ -342,22 +270,12 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 		err = -EBADF;
 		goto out;
 	}
-	lock_kernel();
-	if (!IS_SYNC(inode)) {
-		err = nfs_writepage_setup(ctx, page, 0, offset);
-		if (!wbc->for_writepages)
-			nfs_flush_mapping(page->mapping, wbc, wb_priority(wbc));
-	} else {
-		err = nfs_writepage_sync(ctx, page, 0, offset, wb_priority(wbc));
-		if (err >= 0) {
-			if (err != offset)
-				redirty_page_for_writepage(wbc, page);
-			err = 0;
-		}
-	}
-	unlock_kernel();
+	err = nfs_writepage_setup(ctx, page, 0, offset);
 	put_nfs_open_context(ctx);
+
 out:
+	if (!wbc->for_writepages)
+		nfs_flush_mapping(page->mapping, wbc, wb_priority(wbc));
 	unlock_page(page);
 	return err; 
 }
@@ -777,16 +695,6 @@ int nfs_updatepage(struct file *file, struct page *page,
 		file->f_dentry->d_name.name, count,
 		(long long)(page_offset(page) +offset));
 
-	if (IS_SYNC(inode)) {
-		status = nfs_writepage_sync(ctx, page, offset, count, 0);
-		if (status > 0) {
-			if (offset == 0 && status == PAGE_CACHE_SIZE)
-				SetPageUptodate(page);
-			return 0;
-		}
-		return status;
-	}
-
 	/* If we're not using byte range locks, and we know the page
 	 * is entirely in cache, it may be more efficient to avoid
 	 * fragmenting write requests.
-- 
cgit v1.2.3


From 1a54533ec8d92a5edae97ec6ae10023ee71c4b46 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Dec 2006 00:35:40 -0500
Subject: NFS: Add nfs_set_page_dirty()

We will want to allow nfs_writepage() to distinguish between pages that
have been marked as dirty by the VM, and those that have been marked as
dirty by nfs_updatepage().
In the former case, the entire page will want to be written out, and so any
requests that were pending need to be flushed out first.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c  |  2 +-
 fs/nfs/write.c | 49 ++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 37 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 143a19037ce..c2fe3bd83ab 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -331,7 +331,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
 const struct address_space_operations nfs_file_aops = {
 	.readpage = nfs_readpage,
 	.readpages = nfs_readpages,
-	.set_page_dirty = __set_page_dirty_nobuffers,
+	.set_page_dirty = nfs_set_page_dirty,
 	.writepage = nfs_writepage,
 	.writepages = nfs_writepages,
 	.prepare_write = nfs_prepare_write,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f0720b544b1..266fea71cfc 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -251,16 +251,23 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct nfs_open_context *ctx;
 	struct inode *inode = page->mapping->host;
+	struct nfs_page *req;
 	unsigned offset;
-	int err;
+	int err = 0;
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
 	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
 
-	/* Ensure we've flushed out any previous writes */
-	nfs_wb_page_priority(inode, page, wb_priority(wbc));
+	req = nfs_page_find_request(page);
+	if (req != NULL) {
+		int flushme = test_bit(PG_NEED_FLUSH, &req->wb_flags);
+		nfs_release_request(req);
+		if (!flushme)
+			goto out;
+		/* Ensure we've flushed out the invalid write */
+		nfs_wb_page_priority(inode, page, wb_priority(wbc));
+	}
 
-	err = 0;
 	offset = nfs_page_length(page);
 	if (!offset)
 		goto out;
@@ -655,7 +662,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
 {
 	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
 	struct nfs_page	*req;
-	int		status = 0;
+	int do_flush, status;
 	/*
 	 * Look for a request corresponding to this page. If there
 	 * is one, and it belongs to another file, we flush it out
@@ -664,15 +671,18 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
 	 * Also do the same if we find a request from an existing
 	 * dropped page.
 	 */
-	req = nfs_page_find_request(page);
-	if (req != NULL) {
-		int do_flush = req->wb_page != page || req->wb_context != ctx;
-
+	do {
+		req = nfs_page_find_request(page);
+		if (req == NULL)
+			return 0;
+		do_flush = req->wb_page != page || req->wb_context != ctx
+			|| test_bit(PG_NEED_FLUSH, &req->wb_flags);
 		nfs_release_request(req);
-		if (do_flush)
-			status = nfs_wb_page(page->mapping->host, page);
-	}
-	return (status < 0) ? status : 0;
+		if (!do_flush)
+			return 0;
+		status = nfs_wb_page(page->mapping->host, page);
+	} while (status == 0);
+	return status;
 }
 
 /*
@@ -1437,6 +1447,19 @@ int nfs_wb_page(struct inode *inode, struct page* page)
 	return nfs_wb_page_priority(inode, page, 0);
 }
 
+int nfs_set_page_dirty(struct page *page)
+{
+	struct nfs_page *req;
+
+	req = nfs_page_find_request(page);
+	if (req != NULL) {
+		/* Mark any existing write requests for flushing */
+		set_bit(PG_NEED_FLUSH, &req->wb_flags);
+		nfs_release_request(req);
+	}
+	return __set_page_dirty_nobuffers(page);
+}
+
 
 int __init nfs_init_writepagecache(void)
 {
-- 
cgit v1.2.3


From 4d770ccf4257b23a7ca2a85de1b1c22657b581d8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Dec 2006 00:35:41 -0500
Subject: NFS: Ensure that nfs_wb_page() calls writepage when necessary.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 266fea71cfc..0eca6a54210 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -247,7 +247,7 @@ static int wb_priority(struct writeback_control *wbc)
 /*
  * Write an mmapped page to the server.
  */
-int nfs_writepage(struct page *page, struct writeback_control *wbc)
+static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
 {
 	struct nfs_open_context *ctx;
 	struct inode *inode = page->mapping->host;
@@ -265,7 +265,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 		if (!flushme)
 			goto out;
 		/* Ensure we've flushed out the invalid write */
-		nfs_wb_page_priority(inode, page, wb_priority(wbc));
+		nfs_wb_page_priority(inode, page, wb_priority(wbc) | FLUSH_STABLE | FLUSH_NOWRITEPAGE);
 	}
 
 	offset = nfs_page_length(page);
@@ -283,6 +283,14 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 out:
 	if (!wbc->for_writepages)
 		nfs_flush_mapping(page->mapping, wbc, wb_priority(wbc));
+	return err;
+}
+
+int nfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	int err;
+
+	err = nfs_writepage_locked(page, wbc);
 	unlock_page(page);
 	return err; 
 }
@@ -1435,8 +1443,26 @@ static int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
 {
 	loff_t range_start = page_offset(page);
 	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
+	struct writeback_control wbc = {
+		.bdi = page->mapping->backing_dev_info,
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = LONG_MAX,
+		.range_start = range_start,
+		.range_end = range_end,
+	};
+	int ret;
 
-	return nfs_sync_mapping_range(inode->i_mapping, range_start, range_end, how | FLUSH_STABLE);
+	BUG_ON(!PageLocked(page));
+	if (!(how & FLUSH_NOWRITEPAGE) && clear_page_dirty_for_io(page)) {
+		ret = nfs_writepage_locked(page, &wbc);
+		if (ret < 0)
+			goto out;
+	}
+	ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
+	if (ret >= 0)
+		return 0;
+out:
+	return ret;
 }
 
 /*
@@ -1444,7 +1470,7 @@ static int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
  */
 int nfs_wb_page(struct inode *inode, struct page* page)
 {
-	return nfs_wb_page_priority(inode, page, 0);
+	return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
 }
 
 int nfs_set_page_dirty(struct page *page)
-- 
cgit v1.2.3


From e261f51f25b98c213e0b3d7f2109b117d714f69d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Dec 2006 00:35:41 -0500
Subject: NFS: Make nfs_updatepage() mark the page as dirty.

This will ensure that we can call set_page_writeback() from within
nfs_writepage(), which is always called with the page lock set.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 73 ++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 56 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0eca6a54210..130528d09a2 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -77,6 +77,7 @@
 static struct nfs_page * nfs_update_request(struct nfs_open_context*,
 					    struct page *,
 					    unsigned int, unsigned int);
+static void nfs_mark_request_dirty(struct nfs_page *req);
 static int nfs_wait_on_write_congestion(struct address_space *, int);
 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
 static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how);
@@ -244,6 +245,48 @@ static int wb_priority(struct writeback_control *wbc)
 	return 0;
 }
 
+/*
+ * Find an associated nfs write request, and prepare to flush it out
+ * Returns 1 if there was no write request, or if the request was
+ * already tagged by nfs_set_page_dirty.Returns 0 if the request
+ * was not tagged.
+ * May also return an error if the user signalled nfs_wait_on_request().
+ */
+static int nfs_page_mark_flush(struct page *page)
+{
+	struct nfs_page *req;
+	spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
+	int ret;
+
+	spin_lock(req_lock);
+	for(;;) {
+		req = nfs_page_find_request_locked(page);
+		if (req == NULL) {
+			spin_unlock(req_lock);
+			return 1;
+		}
+		if (nfs_lock_request_dontget(req))
+			break;
+		/* Note: If we hold the page lock, as is the case in nfs_writepage,
+		 *	 then the call to nfs_lock_request_dontget() will always
+		 *	 succeed provided that someone hasn't already marked the
+		 *	 request as dirty (in which case we don't care).
+		 */
+		spin_unlock(req_lock);
+		ret = nfs_wait_on_request(req);
+		nfs_release_request(req);
+		if (ret != 0)
+			return ret;
+		spin_lock(req_lock);
+	}
+	spin_unlock(req_lock);
+	if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0)
+		nfs_mark_request_dirty(req);
+	ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
+	nfs_unlock_request(req);
+	return ret;
+}
+
 /*
  * Write an mmapped page to the server.
  */
@@ -251,23 +294,16 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
 {
 	struct nfs_open_context *ctx;
 	struct inode *inode = page->mapping->host;
-	struct nfs_page *req;
 	unsigned offset;
-	int err = 0;
+	int err;
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
 	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
 
-	req = nfs_page_find_request(page);
-	if (req != NULL) {
-		int flushme = test_bit(PG_NEED_FLUSH, &req->wb_flags);
-		nfs_release_request(req);
-		if (!flushme)
-			goto out;
-		/* Ensure we've flushed out the invalid write */
-		nfs_wb_page_priority(inode, page, wb_priority(wbc) | FLUSH_STABLE | FLUSH_NOWRITEPAGE);
-	}
-
+	err = nfs_page_mark_flush(page);
+	if (err <= 0)
+		goto out;
+	err = 0;
 	offset = nfs_page_length(page);
 	if (!offset)
 		goto out;
@@ -279,7 +315,11 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
 	}
 	err = nfs_writepage_setup(ctx, page, 0, offset);
 	put_nfs_open_context(ctx);
-
+	if (err != 0)
+		goto out;
+	err = nfs_page_mark_flush(page);
+	if (err > 0)
+		err = 0;
 out:
 	if (!wbc->for_writepages)
 		nfs_flush_mapping(page->mapping, wbc, wb_priority(wbc));
@@ -409,8 +449,7 @@ nfs_mark_request_dirty(struct nfs_page *req)
 static inline int
 nfs_dirty_request(struct nfs_page *req)
 {
-	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
-	return !list_empty(&req->wb_list) && req->wb_list_head == &nfsi->dirty;
+	return test_bit(PG_FLUSHING, &req->wb_flags) == 0;
 }
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -628,7 +667,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 				return ERR_PTR(error);
 			}
 			spin_unlock(&nfsi->req_lock);
-			nfs_mark_request_dirty(new);
 			return new;
 		}
 		spin_unlock(&nfsi->req_lock);
@@ -684,7 +722,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
 		if (req == NULL)
 			return 0;
 		do_flush = req->wb_page != page || req->wb_context != ctx
-			|| test_bit(PG_NEED_FLUSH, &req->wb_flags);
+			|| !nfs_dirty_request(req);
 		nfs_release_request(req);
 		if (!do_flush)
 			return 0;
@@ -723,6 +761,7 @@ int nfs_updatepage(struct file *file, struct page *page,
 	}
 
 	status = nfs_writepage_setup(ctx, page, offset, count);
+	__set_page_dirty_nobuffers(page);
 
         dprintk("NFS:      nfs_updatepage returns %d (isize %Ld)\n",
 			status, (long long)i_size_read(inode));
-- 
cgit v1.2.3


From 61822ab5e3ed09fcfc49e37227b655202adf6130 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Dec 2006 00:35:42 -0500
Subject: NFS: Ensure we only call set_page_writeback() under the page lock

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c  |  6 +-----
 fs/nfs/write.c | 38 ++++++++++++++++++++++++++++----------
 2 files changed, 29 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index c2fe3bd83ab..238fb6641aa 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -307,14 +307,10 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse
 
 static void nfs_invalidate_page(struct page *page, unsigned long offset)
 {
-	loff_t range_start, range_end;
-
 	if (offset != 0)
 		return;
 	/* Cancel any unstarted writes on this page */
-	range_start = page_offset(page);
-	range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
-	nfs_sync_mapping_range(page->mapping, range_start, range_end, FLUSH_INVALIDATE);
+	nfs_wb_page_priority(page->mapping->host, page, FLUSH_INVALIDATE);
 }
 
 static int nfs_release_page(struct page *page, gfp_t gfp)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 130528d09a2..bd4dff9dbd6 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -81,7 +81,6 @@ static void nfs_mark_request_dirty(struct nfs_page *req);
 static int nfs_wait_on_write_congestion(struct address_space *, int);
 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
 static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how);
-static int nfs_wb_page_priority(struct inode *inode, struct page *page, int how);
 static const struct rpc_call_ops nfs_write_partial_ops;
 static const struct rpc_call_ops nfs_write_full_ops;
 static const struct rpc_call_ops nfs_commit_ops;
@@ -280,8 +279,10 @@ static int nfs_page_mark_flush(struct page *page)
 		spin_lock(req_lock);
 	}
 	spin_unlock(req_lock);
-	if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0)
+	if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) {
 		nfs_mark_request_dirty(req);
+		set_page_writeback(page);
+	}
 	ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
 	nfs_unlock_request(req);
 	return ret;
@@ -443,6 +444,13 @@ nfs_mark_request_dirty(struct nfs_page *req)
 	mark_inode_dirty(inode);
 }
 
+static void
+nfs_redirty_request(struct nfs_page *req)
+{
+	clear_bit(PG_FLUSHING, &req->wb_flags);
+	__set_page_dirty_nobuffers(req->wb_page);
+}
+
 /*
  * Check if a request is dirty
  */
@@ -777,7 +785,7 @@ static void nfs_writepage_release(struct nfs_page *req)
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 	if (!PageError(req->wb_page)) {
 		if (NFS_NEED_RESCHED(req)) {
-			nfs_mark_request_dirty(req);
+			nfs_redirty_request(req);
 			goto out;
 		} else if (NFS_NEED_COMMIT(req)) {
 			nfs_mark_request_commit(req);
@@ -893,7 +901,6 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
 	atomic_set(&req->wb_complete, requests);
 
 	ClearPageError(page);
-	set_page_writeback(page);
 	offset = 0;
 	nbytes = req->wb_bytes;
 	do {
@@ -923,7 +930,7 @@ out_bad:
 		list_del(&data->pages);
 		nfs_writedata_release(data);
 	}
-	nfs_mark_request_dirty(req);
+	nfs_redirty_request(req);
 	nfs_clear_page_writeback(req);
 	return -ENOMEM;
 }
@@ -954,7 +961,6 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
 		nfs_list_remove_request(req);
 		nfs_list_add_request(req, &data->pages);
 		ClearPageError(req->wb_page);
-		set_page_writeback(req->wb_page);
 		*pages++ = req->wb_page;
 		count += req->wb_bytes;
 	}
@@ -969,7 +975,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
 	while (!list_empty(head)) {
 		struct nfs_page *req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
-		nfs_mark_request_dirty(req);
+		nfs_redirty_request(req);
 		nfs_clear_page_writeback(req);
 	}
 	return -ENOMEM;
@@ -1004,7 +1010,7 @@ out_err:
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
-		nfs_mark_request_dirty(req);
+		nfs_redirty_request(req);
 		nfs_clear_page_writeback(req);
 	}
 	return error;
@@ -1320,7 +1326,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 		}
 		/* We have a mismatch. Write the page again */
 		dprintk(" mismatch\n");
-		nfs_mark_request_dirty(req);
+		nfs_redirty_request(req);
 	next:
 		nfs_clear_page_writeback(req);
 	}
@@ -1451,13 +1457,18 @@ int nfs_wb_all(struct inode *inode)
 		.bdi = mapping->backing_dev_info,
 		.sync_mode = WB_SYNC_ALL,
 		.nr_to_write = LONG_MAX,
+		.for_writepages = 1,
 		.range_cyclic = 1,
 	};
 	int ret;
 
+	ret = generic_writepages(mapping, &wbc);
+	if (ret < 0)
+		goto out;
 	ret = nfs_sync_mapping_wait(mapping, &wbc, 0);
 	if (ret >= 0)
 		return 0;
+out:
 	return ret;
 }
 
@@ -1469,16 +1480,23 @@ int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, lo
 		.nr_to_write = LONG_MAX,
 		.range_start = range_start,
 		.range_end = range_end,
+		.for_writepages = 1,
 	};
 	int ret;
 
+	if (!(how & FLUSH_NOWRITEPAGE)) {
+		ret = generic_writepages(mapping, &wbc);
+		if (ret < 0)
+			goto out;
+	}
 	ret = nfs_sync_mapping_wait(mapping, &wbc, how);
 	if (ret >= 0)
 		return 0;
+out:
 	return ret;
 }
 
-static int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
+int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
 {
 	loff_t range_start = page_offset(page);
 	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
-- 
cgit v1.2.3


From fa8d8c5b77a2dc467b5365a5651710161b84f16d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Dec 2006 00:35:42 -0500
Subject: NFS: Fix nfs_release_page

invalidate_inode_pages2_range() will clear the PG_dirty bit before calling
try_to_release_page().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 238fb6641aa..8e28bffc35a 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -315,13 +315,14 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
 
 static int nfs_release_page(struct page *page, gfp_t gfp)
 {
-	if (gfp & __GFP_FS)
-		return !nfs_wb_page(page->mapping->host, page);
-	else
-		/*
-		 * Avoid deadlock on nfs_wait_on_request().
-		 */
+	/*
+	 * Avoid deadlock on nfs_wait_on_request().
+	 */
+	if (!(gfp & __GFP_FS))
 		return 0;
+	/* Hack... Force nfs_wb_page() to write out the page */
+	SetPageDirty(page);
+	return !nfs_wb_page(page->mapping->host, page);
 }
 
 const struct address_space_operations nfs_file_aops = {
-- 
cgit v1.2.3


From e507d9ebbb2d5db5948a6fb3c33f015d60708d19 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Dec 2006 00:35:42 -0500
Subject: NFS: Ensure the inode is marked as dirty if we break out of
 nfs_wb_all()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index bd4dff9dbd6..a4c9df67a3a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1469,6 +1469,7 @@ int nfs_wb_all(struct inode *inode)
 	if (ret >= 0)
 		return 0;
 out:
+	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 	return ret;
 }
 
@@ -1493,6 +1494,7 @@ int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, lo
 	if (ret >= 0)
 		return 0;
 out:
+	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 	return ret;
 }
 
@@ -1519,6 +1521,7 @@ int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
 	if (ret >= 0)
 		return 0;
 out:
+	__mark_inode_dirty(inode, I_DIRTY_PAGES);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 3925675cb37cc9c3fd1d3f56ce0fa729f995f863 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Dec 2006 00:36:09 -0500
Subject: NFS: Fix up the dirty page accounting

There is now no reason to account for the dirty pages in the NFS code,
since the VM code will now do it for us via __set_page_dirty_nobuffers(),
and set_page_writeback().

We still need to keep the accounting of stable writes, though.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/pagelist.c | 1 -
 fs/nfs/write.c    | 1 -
 2 files changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e046c9b6a9d..47ae2b4dd1d 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -331,7 +331,6 @@ long nfs_scan_dirty(struct address_space *mapping,
 			nfsi->ndirty--;
 			nfs_list_remove_request(req);
 			nfs_list_add_request(req, dst);
-			dec_zone_page_state(req->wb_page, NR_FILE_DIRTY);
 			res++;
 			if (res == LONG_MAX)
 				goto out;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index a4c9df67a3a..9624aecab9b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -440,7 +440,6 @@ nfs_mark_request_dirty(struct nfs_page *req)
 	nfs_list_add_request(req, &nfsi->dirty);
 	nfsi->ndirty++;
 	spin_unlock(&nfsi->req_lock);
-	inc_zone_page_state(req->wb_page, NR_FILE_DIRTY);
 	mark_inode_dirty(inode);
 }
 
-- 
cgit v1.2.3


From 9cf85e0a243b56f4ef8439a1e6430307fd9dcda6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Dec 2006 00:36:56 -0500
Subject: NFS: Fix up writeback_control->nr_to_write accounting

We're really accounting for the same page twice now: once in
generic_writepages(), and once in nfs_scan_dirty().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/pagelist.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 47ae2b4dd1d..bc9fab68b29 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -288,11 +288,10 @@ long nfs_scan_dirty(struct address_space *mapping,
 	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
 	struct nfs_page *req;
 	pgoff_t idx_start, idx_end;
-	long count = wbc->nr_to_write;
 	long res = 0;
 	int found, i;
 
-	if (nfsi->ndirty == 0 || count <= 0)
+	if (nfsi->ndirty == 0)
 		return 0;
 	if (wbc->range_cyclic) {
 		idx_start = 0;
@@ -308,8 +307,6 @@ long nfs_scan_dirty(struct address_space *mapping,
 	for (;;) {
 		unsigned int toscan = NFS_SCAN_MAXENTRIES;
 
-		if (toscan > count)
-			toscan = count;
 		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
 				(void **)&pgvec[0], idx_start, toscan,
 				NFS_PAGE_TAG_DIRTY);
@@ -334,16 +331,11 @@ long nfs_scan_dirty(struct address_space *mapping,
 			res++;
 			if (res == LONG_MAX)
 				goto out;
-			count--;
-			if (count == 0)
-				goto out;
-
 next:
 			idx_start = req->wb_index + 1;
 		}
 	}
 out:
-	wbc->nr_to_write = count;
 	WARN_ON ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty));
 	return res;
 }
-- 
cgit v1.2.3


From a18030445fd4dd20e2248007b5d1cf0b5d89c69d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Dec 2006 00:45:19 -0500
Subject: NFS: Clean up calls to mark_inode_dirty() part 2

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9624aecab9b..7f3844d2bf3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -440,7 +440,7 @@ nfs_mark_request_dirty(struct nfs_page *req)
 	nfs_list_add_request(req, &nfsi->dirty);
 	nfsi->ndirty++;
 	spin_unlock(&nfsi->req_lock);
-	mark_inode_dirty(inode);
+	__mark_inode_dirty(inode, I_DIRTY_PAGES);
 }
 
 static void
@@ -474,7 +474,7 @@ nfs_mark_request_commit(struct nfs_page *req)
 	nfsi->ncommit++;
 	spin_unlock(&nfsi->req_lock);
 	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
-	mark_inode_dirty(inode);
+	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 }
 #endif
 
-- 
cgit v1.2.3


From c041b5ff8d50c432698d9bfbd68cc4b76c2ea5bc Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 5 Dec 2006 16:36:03 -0500
Subject: NLM: fix print format for tk_pid

The tk_pid field is an unsigned short.  The proper print format specifier for
that type is %5u, not %4d.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 2 +-
 fs/lockd/svc4proc.c | 2 +-
 fs/lockd/svcproc.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 3d84f600b63..41c983a0529 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -729,7 +729,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
 		goto retry_cancel;
 	}
 
-	dprintk("lockd: cancel status %d (task %d)\n",
+	dprintk("lockd: cancel status %u (task %u)\n",
 			req->a_res.status, task->tk_pid);
 
 	switch (req->a_res.status) {
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 0ce5c81ff50..f67146a8199 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -234,7 +234,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
  */
 static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
 {
-	dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+	dprintk("lockd: %5u callback returned %d\n", task->tk_pid,
 			-task->tk_status);
 }
 
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 32e99a6e8dc..3707c3a23e9 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -263,7 +263,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
  */
 static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
 {
-	dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+	dprintk("lockd: %5u callback returned %d\n", task->tk_pid,
 			-task->tk_status);
 }
 
-- 
cgit v1.2.3


From 36b12fb709d229f277efbbe710031d5a429b2412 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Wed, 6 Dec 2006 17:48:32 -0600
Subject: JFS: Fix conflicting superblock flags

JFS_NOINTEGRITY and JFS_USRQUOTA are defined to be the same value.
Change JFS_NOINTEGRITY to 0x40 and re-order the flags in the header
file to avoid repeating this problem.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_filsys.h | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index eb550b339bb..38f70ac03be 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -29,31 +29,21 @@
 /*
  *	 file system option (superblock flag)
  */
-/* mount time flag to disable journaling to disk */
-#define JFS_NOINTEGRITY 0x00000010
+
+/* directory option */
+#define JFS_UNICODE	0x00000001	/* unicode name */
 
 /* mount time flags for error handling */
 #define JFS_ERR_REMOUNT_RO 0x00000002   /* remount read-only */
 #define JFS_ERR_CONTINUE   0x00000004   /* continue */
 #define JFS_ERR_PANIC      0x00000008   /* panic */
 
+/* Quota support */
 #define	JFS_USRQUOTA	0x00000010
 #define	JFS_GRPQUOTA	0x00000020
 
-/* platform option (conditional compilation) */
-#define JFS_AIX		0x80000000	/* AIX support */
-/*	POSIX name/directory  support */
-
-#define JFS_OS2		0x40000000	/* OS/2 support */
-/*	case-insensitive name/directory support */
-
-#define JFS_DFS		0x20000000	/* DCE DFS LFS support */
-
-#define JFS_LINUX	0x10000000	/* Linux support */
-/*	case-sensitive name/directory support */
-
-/* directory option */
-#define JFS_UNICODE	0x00000001	/* unicode name */
+/* mount time flag to disable journaling to disk */
+#define JFS_NOINTEGRITY 0x00000040
 
 /* commit option */
 #define	JFS_COMMIT	0x00000f00	/* commit option mask */
@@ -61,6 +51,7 @@
 #define	JFS_LAZYCOMMIT	0x00000200	/* lazy commit */
 #define	JFS_TMPFS	0x00000400	/* temporary file system -
 					 * do not log/commit:
+					 * Never implemented
 					 */
 
 /* log logical volume option */
@@ -74,16 +65,25 @@
 #define JFS_SPARSE	0x00020000	/* sparse regular file */
 
 /* DASD Limits		F226941 */
-#define JFS_DASD_ENABLED	0x00040000	/* DASD limits enabled */
-#define	JFS_DASD_PRIME		0x00080000	/* Prime DASD usage on boot */
+#define JFS_DASD_ENABLED 0x00040000	/* DASD limits enabled */
+#define	JFS_DASD_PRIME	0x00080000	/* Prime DASD usage on boot */
 
 /* big endian flag */
-#define	JFS_SWAP_BYTES		0x00100000	/* running on big endian computer */
+#define	JFS_SWAP_BYTES	0x00100000	/* running on big endian computer */
 
 /* Directory index */
-#define JFS_DIR_INDEX		0x00200000	/* Persistent index for */
-						/* directory entries    */
+#define JFS_DIR_INDEX	0x00200000	/* Persistent index for */
 
+/* platform options */
+#define JFS_LINUX	0x10000000	/* Linux support */
+#define JFS_DFS		0x20000000	/* DCE DFS LFS support */
+/*	Never implemented */
+
+#define JFS_OS2		0x40000000	/* OS/2 support */
+/*	case-insensitive name/directory support */
+
+#define JFS_AIX		0x80000000	/* AIX support */
+/*	POSIX name/directory  support - Never implemented*/
 
 /*
  *	buffer cache configuration
-- 
cgit v1.2.3


From 67fd44fea274a5033ceb90284683bc44df61df54 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:03 +0100
Subject: [PATCH] x86-64: Implement compat code for SIOCSIFHWBROADCAST

This network ioctl wasn't handled before.

Reported by Alexandra.Kossovsky@oktetlabs.ru (Alexandra Kossovsky)
Signed-off-by: Andi Kleen <ak@suse.de>
---
 fs/compat_ioctl.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a91f2628c98..47bb78d1c3f 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2397,6 +2397,7 @@ HANDLE_IOCTL(SIOCGIFMAP, dev_ifsioc)
 HANDLE_IOCTL(SIOCSIFMAP, dev_ifsioc)
 HANDLE_IOCTL(SIOCGIFADDR, dev_ifsioc)
 HANDLE_IOCTL(SIOCSIFADDR, dev_ifsioc)
+HANDLE_IOCTL(SIOCSIFHWBROADCAST, dev_ifsioc)
 
 /* ioctls used by appletalk ddp.c */
 HANDLE_IOCTL(SIOCATALKDIFADDR, dev_ifsioc)
-- 
cgit v1.2.3


From 34126f9f41901ca9d7d0031c2b11fc0d6c07b72d Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 7 Dec 2006 09:13:14 -0500
Subject: [GFS2] Change gfs2_fsync() to use write_inode_now()

This is a bit better than the previous version of gfs2_fsync()
although it would be better still if we were able to call a
function which only wrote the inode & metadata. Its no big deal
though that this will potentially write the data as well since
the VFS has already done that before calling gfs2_fsync(). I've
also added a comment to explain whats going on here.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Morton <akpm@osdl.org>
---
 fs/gfs2/ops_file.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 7bd971bf7cd..b3f1e0349ae 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -510,6 +510,11 @@ static int gfs2_close(struct inode *inode, struct file *file)
  * is set. For stuffed inodes we must flush the log in order to
  * ensure that all data is on disk.
  *
+ * The call to write_inode_now() is there to write back metadata and
+ * the inode itself. It does also try and write the data, but thats
+ * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite()
+ * for us.
+ *
  * Returns: errno
  */
 
@@ -518,10 +523,6 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
 	struct inode *inode = dentry->d_inode;
 	int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
 	int ret = 0;
-	struct writeback_control wbc = {
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = 0,
-	};
 
 	if (gfs2_is_jdata(GFS2_I(inode))) {
 		gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
@@ -530,7 +531,7 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
 
 	if (sync_state != 0) {
 		if (!datasync)
-			ret = sync_inode(inode, &wbc);
+			ret = write_inode_now(inode, 0);
 
 		if (gfs2_is_stuffed(GFS2_I(inode)))
 			gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
-- 
cgit v1.2.3


From ac33d0710595579e3cfca42dde2257eb0b123f6d Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <pcaulfie@redhat.com>
Date: Wed, 6 Dec 2006 15:10:37 +0000
Subject: [DLM] Clean up lowcomms

This fixes up most of the things pointed out by akpm and Pavel Machek
with comments below indicating why some things have been left:

Andrew Morton wrote:
>
>> +static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
>> +{
>> +	struct nodeinfo *ni;
>> +	int r;
>> +	int n;
>> +
>> +	down_read(&nodeinfo_lock);
>
> Given that this function can sleep, I wonder if `alloc' is useful.
>
> I see lots of callers passing in a literal "0" for `alloc'.  That's in fact
> a secret (GFP_ATOMIC & ~__GFP_HIGH).  I doubt if that's what you really
> meant.  Particularly as the code could at least have used __GFP_WAIT (aka
> GFP_NOIO) which is much, much more reliable than "0".  In fact "0" is the
> least reliable mode possible.
>
> IOW, this is all bollixed up.

When 0 is passed into nodeid2nodeinfo the function does not try to allocate a
new structure at all. it's an indication that the caller only wants the nodeinfo
struct for that nodeid if there actually is one in existance.
I've tidied the function itself so it's more obvious, (and tidier!)

>> +/* Data received from remote end */
>> +static int receive_from_sock(void)
>> +{
>> +	int ret = 0;
>> +	struct msghdr msg;
>> +	struct kvec iov[2];
>> +	unsigned len;
>> +	int r;
>> +	struct sctp_sndrcvinfo *sinfo;
>> +	struct cmsghdr *cmsg;
>> +	struct nodeinfo *ni;
>> +
>> +	/* These two are marginally too big for stack allocation, but this
>> +	 * function is (currently) only called by dlm_recvd so static should be
>> +	 * OK.
>> +	 */
>> +	static struct sockaddr_storage msgname;
>> +	static char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
>
> whoa.  This is globally singly-threaded code??

Yes. it is only ever run in the context of dlm_recvd.
>>
>> +static void initiate_association(int nodeid)
>> +{
>> +	struct sockaddr_storage rem_addr;
>> +	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
>
> Another static buffer to worry about.  Globally singly-threaded code?

Yes. Only ever called by dlm_sendd.

>> +
>> +/* Send a message */
>> +static int send_to_sock(struct nodeinfo *ni)
>> +{
>> +	int ret = 0;
>> +	struct writequeue_entry *e;
>> +	int len, offset;
>> +	struct msghdr outmsg;
>> +	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
>
> Singly-threaded?

Yep.

>>
>> +static void dealloc_nodeinfo(void)
>> +{
>> +	int i;
>> +
>> +	for (i=1; i<=max_nodeid; i++) {
>> +		struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
>> +		if (ni) {
>> +			idr_remove(&nodeinfo_idr, i);
>
> Didn't that need locking?

Not. it's only ever called at DLM shutdown after all the other threads
have been stopped.

>>
>> +static int write_list_empty(void)
>> +{
>> +	int status;
>> +
>> +	spin_lock_bh(&write_nodes_lock);
>> +	status = list_empty(&write_nodes);
>> +	spin_unlock_bh(&write_nodes_lock);
>> +
>> +	return status;
>> +}
>
> This function's return value is meaningless.  As soon as the lock gets
> dropped, the return value can get out of sync with reality.
>
> Looking at the caller, this _might_ happen to be OK, but it's a nasty and
> dangerous thing.  Really the locking should be moved into the caller.

It's just an optimisation to allow the caller to schedule if there is no work
to do. if something arrives immediately afterwards then it will get picked up
when the process re-awakes (and it will be woken by that arrival).

The 'accepting' atomic has gone completely. as Andrew pointed out it didn't
really achieve much anyway. I suspect it was a plaster over some other
startup or shutdown bug to be honest.


Signed-off-by: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Pavel Machek <pavel@ucw.cz>
---
 fs/dlm/lowcomms-sctp.c | 264 ++++++++++++++++++--------------------
 fs/dlm/lowcomms-tcp.c  | 342 +++++++++++++++++++------------------------------
 fs/dlm/lowcomms.h      |   2 -
 fs/dlm/main.c          |  10 +-
 4 files changed, 261 insertions(+), 357 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/lowcomms-sctp.c b/fs/dlm/lowcomms-sctp.c
index 6da6b14d5a6..fe158d7a928 100644
--- a/fs/dlm/lowcomms-sctp.c
+++ b/fs/dlm/lowcomms-sctp.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -75,13 +75,13 @@ struct nodeinfo {
 };
 
 static DEFINE_IDR(nodeinfo_idr);
-static struct rw_semaphore	nodeinfo_lock;
-static int			max_nodeid;
+static DECLARE_RWSEM(nodeinfo_lock);
+static int max_nodeid;
 
 struct cbuf {
-	unsigned		base;
-	unsigned		len;
-	unsigned		mask;
+	unsigned int base;
+	unsigned int len;
+	unsigned int mask;
 };
 
 /* Just the one of these, now. But this struct keeps
@@ -90,9 +90,9 @@ struct cbuf {
 #define CF_READ_PENDING 1
 
 struct connection {
-	struct socket          *sock;
+	struct socket           *sock;
 	unsigned long		flags;
-	struct page            *rx_page;
+	struct page             *rx_page;
 	atomic_t		waiting_requests;
 	struct cbuf		cb;
 	int                     eagain_flag;
@@ -102,36 +102,40 @@ struct connection {
 
 struct writequeue_entry {
 	struct list_head	list;
-	struct page            *page;
+	struct page             *page;
 	int			offset;
 	int			len;
 	int			end;
 	int			users;
-	struct nodeinfo        *ni;
+	struct nodeinfo         *ni;
 };
 
-#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0)
-#define CBUF_EMPTY(cb) ((cb)->len == 0)
-#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
-#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)
+static void cbuf_add(struct cbuf *cb, int n)
+{
+	cb->len += n;
+}
 
-#define CBUF_INIT(cb, size) \
-do { \
-	(cb)->base = (cb)->len = 0; \
-	(cb)->mask = ((size)-1); \
-} while(0)
+static int cbuf_data(struct cbuf *cb)
+{
+	return ((cb->base + cb->len) & cb->mask);
+}
 
-#define CBUF_EAT(cb, n) \
-do { \
-	(cb)->len  -= (n); \
-	(cb)->base += (n); \
-	(cb)->base &= (cb)->mask; \
-} while(0)
+static void cbuf_init(struct cbuf *cb, int size)
+{
+	cb->base = cb->len = 0;
+	cb->mask = size-1;
+}
 
+static void cbuf_eat(struct cbuf *cb, int n)
+{
+	cb->len  -= n;
+	cb->base += n;
+	cb->base &= cb->mask;
+}
 
 /* List of nodes which have writes pending */
-static struct list_head write_nodes;
-static spinlock_t write_nodes_lock;
+static LIST_HEAD(write_nodes);
+static DEFINE_SPINLOCK(write_nodes_lock);
 
 /* Maximum number of incoming messages to process before
  * doing a schedule()
@@ -141,8 +145,7 @@ static spinlock_t write_nodes_lock;
 /* Manage daemons */
 static struct task_struct *recv_task;
 static struct task_struct *send_task;
-static wait_queue_head_t lowcomms_recv_wait;
-static atomic_t accepting;
+static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_wait);
 
 /* The SCTP connection */
 static struct connection sctp_con;
@@ -161,11 +164,11 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
 		return error;
 
 	if (dlm_local_addr[0]->ss_family == AF_INET) {
-	        struct sockaddr_in *in4  = (struct sockaddr_in *) &addr;
+		struct sockaddr_in *in4  = (struct sockaddr_in *) &addr;
 		struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr;
 		ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
 	} else {
-	        struct sockaddr_in6 *in6  = (struct sockaddr_in6 *) &addr;
+		struct sockaddr_in6 *in6  = (struct sockaddr_in6 *) &addr;
 		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
 		memcpy(&ret6->sin6_addr, &in6->sin6_addr,
 		       sizeof(in6->sin6_addr));
@@ -174,6 +177,8 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
 	return 0;
 }
 
+/* If alloc is 0 here we will not attempt to allocate a new
+   nodeinfo struct */
 static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
 {
 	struct nodeinfo *ni;
@@ -184,44 +189,45 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
 	ni = idr_find(&nodeinfo_idr, nodeid);
 	up_read(&nodeinfo_lock);
 
-	if (!ni && alloc) {
-		down_write(&nodeinfo_lock);
+	if (ni || !alloc)
+		return ni;
 
-		ni = idr_find(&nodeinfo_idr, nodeid);
-		if (ni)
-			goto out_up;
+	down_write(&nodeinfo_lock);
 
-		r = idr_pre_get(&nodeinfo_idr, alloc);
-		if (!r)
-			goto out_up;
+	ni = idr_find(&nodeinfo_idr, nodeid);
+	if (ni)
+		goto out_up;
 
-		ni = kmalloc(sizeof(struct nodeinfo), alloc);
-		if (!ni)
-			goto out_up;
+	r = idr_pre_get(&nodeinfo_idr, alloc);
+	if (!r)
+		goto out_up;
 
-		r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n);
-		if (r) {
-			kfree(ni);
-			ni = NULL;
-			goto out_up;
-		}
-		if (n != nodeid) {
-			idr_remove(&nodeinfo_idr, n);
-			kfree(ni);
-			ni = NULL;
-			goto out_up;
-		}
-		memset(ni, 0, sizeof(struct nodeinfo));
-		spin_lock_init(&ni->lock);
-		INIT_LIST_HEAD(&ni->writequeue);
-		spin_lock_init(&ni->writequeue_lock);
-		ni->nodeid = nodeid;
-
-		if (nodeid > max_nodeid)
-			max_nodeid = nodeid;
-	out_up:
-		up_write(&nodeinfo_lock);
+	ni = kmalloc(sizeof(struct nodeinfo), alloc);
+	if (!ni)
+		goto out_up;
+
+	r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n);
+	if (r) {
+		kfree(ni);
+		ni = NULL;
+		goto out_up;
 	}
+	if (n != nodeid) {
+		idr_remove(&nodeinfo_idr, n);
+		kfree(ni);
+		ni = NULL;
+		goto out_up;
+	}
+	memset(ni, 0, sizeof(struct nodeinfo));
+	spin_lock_init(&ni->lock);
+	INIT_LIST_HEAD(&ni->writequeue);
+	spin_lock_init(&ni->writequeue_lock);
+	ni->nodeid = nodeid;
+
+	if (nodeid > max_nodeid)
+		max_nodeid = nodeid;
+out_up:
+	up_write(&nodeinfo_lock);
 
 	return ni;
 }
@@ -279,13 +285,13 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
 		in4_addr->sin_port = cpu_to_be16(port);
 		memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
 		memset(in4_addr+1, 0, sizeof(struct sockaddr_storage) -
-				      sizeof(struct sockaddr_in));
+		       sizeof(struct sockaddr_in));
 		*addr_len = sizeof(struct sockaddr_in);
 	} else {
 		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
 		in6_addr->sin6_port = cpu_to_be16(port);
 		memset(in6_addr+1, 0, sizeof(struct sockaddr_storage) -
-				      sizeof(struct sockaddr_in6));
+		       sizeof(struct sockaddr_in6));
 		*addr_len = sizeof(struct sockaddr_in6);
 	}
 }
@@ -324,7 +330,7 @@ static void send_shutdown(sctp_assoc_t associd)
 	cmsg->cmsg_type = SCTP_SNDRCV;
 	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
 	outmessage.msg_controllen = cmsg->cmsg_len;
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+	sinfo = CMSG_DATA(cmsg);
 	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
 
 	sinfo->sinfo_flags |= MSG_EOF;
@@ -387,7 +393,7 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
 
 			if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) {
 				log_print("COMM_UP for invalid assoc ID %d",
-					 (int)sn->sn_assoc_change.sac_assoc_id);
+					  (int)sn->sn_assoc_change.sac_assoc_id);
 				init_failed();
 				return;
 			}
@@ -398,15 +404,18 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
 			fs = get_fs();
 			set_fs(get_ds());
 			ret = sctp_con.sock->ops->getsockopt(sctp_con.sock,
-						IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
-						(char*)&prim, &prim_len);
+							     IPPROTO_SCTP,
+							     SCTP_PRIMARY_ADDR,
+							     (char*)&prim,
+							     &prim_len);
 			set_fs(fs);
 			if (ret < 0) {
 				struct nodeinfo *ni;
 
 				log_print("getsockopt/sctp_primary_addr on "
 					  "new assoc %d failed : %d",
-				    (int)sn->sn_assoc_change.sac_assoc_id, ret);
+					  (int)sn->sn_assoc_change.sac_assoc_id,
+					  ret);
 
 				/* Retry INIT later */
 				ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
@@ -426,12 +435,10 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
 				return;
 
 			/* Save the assoc ID */
-			spin_lock(&ni->lock);
 			ni->assoc_id = sn->sn_assoc_change.sac_assoc_id;
-			spin_unlock(&ni->lock);
 
 			log_print("got new/restarted association %d nodeid %d",
-			       (int)sn->sn_assoc_change.sac_assoc_id, nodeid);
+				  (int)sn->sn_assoc_change.sac_assoc_id, nodeid);
 
 			/* Send any pending writes */
 			clear_bit(NI_INIT_PENDING, &ni->flags);
@@ -507,13 +514,12 @@ static int receive_from_sock(void)
 		sctp_con.rx_page = alloc_page(GFP_ATOMIC);
 		if (sctp_con.rx_page == NULL)
 			goto out_resched;
-		CBUF_INIT(&sctp_con.cb, PAGE_CACHE_SIZE);
+		cbuf_init(&sctp_con.cb, PAGE_CACHE_SIZE);
 	}
 
 	memset(&incmsg, 0, sizeof(incmsg));
 	memset(&msgname, 0, sizeof(msgname));
 
-	memset(incmsg, 0, sizeof(incmsg));
 	msg.msg_name = &msgname;
 	msg.msg_namelen = sizeof(msgname);
 	msg.msg_flags = 0;
@@ -532,17 +538,17 @@ static int receive_from_sock(void)
 	 * iov[0] is the bit of the circular buffer between the current end
 	 * point (cb.base + cb.len) and the end of the buffer.
 	 */
-	iov[0].iov_len = sctp_con.cb.base - CBUF_DATA(&sctp_con.cb);
+	iov[0].iov_len = sctp_con.cb.base - cbuf_data(&sctp_con.cb);
 	iov[0].iov_base = page_address(sctp_con.rx_page) +
-			  CBUF_DATA(&sctp_con.cb);
+		cbuf_data(&sctp_con.cb);
 	iov[1].iov_len = 0;
 
 	/*
 	 * iov[1] is the bit of the circular buffer between the start of the
 	 * buffer and the start of the currently used section (cb.base)
 	 */
-	if (CBUF_DATA(&sctp_con.cb) >= sctp_con.cb.base) {
-		iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&sctp_con.cb);
+	if (cbuf_data(&sctp_con.cb) >= sctp_con.cb.base) {
+		iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&sctp_con.cb);
 		iov[1].iov_len = sctp_con.cb.base;
 		iov[1].iov_base = page_address(sctp_con.rx_page);
 		msg.msg_iovlen = 2;
@@ -557,7 +563,7 @@ static int receive_from_sock(void)
 	msg.msg_control = incmsg;
 	msg.msg_controllen = sizeof(incmsg);
 	cmsg = CMSG_FIRSTHDR(&msg);
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+	sinfo = CMSG_DATA(cmsg);
 
 	if (msg.msg_flags & MSG_NOTIFICATION) {
 		process_sctp_notification(&msg, page_address(sctp_con.rx_page));
@@ -583,29 +589,29 @@ static int receive_from_sock(void)
 	if (r == 1)
 		return 0;
 
-	CBUF_ADD(&sctp_con.cb, ret);
+	cbuf_add(&sctp_con.cb, ret);
 	ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
 					  page_address(sctp_con.rx_page),
 					  sctp_con.cb.base, sctp_con.cb.len,
 					  PAGE_CACHE_SIZE);
 	if (ret < 0)
 		goto out_close;
-	CBUF_EAT(&sctp_con.cb, ret);
+	cbuf_eat(&sctp_con.cb, ret);
 
-      out:
+out:
 	ret = 0;
 	goto out_ret;
 
-      out_resched:
+out_resched:
 	lowcomms_data_ready(sctp_con.sock->sk, 0);
 	ret = 0;
-	schedule();
+	cond_resched();
 	goto out_ret;
 
-      out_close:
+out_close:
 	if (ret != -EAGAIN)
 		log_print("error reading from sctp socket: %d", ret);
-      out_ret:
+out_ret:
 	return ret;
 }
 
@@ -619,10 +625,12 @@ static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
 	set_fs(get_ds());
 	if (num == 1)
 		result = sctp_con.sock->ops->bind(sctp_con.sock,
-					(struct sockaddr *) addr, addr_len);
+						  (struct sockaddr *) addr,
+						  addr_len);
 	else
 		result = sctp_con.sock->ops->setsockopt(sctp_con.sock, SOL_SCTP,
-				SCTP_SOCKOPT_BINDX_ADD, (char *)addr, addr_len);
+							SCTP_SOCKOPT_BINDX_ADD,
+							(char *)addr, addr_len);
 	set_fs(fs);
 
 	if (result < 0)
@@ -719,10 +727,10 @@ static int init_sock(void)
 
 	return 0;
 
- create_delsock:
+create_delsock:
 	sock_release(sock);
 	sctp_con.sock = NULL;
- out:
+out:
 	return result;
 }
 
@@ -756,16 +764,13 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
 	int users = 0;
 	struct nodeinfo *ni;
 
-	if (!atomic_read(&accepting))
-		return NULL;
-
 	ni = nodeid2nodeinfo(nodeid, allocation);
 	if (!ni)
 		return NULL;
 
 	spin_lock(&ni->writequeue_lock);
 	e = list_entry(ni->writequeue.prev, struct writequeue_entry, list);
-	if (((struct list_head *) e == &ni->writequeue) ||
+	if ((&e->list == &ni->writequeue) ||
 	    (PAGE_CACHE_SIZE - e->end < len)) {
 		e = NULL;
 	} else {
@@ -776,7 +781,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
 	spin_unlock(&ni->writequeue_lock);
 
 	if (e) {
-	      got_one:
+	got_one:
 		if (users == 0)
 			kmap(e->page);
 		*ppc = page_address(e->page) + offset;
@@ -803,9 +808,6 @@ void dlm_lowcomms_commit_buffer(void *arg)
 	int users;
 	struct nodeinfo *ni = e->ni;
 
-	if (!atomic_read(&accepting))
-		return;
-
 	spin_lock(&ni->writequeue_lock);
 	users = --e->users;
 	if (users)
@@ -822,7 +824,7 @@ void dlm_lowcomms_commit_buffer(void *arg)
 	}
 	return;
 
-      out:
+out:
 	spin_unlock(&ni->writequeue_lock);
 	return;
 }
@@ -878,7 +880,7 @@ static void initiate_association(int nodeid)
 	cmsg->cmsg_level = IPPROTO_SCTP;
 	cmsg->cmsg_type = SCTP_SNDRCV;
 	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+	sinfo = CMSG_DATA(cmsg);
 	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
 	sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
 
@@ -892,7 +894,7 @@ static void initiate_association(int nodeid)
 }
 
 /* Send a message */
-static int send_to_sock(struct nodeinfo *ni)
+static void send_to_sock(struct nodeinfo *ni)
 {
 	int ret = 0;
 	struct writequeue_entry *e;
@@ -903,13 +905,13 @@ static int send_to_sock(struct nodeinfo *ni)
 	struct sctp_sndrcvinfo *sinfo;
 	struct kvec iov;
 
-        /* See if we need to init an association before we start
+	/* See if we need to init an association before we start
 	   sending precious messages */
 	spin_lock(&ni->lock);
 	if (!ni->assoc_id && !test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
 		spin_unlock(&ni->lock);
 		initiate_association(ni->nodeid);
-		return 0;
+		return;
 	}
 	spin_unlock(&ni->lock);
 
@@ -923,7 +925,7 @@ static int send_to_sock(struct nodeinfo *ni)
 	cmsg->cmsg_level = IPPROTO_SCTP;
 	cmsg->cmsg_type = SCTP_SNDRCV;
 	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+	sinfo = CMSG_DATA(cmsg);
 	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
 	sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
 	sinfo->sinfo_assoc_id = ni->assoc_id;
@@ -955,7 +957,7 @@ static int send_to_sock(struct nodeinfo *ni)
 				goto send_error;
 		} else {
 			/* Don't starve people filling buffers */
-			schedule();
+			cond_resched();
 		}
 
 		spin_lock(&ni->writequeue_lock);
@@ -964,15 +966,16 @@ static int send_to_sock(struct nodeinfo *ni)
 
 		if (e->len == 0 && e->users == 0) {
 			list_del(&e->list);
+			kunmap(e->page);
 			free_entry(e);
 			continue;
 		}
 	}
 	spin_unlock(&ni->writequeue_lock);
- out:
-	return ret;
+out:
+	return;
 
- send_error:
+send_error:
 	log_print("Error sending to node %d %d", ni->nodeid, ret);
 	spin_lock(&ni->lock);
 	if (!test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
@@ -982,7 +985,7 @@ static int send_to_sock(struct nodeinfo *ni)
 	} else
 		spin_unlock(&ni->lock);
 
-	return ret;
+	return;
 }
 
 /* Try to send any messages that are pending */
@@ -994,7 +997,7 @@ static void process_output_queue(void)
 	spin_lock_bh(&write_nodes_lock);
 	list_for_each_safe(list, temp, &write_nodes) {
 		struct nodeinfo *ni =
-		    list_entry(list, struct nodeinfo, write_list);
+			list_entry(list, struct nodeinfo, write_list);
 		clear_bit(NI_WRITE_PENDING, &ni->flags);
 		list_del(&ni->write_list);
 
@@ -1106,7 +1109,7 @@ static int dlm_recvd(void *data)
 		set_current_state(TASK_INTERRUPTIBLE);
 		add_wait_queue(&lowcomms_recv_wait, &wait);
 		if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
-			schedule();
+			cond_resched();
 		remove_wait_queue(&lowcomms_recv_wait, &wait);
 		set_current_state(TASK_RUNNING);
 
@@ -1118,12 +1121,12 @@ static int dlm_recvd(void *data)
 
 				/* Don't starve out everyone else */
 				if (++count >= MAX_RX_MSG_COUNT) {
-					schedule();
+					cond_resched();
 					count = 0;
 				}
 			} while (!kthread_should_stop() && ret >=0);
 		}
-		schedule();
+		cond_resched();
 	}
 
 	return 0;
@@ -1138,7 +1141,7 @@ static int dlm_sendd(void *data)
 	while (!kthread_should_stop()) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (write_list_empty())
-			schedule();
+			cond_resched();
 		set_current_state(TASK_RUNNING);
 
 		if (sctp_con.eagain_flag) {
@@ -1166,7 +1169,7 @@ static int daemons_start(void)
 
 	p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
 	error = IS_ERR(p);
-       	if (error) {
+	if (error) {
 		log_print("can't start dlm_recvd %d", error);
 		return error;
 	}
@@ -1174,7 +1177,7 @@ static int daemons_start(void)
 
 	p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
 	error = IS_ERR(p);
-       	if (error) {
+	if (error) {
 		log_print("can't start dlm_sendd %d", error);
 		kthread_stop(recv_task);
 		return error;
@@ -1197,43 +1200,28 @@ int dlm_lowcomms_start(void)
 	error = daemons_start();
 	if (error)
 		goto fail_sock;
-	atomic_set(&accepting, 1);
 	return 0;
 
- fail_sock:
+fail_sock:
 	close_connection();
 	return error;
 }
 
-/* Set all the activity flags to prevent any socket activity. */
-
 void dlm_lowcomms_stop(void)
 {
-	atomic_set(&accepting, 0);
+	int i;
+
 	sctp_con.flags = 0x7;
 	daemons_stop();
 	clean_writequeues();
 	close_connection();
 	dealloc_nodeinfo();
 	max_nodeid = 0;
-}
 
-int dlm_lowcomms_init(void)
-{
-	init_waitqueue_head(&lowcomms_recv_wait);
-	spin_lock_init(&write_nodes_lock);
-	INIT_LIST_HEAD(&write_nodes);
-	init_rwsem(&nodeinfo_lock);
-	return 0;
-}
-
-void dlm_lowcomms_exit(void)
-{
-	int i;
+	dlm_local_count = 0;
+	dlm_local_nodeid = 0;
 
 	for (i = 0; i < dlm_local_count; i++)
 		kfree(dlm_local_addr[i]);
-	dlm_local_count = 0;
-	dlm_local_nodeid = 0;
 }
 
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
index 7289e59b4bd..8f2791fc844 100644
--- a/fs/dlm/lowcomms-tcp.c
+++ b/fs/dlm/lowcomms-tcp.c
@@ -54,44 +54,59 @@
 #include "config.h"
 
 struct cbuf {
-	unsigned base;
-	unsigned len;
-	unsigned mask;
+	unsigned int base;
+	unsigned int len;
+	unsigned int mask;
 };
 
-#ifndef FALSE
-#define FALSE 0
-#define TRUE 1
-#endif
 #define NODE_INCREMENT 32
+static void cbuf_add(struct cbuf *cb, int n)
+{
+	cb->len += n;
+}
 
-#define CBUF_INIT(cb, size) do { (cb)->base = (cb)->len = 0; (cb)->mask = ((size)-1); } while(0)
-#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0)
-#define CBUF_EMPTY(cb) ((cb)->len == 0)
-#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
-#define CBUF_EAT(cb, n) do { (cb)->len  -= (n); \
-                             (cb)->base += (n); (cb)->base &= (cb)->mask; } while(0)
-#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)
+static int cbuf_data(struct cbuf *cb)
+{
+	return ((cb->base + cb->len) & cb->mask);
+}
+
+static void cbuf_init(struct cbuf *cb, int size)
+{
+	cb->base = cb->len = 0;
+	cb->mask = size-1;
+}
+
+static void cbuf_eat(struct cbuf *cb, int n)
+{
+	cb->len  -= n;
+	cb->base += n;
+	cb->base &= cb->mask;
+}
+
+static bool cbuf_empty(struct cbuf *cb)
+{
+	return cb->len == 0;
+}
 
 /* Maximum number of incoming messages to process before
-   doing a schedule()
+   doing a cond_resched()
 */
 #define MAX_RX_MSG_COUNT 25
 
 struct connection {
 	struct socket *sock;	/* NULL if not connected */
 	uint32_t nodeid;	/* So we know who we are in the list */
-	struct rw_semaphore sock_sem;	/* Stop connect races */
-	struct list_head read_list;	/* On this list when ready for reading */
-	struct list_head write_list;	/* On this list when ready for writing */
-	struct list_head state_list;	/* On this list when ready to connect */
+	struct rw_semaphore sock_sem; /* Stop connect races */
+	struct list_head read_list;   /* On this list when ready for reading */
+	struct list_head write_list;  /* On this list when ready for writing */
+	struct list_head state_list;  /* On this list when ready to connect */
 	unsigned long flags;	/* bit 1,2 = We are on the read/write lists */
 #define CF_READ_PENDING 1
 #define CF_WRITE_PENDING 2
 #define CF_CONNECT_PENDING 3
 #define CF_IS_OTHERCON 4
-	struct list_head writequeue;	/* List of outgoing writequeue_entries */
-	struct list_head listenlist;    /* List of allocated listening sockets */
+	struct list_head writequeue;  /* List of outgoing writequeue_entries */
+	struct list_head listenlist;  /* List of allocated listening sockets */
 	spinlock_t writequeue_lock;
 	int (*rx_action) (struct connection *);	/* What to do when active */
 	struct page *rx_page;
@@ -121,28 +136,27 @@ static struct task_struct *recv_task;
 static struct task_struct *send_task;
 
 static wait_queue_t lowcomms_send_waitq_head;
-static wait_queue_head_t lowcomms_send_waitq;
+static DECLARE_WAIT_QUEUE_HEAD(lowcomms_send_waitq);
 static wait_queue_t lowcomms_recv_waitq_head;
-static wait_queue_head_t lowcomms_recv_waitq;
+static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_waitq);
 
 /* An array of pointers to connections, indexed by NODEID */
 static struct connection **connections;
-static struct semaphore connections_lock;
+static DECLARE_MUTEX(connections_lock);
 static kmem_cache_t *con_cache;
 static int conn_array_size;
-static atomic_t accepting;
 
 /* List of sockets that have reads pending */
-static struct list_head read_sockets;
-static spinlock_t read_sockets_lock;
+static LIST_HEAD(read_sockets);
+static DEFINE_SPINLOCK(read_sockets_lock);
 
 /* List of sockets which have writes pending */
-static struct list_head write_sockets;
-static spinlock_t write_sockets_lock;
+static LIST_HEAD(write_sockets);
+static DEFINE_SPINLOCK(write_sockets_lock);
 
 /* List of sockets which have connects pending */
-static struct list_head state_sockets;
-static spinlock_t state_sockets_lock;
+static LIST_HEAD(state_sockets);
+static DEFINE_SPINLOCK(state_sockets_lock);
 
 static struct connection *nodeid2con(int nodeid, gfp_t allocation)
 {
@@ -153,12 +167,11 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
 		int new_size = nodeid + NODE_INCREMENT;
 		struct connection **new_conns;
 
-		new_conns = kmalloc(sizeof(struct connection *) *
+		new_conns = kzalloc(sizeof(struct connection *) *
 				    new_size, allocation);
 		if (!new_conns)
 			goto finish;
 
-		memset(new_conns, 0, sizeof(struct connection *) * new_size);
 		memcpy(new_conns, connections,  sizeof(struct connection *) * conn_array_size);
 		conn_array_size = new_size;
 		kfree(connections);
@@ -168,11 +181,10 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
 
 	con = connections[nodeid];
 	if (con == NULL && allocation) {
-		con = kmem_cache_alloc(con_cache, allocation);
+		con = kmem_cache_zalloc(con_cache, allocation);
 		if (!con)
 			goto finish;
 
-		memset(con, 0, sizeof(*con));
 		con->nodeid = nodeid;
 		init_rwsem(&con->sock_sem);
 		INIT_LIST_HEAD(&con->writequeue);
@@ -181,7 +193,7 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
 		connections[nodeid] = con;
 	}
 
- finish:
+finish:
 	up(&connections_lock);
 	return con;
 }
@@ -220,8 +232,6 @@ static inline void lowcomms_connect_sock(struct connection *con)
 {
 	if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
 		return;
-	if (!atomic_read(&accepting))
-		return;
 
 	spin_lock_bh(&state_sockets_lock);
 	list_add_tail(&con->state_list, &state_sockets);
@@ -232,31 +242,8 @@ static inline void lowcomms_connect_sock(struct connection *con)
 
 static void lowcomms_state_change(struct sock *sk)
 {
-/*	struct connection *con = sock2con(sk); */
-
-	switch (sk->sk_state) {
-	case TCP_ESTABLISHED:
+	if (sk->sk_state == TCP_ESTABLISHED)
 		lowcomms_write_space(sk);
-		break;
-
-	case TCP_FIN_WAIT1:
-	case TCP_FIN_WAIT2:
-	case TCP_TIME_WAIT:
-	case TCP_CLOSE:
-	case TCP_CLOSE_WAIT:
-	case TCP_LAST_ACK:
-	case TCP_CLOSING:
-		/* FIXME: I think this causes more trouble than it solves.
-		   lowcomms wil reconnect anyway when there is something to
-		   send. This just attempts reconnection if a node goes down!
-		*/
-		/* lowcomms_connect_sock(con); */
-		break;
-
-	default:
-		printk("dlm: lowcomms_state_change: state=%d\n", sk->sk_state);
-		break;
-	}
 }
 
 /* Make a socket active */
@@ -277,13 +264,12 @@ static int add_sock(struct socket *sock, struct connection *con)
 static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
 			  int *addr_len)
 {
-        saddr->ss_family =  dlm_local_addr.ss_family;
-        if (saddr->ss_family == AF_INET) {
+	saddr->ss_family =  dlm_local_addr.ss_family;
+	if (saddr->ss_family == AF_INET) {
 		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
 		in4_addr->sin_port = cpu_to_be16(port);
 		*addr_len = sizeof(struct sockaddr_in);
-	}
-	else {
+	} else {
 		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
 		in6_addr->sin6_port = cpu_to_be16(port);
 		*addr_len = sizeof(struct sockaddr_in6);
@@ -291,7 +277,7 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
 }
 
 /* Close a remote connection and tidy up */
-static void close_connection(struct connection *con, int and_other)
+static void close_connection(struct connection *con, bool and_other)
 {
 	down_write(&con->sock_sem);
 
@@ -300,11 +286,8 @@ static void close_connection(struct connection *con, int and_other)
 		con->sock = NULL;
 	}
 	if (con->othercon && and_other) {
-		/* Argh! recursion in kernel code!
-		   Actually, this isn't a list so it
-		   will only re-enter once.
-		*/
-		close_connection(con->othercon, FALSE);
+		/* Will only re-enter once. */
+		close_connection(con->othercon, false);
 	}
 	if (con->rx_page) {
 		__free_page(con->rx_page);
@@ -337,7 +320,7 @@ static int receive_from_sock(struct connection *con)
 		con->rx_page = alloc_page(GFP_ATOMIC);
 		if (con->rx_page == NULL)
 			goto out_resched;
-		CBUF_INIT(&con->cb, PAGE_CACHE_SIZE);
+		cbuf_init(&con->cb, PAGE_CACHE_SIZE);
 	}
 
 	msg.msg_control = NULL;
@@ -352,16 +335,16 @@ static int receive_from_sock(struct connection *con)
 	 * iov[0] is the bit of the circular buffer between the current end
 	 * point (cb.base + cb.len) and the end of the buffer.
 	 */
-	iov[0].iov_len = con->cb.base - CBUF_DATA(&con->cb);
-	iov[0].iov_base = page_address(con->rx_page) + CBUF_DATA(&con->cb);
+	iov[0].iov_len = con->cb.base - cbuf_data(&con->cb);
+	iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb);
 	iov[1].iov_len = 0;
 
 	/*
 	 * iov[1] is the bit of the circular buffer between the start of the
 	 * buffer and the start of the currently used section (cb.base)
 	 */
-	if (CBUF_DATA(&con->cb) >= con->cb.base) {
-		iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&con->cb);
+	if (cbuf_data(&con->cb) >= con->cb.base) {
+		iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb);
 		iov[1].iov_len = con->cb.base;
 		iov[1].iov_base = page_address(con->rx_page);
 		msg.msg_iovlen = 2;
@@ -378,7 +361,7 @@ static int receive_from_sock(struct connection *con)
 		goto out_close;
 	if (ret == len)
 		call_again_soon = 1;
-	CBUF_ADD(&con->cb, ret);
+	cbuf_add(&con->cb, ret);
 	ret = dlm_process_incoming_buffer(con->nodeid,
 					  page_address(con->rx_page),
 					  con->cb.base, con->cb.len,
@@ -391,35 +374,32 @@ static int receive_from_sock(struct connection *con)
 	}
 	if (ret < 0)
 		goto out_close;
-	CBUF_EAT(&con->cb, ret);
+	cbuf_eat(&con->cb, ret);
 
-	if (CBUF_EMPTY(&con->cb) && !call_again_soon) {
+	if (cbuf_empty(&con->cb) && !call_again_soon) {
 		__free_page(con->rx_page);
 		con->rx_page = NULL;
 	}
 
-      out:
+out:
 	if (call_again_soon)
 		goto out_resched;
 	up_read(&con->sock_sem);
-	ret = 0;
-	goto out_ret;
+	return 0;
 
-      out_resched:
+out_resched:
 	lowcomms_data_ready(con->sock->sk, 0);
 	up_read(&con->sock_sem);
-	ret = 0;
-	schedule();
-	goto out_ret;
+	cond_resched();
+	return 0;
 
-      out_close:
+out_close:
 	up_read(&con->sock_sem);
 	if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) {
-		close_connection(con, FALSE);
+		close_connection(con, false);
 		/* Reconnect when there is something to send */
 	}
 
-      out_ret:
 	return ret;
 }
 
@@ -434,7 +414,8 @@ static int accept_from_sock(struct connection *con)
 	struct connection *newcon;
 
 	memset(&peeraddr, 0, sizeof(peeraddr));
-	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &newsock);
+	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM,
+				  IPPROTO_TCP, &newsock);
 	if (result < 0)
 		return -ENOMEM;
 
@@ -462,7 +443,7 @@ static int accept_from_sock(struct connection *con)
 	/* Get the new node's NODEID */
 	make_sockaddr(&peeraddr, 0, &len);
 	if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
-	    	printk("dlm: connect from non cluster node\n");
+		printk("dlm: connect from non cluster node\n");
 		sock_release(newsock);
 		up_read(&con->sock_sem);
 		return -1;
@@ -483,17 +464,16 @@ static int accept_from_sock(struct connection *con)
 	}
 	down_write(&newcon->sock_sem);
 	if (newcon->sock) {
-	        struct connection *othercon = newcon->othercon;
+		struct connection *othercon = newcon->othercon;
 
 		if (!othercon) {
-			othercon = kmem_cache_alloc(con_cache, GFP_KERNEL);
+			othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
 			if (!othercon) {
 				printk("dlm: failed to allocate incoming socket\n");
 				up_write(&newcon->sock_sem);
 				result = -ENOMEM;
 				goto accept_err;
 			}
-			memset(othercon, 0, sizeof(*othercon));
 			othercon->nodeid = nodeid;
 			othercon->rx_action = receive_from_sock;
 			init_rwsem(&othercon->sock_sem);
@@ -523,7 +503,7 @@ static int accept_from_sock(struct connection *con)
 
 	return 0;
 
-      accept_err:
+accept_err:
 	up_read(&con->sock_sem);
 	sock_release(newsock);
 
@@ -533,7 +513,7 @@ static int accept_from_sock(struct connection *con)
 }
 
 /* Connect a new socket to its peer */
-static int connect_to_sock(struct connection *con)
+static void connect_to_sock(struct connection *con)
 {
 	int result = -EHOSTUNREACH;
 	struct sockaddr_storage saddr;
@@ -542,7 +522,7 @@ static int connect_to_sock(struct connection *con)
 
 	if (con->nodeid == 0) {
 		log_print("attempt to connect sock 0 foiled");
-		return 0;
+		return;
 	}
 
 	down_write(&con->sock_sem);
@@ -556,13 +536,14 @@ static int connect_to_sock(struct connection *con)
 	}
 
 	/* Create a socket to communicate with */
-	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &sock);
+	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM,
+				  IPPROTO_TCP, &sock);
 	if (result < 0)
 		goto out_err;
 
 	memset(&saddr, 0, sizeof(saddr));
 	if (dlm_nodeid_to_addr(con->nodeid, &saddr))
-	        goto out_err;
+		goto out_err;
 
 	sock->sk->sk_user_data = con;
 	con->rx_action = receive_from_sock;
@@ -574,22 +555,13 @@ static int connect_to_sock(struct connection *con)
 	log_print("connecting to %d", con->nodeid);
 	result =
 		sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
-			       O_NONBLOCK);
+				   O_NONBLOCK);
 	if (result == -EINPROGRESS)
 		result = 0;
-	if (result != 0)
-		goto out_err;
-
-      out:
-	up_write(&con->sock_sem);
-	/*
-	 * Returning an error here means we've given up trying to connect to
-	 * a remote node, otherwise we return 0 and reschedule the connetion
-	 * attempt
-	 */
-	return result;
+	if (result == 0)
+		goto out;
 
-      out_err:
+out_err:
 	if (con->sock) {
 		sock_release(con->sock);
 		con->sock = NULL;
@@ -604,12 +576,15 @@ static int connect_to_sock(struct connection *con)
 		lowcomms_connect_sock(con);
 		result = 0;
 	}
-	goto out;
+out:
+	up_write(&con->sock_sem);
+	return;
 }
 
-static struct socket *create_listen_sock(struct connection *con, struct sockaddr_storage *saddr)
+static struct socket *create_listen_sock(struct connection *con,
+					 struct sockaddr_storage *saddr)
 {
-        struct socket *sock = NULL;
+	struct socket *sock = NULL;
 	mm_segment_t fs;
 	int result = 0;
 	int one = 1;
@@ -629,10 +604,12 @@ static struct socket *create_listen_sock(struct connection *con, struct sockaddr
 
 	fs = get_fs();
 	set_fs(get_ds());
-	result = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(one));
+	result = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+				 (char *)&one, sizeof(one));
 	set_fs(fs);
 	if (result < 0) {
-		printk("dlm: Failed to set SO_REUSEADDR on socket: result=%d\n",result);
+		printk("dlm: Failed to set SO_REUSEADDR on socket: result=%d\n",
+		       result);
 	}
 	sock->sk->sk_user_data = con;
 	con->rx_action = accept_from_sock;
@@ -652,7 +629,8 @@ static struct socket *create_listen_sock(struct connection *con, struct sockaddr
 	fs = get_fs();
 	set_fs(get_ds());
 
-	result = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&one, sizeof(one));
+	result = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
+				 (char *)&one, sizeof(one));
 	set_fs(fs);
 	if (result < 0) {
 		printk("dlm: Set keepalive failed: %d\n", result);
@@ -666,7 +644,7 @@ static struct socket *create_listen_sock(struct connection *con, struct sockaddr
 		goto create_out;
 	}
 
-      create_out:
+create_out:
 	return sock;
 }
 
@@ -679,10 +657,6 @@ static int listen_for_all(void)
 	int result = -EINVAL;
 
 	/* We don't support multi-homed hosts */
-	memset(con, 0, sizeof(*con));
-	init_rwsem(&con->sock_sem);
-	spin_lock_init(&con->writequeue_lock);
-	INIT_LIST_HEAD(&con->writequeue);
 	set_bit(CF_IS_OTHERCON, &con->flags);
 
 	sock = create_listen_sock(con, &dlm_local_addr);
@@ -731,16 +705,12 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len,
 	int offset = 0;
 	int users = 0;
 
-	if (!atomic_read(&accepting))
-		return NULL;
-
 	con = nodeid2con(nodeid, allocation);
 	if (!con)
 		return NULL;
 
-	spin_lock(&con->writequeue_lock);
 	e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
-	if (((struct list_head *) e == &con->writequeue) ||
+	if ((&e->list == &con->writequeue) ||
 	    (PAGE_CACHE_SIZE - e->end < len)) {
 		e = NULL;
 	} else {
@@ -751,7 +721,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len,
 	spin_unlock(&con->writequeue_lock);
 
 	if (e) {
-	      got_one:
+	got_one:
 		if (users == 0)
 			kmap(e->page);
 		*ppc = page_address(e->page) + offset;
@@ -777,10 +747,6 @@ void dlm_lowcomms_commit_buffer(void *mh)
 	struct connection *con = e->con;
 	int users;
 
-	if (!atomic_read(&accepting))
-		return;
-
-	spin_lock(&con->writequeue_lock);
 	users = --e->users;
 	if (users)
 		goto out;
@@ -797,7 +763,7 @@ void dlm_lowcomms_commit_buffer(void *mh)
 	}
 	return;
 
-      out:
+out:
 	spin_unlock(&con->writequeue_lock);
 	return;
 }
@@ -809,7 +775,7 @@ static void free_entry(struct writequeue_entry *e)
 }
 
 /* Send a message */
-static int send_to_sock(struct connection *con)
+static void send_to_sock(struct connection *con)
 {
 	int ret = 0;
 	ssize_t(*sendpage) (struct socket *, struct page *, int, size_t, int);
@@ -846,7 +812,7 @@ static int send_to_sock(struct connection *con)
 		}
 		else {
 			/* Don't starve people filling buffers */
-			schedule();
+			cond_resched();
 		}
 
 		spin_lock(&con->writequeue_lock);
@@ -855,25 +821,26 @@ static int send_to_sock(struct connection *con)
 
 		if (e->len == 0 && e->users == 0) {
 			list_del(&e->list);
+			kunmap(e->page);
 			free_entry(e);
 			continue;
 		}
 	}
 	spin_unlock(&con->writequeue_lock);
-      out:
+out:
 	up_read(&con->sock_sem);
-	return ret;
+	return;
 
-      send_error:
+send_error:
 	up_read(&con->sock_sem);
-	close_connection(con, FALSE);
+	close_connection(con, false);
 	lowcomms_connect_sock(con);
-	return ret;
+	return;
 
-      out_connect:
+out_connect:
 	up_read(&con->sock_sem);
 	lowcomms_connect_sock(con);
-	return 0;
+	return;
 }
 
 static void clean_one_writequeue(struct connection *con)
@@ -904,12 +871,12 @@ int dlm_lowcomms_close(int nodeid)
 	con = nodeid2con(nodeid, 0);
 	if (con) {
 		clean_one_writequeue(con);
-		close_connection(con, TRUE);
+		close_connection(con, true);
 		atomic_set(&con->waiting_requests, 0);
 	}
 	return 0;
 
-      out:
+out:
 	return -1;
 }
 
@@ -940,7 +907,7 @@ static void process_sockets(void)
 	list_for_each_safe(list, temp, &read_sockets) {
 
 		struct connection *con =
-		    list_entry(list, struct connection, read_list);
+			list_entry(list, struct connection, read_list);
 		list_del(&con->read_list);
 		clear_bit(CF_READ_PENDING, &con->flags);
 
@@ -959,7 +926,7 @@ static void process_sockets(void)
 
 			/* Don't starve out everyone else */
 			if (++count >= MAX_RX_MSG_COUNT) {
-				schedule();
+				cond_resched();
 				count = 0;
 			}
 
@@ -977,20 +944,16 @@ static void process_output_queue(void)
 {
 	struct list_head *list;
 	struct list_head *temp;
-	int ret;
 
 	spin_lock_bh(&write_sockets_lock);
 	list_for_each_safe(list, temp, &write_sockets) {
 		struct connection *con =
-		    list_entry(list, struct connection, write_list);
+			list_entry(list, struct connection, write_list);
 		clear_bit(CF_WRITE_PENDING, &con->flags);
 		list_del(&con->write_list);
 
 		spin_unlock_bh(&write_sockets_lock);
-
-		ret = send_to_sock(con);
-		if (ret < 0) {
-		}
+		send_to_sock(con);
 		spin_lock_bh(&write_sockets_lock);
 	}
 	spin_unlock_bh(&write_sockets_lock);
@@ -1000,19 +963,16 @@ static void process_state_queue(void)
 {
 	struct list_head *list;
 	struct list_head *temp;
-	int ret;
 
 	spin_lock_bh(&state_sockets_lock);
 	list_for_each_safe(list, temp, &state_sockets) {
 		struct connection *con =
-		    list_entry(list, struct connection, state_list);
+			list_entry(list, struct connection, state_list);
 		list_del(&con->state_list);
 		clear_bit(CF_CONNECT_PENDING, &con->flags);
 		spin_unlock_bh(&state_sockets_lock);
 
-		ret = connect_to_sock(con);
-		if (ret < 0) {
-		}
+		connect_to_sock(con);
 		spin_lock_bh(&state_sockets_lock);
 	}
 	spin_unlock_bh(&state_sockets_lock);
@@ -1046,14 +1006,13 @@ static int read_list_empty(void)
 /* DLM Transport comms receive daemon */
 static int dlm_recvd(void *data)
 {
-	init_waitqueue_head(&lowcomms_recv_waitq);
 	init_waitqueue_entry(&lowcomms_recv_waitq_head, current);
 	add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head);
 
 	while (!kthread_should_stop()) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (read_list_empty())
-			schedule();
+			cond_resched();
 		set_current_state(TASK_RUNNING);
 
 		process_sockets();
@@ -1081,14 +1040,13 @@ static int write_and_state_lists_empty(void)
 /* DLM Transport send daemon */
 static int dlm_sendd(void *data)
 {
-	init_waitqueue_head(&lowcomms_send_waitq);
 	init_waitqueue_entry(&lowcomms_send_waitq_head, current);
 	add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head);
 
 	while (!kthread_should_stop()) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (write_and_state_lists_empty())
-			schedule();
+			cond_resched();
 		set_current_state(TASK_RUNNING);
 
 		process_state_queue();
@@ -1111,7 +1069,7 @@ static int daemons_start(void)
 
 	p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
 	error = IS_ERR(p);
-       	if (error) {
+	if (error) {
 		log_print("can't start dlm_recvd %d", error);
 		return error;
 	}
@@ -1119,7 +1077,7 @@ static int daemons_start(void)
 
 	p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
 	error = IS_ERR(p);
-       	if (error) {
+	if (error) {
 		log_print("can't start dlm_sendd %d", error);
 		kthread_stop(recv_task);
 		return error;
@@ -1141,21 +1099,20 @@ void dlm_lowcomms_stop(void)
 {
 	int i;
 
-	atomic_set(&accepting, 0);
-
-	/* Set all the activity flags to prevent any
+	/* Set all the flags to prevent any
 	   socket activity.
 	*/
 	for (i = 0; i < conn_array_size; i++) {
 		if (connections[i])
-			connections[i]->flags |= 0x7;
+			connections[i]->flags |= 0xFF;
 	}
+
 	daemons_stop();
 	clean_writequeues();
 
 	for (i = 0; i < conn_array_size; i++) {
 		if (connections[i]) {
-			close_connection(connections[i], TRUE);
+			close_connection(connections[i], true);
 			if (connections[i]->othercon)
 				kmem_cache_free(con_cache, connections[i]->othercon);
 			kmem_cache_free(con_cache, connections[i]);
@@ -1173,24 +1130,12 @@ int dlm_lowcomms_start(void)
 {
 	int error = 0;
 
-	error = -ENOTCONN;
-
-	/*
-	 * Temporarily initialise the waitq head so that lowcomms_send_message
-	 * doesn't crash if it gets called before the thread is fully
-	 * initialised
-	 */
-	init_waitqueue_head(&lowcomms_send_waitq);
-
 	error = -ENOMEM;
-	connections = kmalloc(sizeof(struct connection *) *
+	connections = kzalloc(sizeof(struct connection *) *
 			      NODE_INCREMENT, GFP_KERNEL);
 	if (!connections)
 		goto out;
 
-	memset(connections, 0,
-	       sizeof(struct connection *) * NODE_INCREMENT);
-
 	conn_array_size = NODE_INCREMENT;
 
 	if (dlm_our_addr(&dlm_local_addr, 0)) {
@@ -1203,7 +1148,8 @@ int dlm_lowcomms_start(void)
 	}
 
 	con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection),
-				      __alignof__(struct connection), 0, NULL, NULL);
+				      __alignof__(struct connection), 0,
+				      NULL, NULL);
 	if (!con_cache)
 		goto fail_free_conn;
 
@@ -1217,40 +1163,20 @@ int dlm_lowcomms_start(void)
 	if (error)
 		goto fail_unlisten;
 
-	atomic_set(&accepting, 1);
-
 	return 0;
 
-      fail_unlisten:
-	close_connection(connections[0], 0);
+fail_unlisten:
+	close_connection(connections[0], false);
 	kmem_cache_free(con_cache, connections[0]);
 	kmem_cache_destroy(con_cache);
 
-      fail_free_conn:
+fail_free_conn:
 	kfree(connections);
 
-      out:
+out:
 	return error;
 }
 
-int dlm_lowcomms_init(void)
-{
-	INIT_LIST_HEAD(&read_sockets);
-	INIT_LIST_HEAD(&write_sockets);
-	INIT_LIST_HEAD(&state_sockets);
-
-	spin_lock_init(&read_sockets_lock);
-	spin_lock_init(&write_sockets_lock);
-	spin_lock_init(&state_sockets_lock);
-	init_MUTEX(&connections_lock);
-
-	return 0;
-}
-
-void dlm_lowcomms_exit(void)
-{
-}
-
 /*
  * Overrides for Emacs so that we follow Linus's tabbing style.
  * Emacs will notice this stuff at the end of the file and automatically
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index 2d045e0daae..a9a9618c0d3 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -14,8 +14,6 @@
 #ifndef __LOWCOMMS_DOT_H__
 #define __LOWCOMMS_DOT_H__
 
-int dlm_lowcomms_init(void);
-void dlm_lowcomms_exit(void);
 int dlm_lowcomms_start(void);
 void dlm_lowcomms_stop(void);
 int dlm_lowcomms_close(int nodeid);
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index a8da8dc36b2..162fbae58fe 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -16,7 +16,6 @@
 #include "lock.h"
 #include "user.h"
 #include "memory.h"
-#include "lowcomms.h"
 #include "config.h"
 
 #ifdef CONFIG_DLM_DEBUG
@@ -47,20 +46,14 @@ static int __init init_dlm(void)
 	if (error)
 		goto out_config;
 
-	error = dlm_lowcomms_init();
-	if (error)
-		goto out_debug;
-
 	error = dlm_user_init();
 	if (error)
-		goto out_lowcomms;
+		goto out_debug;
 
 	printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);
 
 	return 0;
 
- out_lowcomms:
-	dlm_lowcomms_exit();
  out_debug:
 	dlm_unregister_debugfs();
  out_config:
@@ -76,7 +69,6 @@ static int __init init_dlm(void)
 static void __exit exit_dlm(void)
 {
 	dlm_user_exit();
-	dlm_lowcomms_exit();
 	dlm_config_exit();
 	dlm_memory_exit();
 	dlm_lockspace_exit();
-- 
cgit v1.2.3


From 822191a2fa1584a29c3224ab328507adcaeac1ab Mon Sep 17 00:00:00 2001
From: Andrey Mirkin <amirkin@openvz.org>
Date: Wed, 6 Dec 2006 20:31:35 -0800
Subject: [PATCH] skip data conversion in compat_sys_mount when data_page is
 NULL

OpenVZ Linux kernel team has found a problem with mounting in compat mode.

Simple command "mount -t smbfs ..." on Fedora Core 5 distro in 32-bit mode
leads to oops:

  Unable to handle kernel NULL pointer dereference at 0000000000000000 RIP: compat_sys_mount+0xd6/0x290
  Process mount (pid: 14656, veid=300, threadinfo ffff810034d30000, task ffff810034c86bc0)
  Call Trace: ia32_sysret+0x0/0xa

The problem is that data_page pointer can be NULL, so we should skip data
conversion in this case.

Signed-off-by: Andrey Mirkin <amirkin@openvz.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index 06dad665b88..7aef5412f49 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -871,7 +871,7 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
 
 	retval = -EINVAL;
 
-	if (type_page) {
+	if (type_page && data_page) {
 		if (!strcmp((char *)type_page, SMBFS_NAME)) {
 			do_smb_super_data_conv((void *)data_page);
 		} else if (!strcmp((char *)type_page, NCPFS_NAME)) {
-- 
cgit v1.2.3


From 8fb4fc68ca391862b061b3d358a288ccf6abed39 Mon Sep 17 00:00:00 2001
From: Guillem Jover <guillem.jover@nokia.com>
Date: Wed, 6 Dec 2006 20:32:24 -0800
Subject: [PATCH] Allow user processes to raise their oom_adj value

Currently a user process cannot rise its own oom_adj value (i.e.
unprotecting itself from the OOM killer).  As this value is stored in the
task structure it gets inherited and the unprivileged childs will be unable
to rise it.

The EPERM will be handled by the generic proc fs layer, as only processes
with the proper caps or the owner of the process will be able to write to
the file.  So we allow only the processes with CAP_SYS_RESOURCE to lower
the value, otherwise it will get an EACCES which seems more appropriate
than EPERM.

Signed-off-by: Guillem Jover <guillem.jover@nokia.com>
Acked-by: Andrea Arcangeli <andrea@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/base.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 795319c54f7..05ace70d051 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -683,8 +683,6 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 	char buffer[PROC_NUMBUF], *end;
 	int oom_adjust;
 
-	if (!capable(CAP_SYS_RESOURCE))
-		return -EPERM;
 	memset(buffer, 0, sizeof(buffer));
 	if (count > sizeof(buffer) - 1)
 		count = sizeof(buffer) - 1;
@@ -699,6 +697,10 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 	task = get_proc_task(file->f_dentry->d_inode);
 	if (!task)
 		return -ESRCH;
+	if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
+		put_task_struct(task);
+		return -EACCES;
+	}
 	task->oomkilladj = oom_adjust;
 	put_task_struct(task);
 	if (end - buffer == 0)
-- 
cgit v1.2.3


From e6b4f8da3a88457148038bc952043e99a7fdba64 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 6 Dec 2006 20:33:14 -0800
Subject: [PATCH] slab: remove SLAB_NOFS

SLAB_NOFS is an alias of GFP_NOFS.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/misc.c       | 4 ++--
 fs/cifs/transport.c  | 2 +-
 fs/dquot.c           | 2 +-
 fs/ext3/super.c      | 2 +-
 fs/ext4/super.c      | 2 +-
 fs/hpfs/super.c      | 2 +-
 fs/nfs/read.c        | 2 +-
 fs/nfs/write.c       | 4 ++--
 fs/ntfs/attrib.c     | 2 +-
 fs/ntfs/index.c      | 2 +-
 fs/ntfs/inode.c      | 4 ++--
 fs/ntfs/unistr.c     | 2 +-
 fs/ocfs2/dlm/dlmfs.c | 2 +-
 fs/ocfs2/super.c     | 2 +-
 14 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index bbc9cd34b6e..8355daff504 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -153,7 +153,7 @@ cifs_buf_get(void)
    albeit slightly larger than necessary and maxbuffersize 
    defaults to this and can not be bigger */
 	ret_buf =
-	    (struct smb_hdr *) mempool_alloc(cifs_req_poolp, SLAB_KERNEL | SLAB_NOFS);
+	    (struct smb_hdr *) mempool_alloc(cifs_req_poolp, SLAB_KERNEL | GFP_NOFS);
 
 	/* clear the first few header bytes */
 	/* for most paths, more is cleared in header_assemble */
@@ -192,7 +192,7 @@ cifs_small_buf_get(void)
    albeit slightly larger than necessary and maxbuffersize 
    defaults to this and can not be bigger */
 	ret_buf =
-	    (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, SLAB_KERNEL | SLAB_NOFS);
+	    (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, SLAB_KERNEL | GFP_NOFS);
 	if (ret_buf) {
 	/* No need to clear memory here, cleared in header assemble */
 	/*	memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 48d47b46b1f..7514237cf31 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -51,7 +51,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
 	}
 	
 	temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp,
-						    SLAB_KERNEL | SLAB_NOFS);
+						    SLAB_KERNEL | GFP_NOFS);
 	if (temp == NULL)
 		return temp;
 	else {
diff --git a/fs/dquot.c b/fs/dquot.c
index 9af789567e5..c6ae6c0e0cf 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -600,7 +600,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 {
 	struct dquot *dquot;
 
-	dquot = kmem_cache_alloc(dquot_cachep, SLAB_NOFS);
+	dquot = kmem_cache_alloc(dquot_cachep, GFP_NOFS);
 	if(!dquot)
 		return NODQUOT;
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index afc2d4f42d7..0cf633f0cfa 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -445,7 +445,7 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
 {
 	struct ext3_inode_info *ei;
 
-	ei = kmem_cache_alloc(ext3_inode_cachep, SLAB_NOFS);
+	ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
 #ifdef CONFIG_EXT3_FS_POSIX_ACL
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b4b022aa2bc..c730cbc8403 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -495,7 +495,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 {
 	struct ext4_inode_info *ei;
 
-	ei = kmem_cache_alloc(ext4_inode_cachep, SLAB_NOFS);
+	ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
 #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 450b5e0b478..46ceadd6f16 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -165,7 +165,7 @@ static kmem_cache_t * hpfs_inode_cachep;
 static struct inode *hpfs_alloc_inode(struct super_block *sb)
 {
 	struct hpfs_inode_info *ei;
-	ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, SLAB_NOFS);
+	ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
 	ei->vfs_inode.i_version = 1;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index c2e49c397a2..56f66f0ccb6 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -46,7 +46,7 @@ static mempool_t *nfs_rdata_mempool;
 struct nfs_read_data *nfs_readdata_alloc(size_t len)
 {
 	unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
+	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);
 
 	if (p) {
 		memset(p, 0, sizeof(*p));
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 883dd4a1c15..f7dd0d00595 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -93,7 +93,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
 
 struct nfs_write_data *nfs_commit_alloc(void)
 {
-	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
+	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
 
 	if (p) {
 		memset(p, 0, sizeof(*p));
@@ -112,7 +112,7 @@ void nfs_commit_free(struct nfs_write_data *p)
 struct nfs_write_data *nfs_writedata_alloc(size_t len)
 {
 	unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
+	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
 
 	if (p) {
 		memset(p, 0, sizeof(*p));
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 9f08e851cfb..c577d8e1bd9 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1272,7 +1272,7 @@ ntfs_attr_search_ctx *ntfs_attr_get_search_ctx(ntfs_inode *ni, MFT_RECORD *mrec)
 {
 	ntfs_attr_search_ctx *ctx;
 
-	ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, SLAB_NOFS);
+	ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, GFP_NOFS);
 	if (ctx)
 		ntfs_attr_init_search_ctx(ctx, ni, mrec);
 	return ctx;
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index e32cde48636..2194eff4974 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -38,7 +38,7 @@ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
 {
 	ntfs_index_context *ictx;
 
-	ictx = kmem_cache_alloc(ntfs_index_ctx_cache, SLAB_NOFS);
+	ictx = kmem_cache_alloc(ntfs_index_ctx_cache, GFP_NOFS);
 	if (ictx)
 		*ictx = (ntfs_index_context){ .idx_ni = idx_ni };
 	return ictx;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 2d3de9c8981..247989891b4 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -324,7 +324,7 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
 	ntfs_inode *ni;
 
 	ntfs_debug("Entering.");
-	ni = kmem_cache_alloc(ntfs_big_inode_cache, SLAB_NOFS);
+	ni = kmem_cache_alloc(ntfs_big_inode_cache, GFP_NOFS);
 	if (likely(ni != NULL)) {
 		ni->state = 0;
 		return VFS_I(ni);
@@ -349,7 +349,7 @@ static inline ntfs_inode *ntfs_alloc_extent_inode(void)
 	ntfs_inode *ni;
 
 	ntfs_debug("Entering.");
-	ni = kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS);
+	ni = kmem_cache_alloc(ntfs_inode_cache, GFP_NOFS);
 	if (likely(ni != NULL)) {
 		ni->state = 0;
 		return ni;
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c
index 6a495f7369f..005ca4b0f13 100644
--- a/fs/ntfs/unistr.c
+++ b/fs/ntfs/unistr.c
@@ -266,7 +266,7 @@ int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
 
 	/* We do not trust outside sources. */
 	if (likely(ins)) {
-		ucs = kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS);
+		ucs = kmem_cache_alloc(ntfs_name_cache, GFP_NOFS);
 		if (likely(ucs)) {
 			for (i = o = 0; i < ins_len; i += wc_len) {
 				wc_len = nls->char2uni(ins + i, ins_len - i,
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 16b8d1ba706..01f91501f70 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -276,7 +276,7 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb)
 {
 	struct dlmfs_inode_private *ip;
 
-	ip = kmem_cache_alloc(dlmfs_inode_cache, SLAB_NOFS);
+	ip = kmem_cache_alloc(dlmfs_inode_cache, GFP_NOFS);
 	if (!ip)
 		return NULL;
 
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index d9b4214a12d..7574d26ee0f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -303,7 +303,7 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
 {
 	struct ocfs2_inode_info *oi;
 
-	oi = kmem_cache_alloc(ocfs2_inode_cachep, SLAB_NOFS);
+	oi = kmem_cache_alloc(ocfs2_inode_cachep, GFP_NOFS);
 	if (!oi)
 		return NULL;
 
-- 
cgit v1.2.3


From f7267c0c0721fd02ad3dc37c3d6dd24ccd81d4d6 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 6 Dec 2006 20:33:15 -0800
Subject: [PATCH] slab: remove SLAB_USER

SLAB_USER is an alias of GFP_USER

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/crypto.c | 4 ++--
 fs/ecryptfs/inode.c  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f63a7755fe8..776b2eed371 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1334,7 +1334,7 @@ int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
 		goto out;
 	}
 	/* Released in this function */
-	page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, SLAB_USER);
+	page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, GFP_USER);
 	if (!page_virt) {
 		ecryptfs_printk(KERN_ERR, "Out of memory\n");
 		rc = -ENOMEM;
@@ -1493,7 +1493,7 @@ int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
 	    &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
 
 	/* Read the first page from the underlying file */
-	page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, SLAB_USER);
+	page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, GFP_USER);
 	if (!page_virt) {
 		rc = -ENOMEM;
 		ecryptfs_printk(KERN_ERR, "Unable to allocate page_virt\n");
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index dfcc68484f4..70911412044 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -404,7 +404,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 	/* Released in this function */
 	page_virt =
 	    (char *)kmem_cache_alloc(ecryptfs_header_cache_2,
-				     SLAB_USER);
+				     GFP_USER);
 	if (!page_virt) {
 		rc = -ENOMEM;
 		ecryptfs_printk(KERN_ERR,
-- 
cgit v1.2.3


From e94b1766097d53e6f3ccfb36c8baa562ffeda3fc Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 6 Dec 2006 20:33:17 -0800
Subject: [PATCH] slab: remove SLAB_KERNEL

SLAB_KERNEL is an alias of GFP_KERNEL.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/adfs/super.c          | 2 +-
 fs/affs/super.c          | 2 +-
 fs/afs/super.c           | 2 +-
 fs/befs/linuxvfs.c       | 2 +-
 fs/bfs/inode.c           | 2 +-
 fs/block_dev.c           | 2 +-
 fs/cifs/cifsfs.c         | 2 +-
 fs/cifs/misc.c           | 4 ++--
 fs/cifs/transport.c      | 4 ++--
 fs/coda/inode.c          | 2 +-
 fs/dnotify.c             | 2 +-
 fs/ecryptfs/crypto.c     | 2 +-
 fs/ecryptfs/file.c       | 2 +-
 fs/ecryptfs/inode.c      | 4 ++--
 fs/ecryptfs/keystore.c   | 2 +-
 fs/ecryptfs/main.c       | 4 ++--
 fs/ecryptfs/super.c      | 2 +-
 fs/efs/super.c           | 2 +-
 fs/eventpoll.c           | 4 ++--
 fs/exec.c                | 2 +-
 fs/ext2/super.c          | 2 +-
 fs/fat/cache.c           | 2 +-
 fs/fat/inode.c           | 2 +-
 fs/fcntl.c               | 2 +-
 fs/freevxfs/vxfs_inode.c | 4 ++--
 fs/fuse/dev.c            | 2 +-
 fs/fuse/inode.c          | 2 +-
 fs/hfs/super.c           | 2 +-
 fs/hfsplus/super.c       | 2 +-
 fs/hugetlbfs/inode.c     | 2 +-
 fs/inode.c               | 2 +-
 fs/isofs/inode.c         | 2 +-
 fs/jffs2/super.c         | 2 +-
 fs/locks.c               | 2 +-
 fs/minix/inode.c         | 2 +-
 fs/ncpfs/inode.c         | 2 +-
 fs/nfs/direct.c          | 2 +-
 fs/nfs/inode.c           | 2 +-
 fs/nfs/pagelist.c        | 2 +-
 fs/openpromfs/inode.c    | 2 +-
 fs/proc/inode.c          | 2 +-
 fs/qnx4/inode.c          | 2 +-
 fs/reiserfs/super.c      | 2 +-
 fs/romfs/inode.c         | 2 +-
 fs/smbfs/inode.c         | 2 +-
 fs/smbfs/request.c       | 2 +-
 fs/sysv/inode.c          | 2 +-
 fs/udf/super.c           | 2 +-
 fs/ufs/super.c           | 2 +-
 49 files changed, 55 insertions(+), 55 deletions(-)

(limited to 'fs')

diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 9ade139086f..52eb10ca654 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -217,7 +217,7 @@ static kmem_cache_t *adfs_inode_cachep;
 static struct inode *adfs_alloc_inode(struct super_block *sb)
 {
 	struct adfs_inode_info *ei;
-	ei = (struct adfs_inode_info *)kmem_cache_alloc(adfs_inode_cachep, SLAB_KERNEL);
+	ei = (struct adfs_inode_info *)kmem_cache_alloc(adfs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 5ea72c3a16c..81c73ec09f6 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -71,7 +71,7 @@ static kmem_cache_t * affs_inode_cachep;
 static struct inode *affs_alloc_inode(struct super_block *sb)
 {
 	struct affs_inode_info *ei;
-	ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, SLAB_KERNEL);
+	ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	ei->vfs_inode.i_version = 1;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 67d1f5c819e..c6ead009bf7 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -412,7 +412,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 	struct afs_vnode *vnode;
 
 	vnode = (struct afs_vnode *)
-		kmem_cache_alloc(afs_inode_cachep, SLAB_KERNEL);
+		kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL);
 	if (!vnode)
 		return NULL;
 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 07f7144f0e2..995348df94a 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -277,7 +277,7 @@ befs_alloc_inode(struct super_block *sb)
 {
         struct befs_inode_info *bi;
         bi = (struct befs_inode_info *)kmem_cache_alloc(befs_inode_cachep,
-							SLAB_KERNEL);
+							GFP_KERNEL);
         if (!bi)
                 return NULL;
         return &bi->vfs_inode;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index ed27ffb3459..2e45123c8f7 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -233,7 +233,7 @@ static kmem_cache_t * bfs_inode_cachep;
 static struct inode *bfs_alloc_inode(struct super_block *sb)
 {
 	struct bfs_inode_info *bi;
-	bi = kmem_cache_alloc(bfs_inode_cachep, SLAB_KERNEL);
+	bi = kmem_cache_alloc(bfs_inode_cachep, GFP_KERNEL);
 	if (!bi)
 		return NULL;
 	return &bi->vfs_inode;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 36c0e7af9d0..063506705f2 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -239,7 +239,7 @@ static kmem_cache_t * bdev_cachep __read_mostly;
 
 static struct inode *bdev_alloc_inode(struct super_block *sb)
 {
-	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, SLAB_KERNEL);
+	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 84976cdbe71..84168629cea 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -245,7 +245,7 @@ static struct inode *
 cifs_alloc_inode(struct super_block *sb)
 {
 	struct cifsInodeInfo *cifs_inode;
-	cifs_inode = kmem_cache_alloc(cifs_inode_cachep, SLAB_KERNEL);
+	cifs_inode = kmem_cache_alloc(cifs_inode_cachep, GFP_KERNEL);
 	if (!cifs_inode)
 		return NULL;
 	cifs_inode->cifsAttrs = 0x20;	/* default */
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 8355daff504..aedf683f011 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -153,7 +153,7 @@ cifs_buf_get(void)
    albeit slightly larger than necessary and maxbuffersize 
    defaults to this and can not be bigger */
 	ret_buf =
-	    (struct smb_hdr *) mempool_alloc(cifs_req_poolp, SLAB_KERNEL | GFP_NOFS);
+	    (struct smb_hdr *) mempool_alloc(cifs_req_poolp, GFP_KERNEL | GFP_NOFS);
 
 	/* clear the first few header bytes */
 	/* for most paths, more is cleared in header_assemble */
@@ -192,7 +192,7 @@ cifs_small_buf_get(void)
    albeit slightly larger than necessary and maxbuffersize 
    defaults to this and can not be bigger */
 	ret_buf =
-	    (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, SLAB_KERNEL | GFP_NOFS);
+	    (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, GFP_KERNEL | GFP_NOFS);
 	if (ret_buf) {
 	/* No need to clear memory here, cleared in header assemble */
 	/*	memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 7514237cf31..1f727765a8e 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -51,7 +51,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
 	}
 	
 	temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp,
-						    SLAB_KERNEL | GFP_NOFS);
+						    GFP_KERNEL | GFP_NOFS);
 	if (temp == NULL)
 		return temp;
 	else {
@@ -118,7 +118,7 @@ AllocOplockQEntry(struct inode * pinode, __u16 fid, struct cifsTconInfo * tcon)
 		return NULL;
 	}
 	temp = (struct oplock_q_entry *) kmem_cache_alloc(cifs_oplock_cachep,
-						       SLAB_KERNEL);
+						       GFP_KERNEL);
 	if (temp == NULL)
 		return temp;
 	else {
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 88d12332116..50cedd2617d 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -43,7 +43,7 @@ static kmem_cache_t * coda_inode_cachep;
 static struct inode *coda_alloc_inode(struct super_block *sb)
 {
 	struct coda_inode_info *ei;
-	ei = (struct coda_inode_info *)kmem_cache_alloc(coda_inode_cachep, SLAB_KERNEL);
+	ei = (struct coda_inode_info *)kmem_cache_alloc(coda_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	memset(&ei->c_fid, 0, sizeof(struct CodaFid));
diff --git a/fs/dnotify.c b/fs/dnotify.c
index 2b0442db67e..e778b1737b7 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -77,7 +77,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	inode = filp->f_dentry->d_inode;
 	if (!S_ISDIR(inode->i_mode))
 		return -ENOTDIR;
-	dn = kmem_cache_alloc(dn_cache, SLAB_KERNEL);
+	dn = kmem_cache_alloc(dn_cache, GFP_KERNEL);
 	if (dn == NULL)
 		return -ENOMEM;
 	spin_lock(&inode->i_lock);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 776b2eed371..7196f50fe15 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -628,7 +628,7 @@ int ecryptfs_decrypt_page(struct file *file, struct page *page)
 	num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
 	base_extent = (page->index * num_extents_per_page);
 	lower_page_virt = kmem_cache_alloc(ecryptfs_lower_page_cache,
-					   SLAB_KERNEL);
+					   GFP_KERNEL);
 	if (!lower_page_virt) {
 		rc = -ENOMEM;
 		ecryptfs_printk(KERN_ERR, "Error getting page for encrypted "
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index a92ef05eff8..42099e779a5 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -250,7 +250,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 	int lower_flags;
 
 	/* Released in ecryptfs_release or end of function if failure */
-	file_info = kmem_cache_alloc(ecryptfs_file_info_cache, SLAB_KERNEL);
+	file_info = kmem_cache_alloc(ecryptfs_file_info_cache, GFP_KERNEL);
 	ecryptfs_set_file_private(file, file_info);
 	if (!file_info) {
 		ecryptfs_printk(KERN_ERR,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 70911412044..8a1945a84c3 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -369,7 +369,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 	BUG_ON(!atomic_read(&lower_dentry->d_count));
 	ecryptfs_set_dentry_private(dentry,
 				    kmem_cache_alloc(ecryptfs_dentry_info_cache,
-						     SLAB_KERNEL));
+						     GFP_KERNEL));
 	if (!ecryptfs_dentry_to_private(dentry)) {
 		rc = -ENOMEM;
 		ecryptfs_printk(KERN_ERR, "Out of memory whilst attempting "
@@ -795,7 +795,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 	/* Released at out_free: label */
 	ecryptfs_set_file_private(&fake_ecryptfs_file,
 				  kmem_cache_alloc(ecryptfs_file_info_cache,
-						   SLAB_KERNEL));
+						   GFP_KERNEL));
 	if (unlikely(!ecryptfs_file_to_private(&fake_ecryptfs_file))) {
 		rc = -ENOMEM;
 		goto out;
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index c3746f56d16..745c0f1bfbb 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -207,7 +207,7 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
 	/* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
 	 * at end of function upon failure */
 	auth_tok_list_item =
-	    kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, SLAB_KERNEL);
+	    kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, GFP_KERNEL);
 	if (!auth_tok_list_item) {
 		ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
 		rc = -ENOMEM;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a78d87d14ba..a2c6ccbce30 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -378,7 +378,7 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
 	/* Released in ecryptfs_put_super() */
 	ecryptfs_set_superblock_private(sb,
 					kmem_cache_alloc(ecryptfs_sb_info_cache,
-							 SLAB_KERNEL));
+							 GFP_KERNEL));
 	if (!ecryptfs_superblock_to_private(sb)) {
 		ecryptfs_printk(KERN_WARNING, "Out of memory\n");
 		rc = -ENOMEM;
@@ -402,7 +402,7 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
 	/* through deactivate_super(sb) from get_sb_nodev() */
 	ecryptfs_set_dentry_private(sb->s_root,
 				    kmem_cache_alloc(ecryptfs_dentry_info_cache,
-						     SLAB_KERNEL));
+						     GFP_KERNEL));
 	if (!ecryptfs_dentry_to_private(sb->s_root)) {
 		ecryptfs_printk(KERN_ERR,
 				"dentry_info_cache alloc failed\n");
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 825757ae486..eaa5daaf106 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -50,7 +50,7 @@ static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
 	struct inode *inode = NULL;
 
 	ecryptfs_inode = kmem_cache_alloc(ecryptfs_inode_info_cache,
-					  SLAB_KERNEL);
+					  GFP_KERNEL);
 	if (unlikely(!ecryptfs_inode))
 		goto out;
 	ecryptfs_init_crypt_stat(&ecryptfs_inode->crypt_stat);
diff --git a/fs/efs/super.c b/fs/efs/super.c
index b3f50651eb6..69b15a996cf 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -57,7 +57,7 @@ static kmem_cache_t * efs_inode_cachep;
 static struct inode *efs_alloc_inode(struct super_block *sb)
 {
 	struct efs_inode_info *ei;
-	ei = (struct efs_inode_info *)kmem_cache_alloc(efs_inode_cachep, SLAB_KERNEL);
+	ei = (struct efs_inode_info *)kmem_cache_alloc(efs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index ae228ec54e9..f5c88435c6b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -961,7 +961,7 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
 	struct epitem *epi = ep_item_from_epqueue(pt);
 	struct eppoll_entry *pwq;
 
-	if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, SLAB_KERNEL))) {
+	if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) {
 		init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
 		pwq->whead = whead;
 		pwq->base = epi;
@@ -1004,7 +1004,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	struct ep_pqueue epq;
 
 	error = -ENOMEM;
-	if (!(epi = kmem_cache_alloc(epi_cache, SLAB_KERNEL)))
+	if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL)))
 		goto eexit_1;
 
 	/* Item initialization follow here ... */
diff --git a/fs/exec.c b/fs/exec.c
index d993ea1a81a..2092bd20746 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -404,7 +404,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
-	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!mpnt)
 		return -ENOMEM;
 
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index d8b9abd95d0..85c237e7385 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -140,7 +140,7 @@ static kmem_cache_t * ext2_inode_cachep;
 static struct inode *ext2_alloc_inode(struct super_block *sb)
 {
 	struct ext2_inode_info *ei;
-	ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, SLAB_KERNEL);
+	ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 #ifdef CONFIG_EXT2_FS_POSIX_ACL
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 82cc4f59e3b..8c272278455 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -63,7 +63,7 @@ void fat_cache_destroy(void)
 
 static inline struct fat_cache *fat_cache_alloc(struct inode *inode)
 {
-	return kmem_cache_alloc(fat_cache_cachep, SLAB_KERNEL);
+	return kmem_cache_alloc(fat_cache_cachep, GFP_KERNEL);
 }
 
 static inline void fat_cache_free(struct fat_cache *cache)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 78945b53b0f..b58fd0c9f3c 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -482,7 +482,7 @@ static kmem_cache_t *fat_inode_cachep;
 static struct inode *fat_alloc_inode(struct super_block *sb)
 {
 	struct msdos_inode_info *ei;
-	ei = kmem_cache_alloc(fat_inode_cachep, SLAB_KERNEL);
+	ei = kmem_cache_alloc(fat_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index e4f26165f12..c03dc9cb21c 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -567,7 +567,7 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
 	int result = 0;
 
 	if (on) {
-		new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL);
+		new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
 		if (!new)
 			return -ENOMEM;
 	}
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 4786d51ad3b..d2dd0d70007 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -103,7 +103,7 @@ vxfs_blkiget(struct super_block *sbp, u_long extent, ino_t ino)
 		struct vxfs_inode_info	*vip;
 		struct vxfs_dinode	*dip;
 
-		if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, SLAB_KERNEL)))
+		if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL)))
 			goto fail;
 		dip = (struct vxfs_dinode *)(bp->b_data + offset);
 		memcpy(vip, dip, sizeof(*vip));
@@ -145,7 +145,7 @@ __vxfs_iget(ino_t ino, struct inode *ilistp)
 		struct vxfs_dinode	*dip;
 		caddr_t			kaddr = (char *)page_address(pp);
 
-		if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, SLAB_KERNEL)))
+		if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL)))
 			goto fail;
 		dip = (struct vxfs_dinode *)(kaddr + offset);
 		memcpy(vip, dip, sizeof(*vip));
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 66571eafbb1..8c15139f275 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -41,7 +41,7 @@ static void fuse_request_init(struct fuse_req *req)
 
 struct fuse_req *fuse_request_alloc(void)
 {
-	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
+	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
 	if (req)
 		fuse_request_init(req);
 	return req;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fc420357037..e039e2047cc 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -46,7 +46,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	struct inode *inode;
 	struct fuse_inode *fi;
 
-	inode = kmem_cache_alloc(fuse_inode_cachep, SLAB_KERNEL);
+	inode = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
 	if (!inode)
 		return NULL;
 
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 85b17b3fa4a..ffc6409132c 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -145,7 +145,7 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
 {
 	struct hfs_inode_info *i;
 
-	i = kmem_cache_alloc(hfs_inode_cachep, SLAB_KERNEL);
+	i = kmem_cache_alloc(hfs_inode_cachep, GFP_KERNEL);
 	return i ? &i->vfs_inode : NULL;
 }
 
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 194eede52fa..4a0c70c76c8 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -440,7 +440,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb)
 {
 	struct hfsplus_inode_info *i;
 
-	i = kmem_cache_alloc(hfsplus_inode_cachep, SLAB_KERNEL);
+	i = kmem_cache_alloc(hfsplus_inode_cachep, GFP_KERNEL);
 	return i ? &i->vfs_inode : NULL;
 }
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 7f4756963d0..36e52173a54 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -522,7 +522,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 
 	if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
 		return NULL;
-	p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL);
+	p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL);
 	if (unlikely(!p)) {
 		hugetlbfs_inc_free_inodes(sbinfo);
 		return NULL;
diff --git a/fs/inode.c b/fs/inode.c
index 26cdb115ce6..dd15984d51a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -109,7 +109,7 @@ static struct inode *alloc_inode(struct super_block *sb)
 	if (sb->s_op->alloc_inode)
 		inode = sb->s_op->alloc_inode(sb);
 	else
-		inode = (struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL);
+		inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL);
 
 	if (inode) {
 		struct address_space * const mapping = &inode->i_data;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index c34b862cdbf..4b6381cd2cf 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -62,7 +62,7 @@ static kmem_cache_t *isofs_inode_cachep;
 static struct inode *isofs_alloc_inode(struct super_block *sb)
 {
 	struct iso_inode_info *ei;
-	ei = kmem_cache_alloc(isofs_inode_cachep, SLAB_KERNEL);
+	ei = kmem_cache_alloc(isofs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index bc4b8106a49..77be534ce42 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -33,7 +33,7 @@ static kmem_cache_t *jffs2_inode_cachep;
 static struct inode *jffs2_alloc_inode(struct super_block *sb)
 {
 	struct jffs2_inode_info *ei;
-	ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, SLAB_KERNEL);
+	ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/locks.c b/fs/locks.c
index e0b6a80649a..a7b97d50c1e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -147,7 +147,7 @@ static kmem_cache_t *filelock_cache __read_mostly;
 /* Allocate an empty lock structure. */
 static struct file_lock *locks_alloc_lock(void)
 {
-	return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
+	return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
 }
 
 static void locks_release_private(struct file_lock *fl)
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 1e36bae4d0e..ce532c2deda 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -56,7 +56,7 @@ static kmem_cache_t * minix_inode_cachep;
 static struct inode *minix_alloc_inode(struct super_block *sb)
 {
 	struct minix_inode_info *ei;
-	ei = (struct minix_inode_info *)kmem_cache_alloc(minix_inode_cachep, SLAB_KERNEL);
+	ei = (struct minix_inode_info *)kmem_cache_alloc(minix_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 72dad552aa0..ed84d899220 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -45,7 +45,7 @@ static kmem_cache_t * ncp_inode_cachep;
 static struct inode *ncp_alloc_inode(struct super_block *sb)
 {
 	struct ncp_inode_info *ei;
-	ei = (struct ncp_inode_info *)kmem_cache_alloc(ncp_inode_cachep, SLAB_KERNEL);
+	ei = (struct ncp_inode_info *)kmem_cache_alloc(ncp_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bdfabf854a5..769fd0a0c77 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -143,7 +143,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 {
 	struct nfs_direct_req *dreq;
 
-	dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+	dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL);
 	if (!dreq)
 		return NULL;
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 08cc4c5919a..6b53aae4ed2 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1080,7 +1080,7 @@ void nfs4_clear_inode(struct inode *inode)
 struct inode *nfs_alloc_inode(struct super_block *sb)
 {
 	struct nfs_inode *nfsi;
-	nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL);
+	nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL);
 	if (!nfsi)
 		return NULL;
 	nfsi->flags = 0UL;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 829af323f28..a1561a820ab 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -26,7 +26,7 @@ static inline struct nfs_page *
 nfs_page_alloc(void)
 {
 	struct nfs_page	*p;
-	p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL);
+	p = kmem_cache_alloc(nfs_page_cachep, GFP_KERNEL);
 	if (p) {
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->wb_list);
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 592a6402e85..911d1bcfc56 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -336,7 +336,7 @@ static struct inode *openprom_alloc_inode(struct super_block *sb)
 {
 	struct op_inode_info *oi;
 
-	oi = kmem_cache_alloc(op_inode_cachep, SLAB_KERNEL);
+	oi = kmem_cache_alloc(op_inode_cachep, GFP_KERNEL);
 	if (!oi)
 		return NULL;
 
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 49dfb2ab783..b24cdb2f17c 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -88,7 +88,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
 	struct proc_inode *ei;
 	struct inode *inode;
 
-	ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL);
+	ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	ei->pid = NULL;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 5a41db2a218..5b943eb11d7 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -520,7 +520,7 @@ static kmem_cache_t *qnx4_inode_cachep;
 static struct inode *qnx4_alloc_inode(struct super_block *sb)
 {
 	struct qnx4_inode_info *ei;
-	ei = kmem_cache_alloc(qnx4_inode_cachep, SLAB_KERNEL);
+	ei = kmem_cache_alloc(qnx4_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 17249994110..32332516d65 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -496,7 +496,7 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
 {
 	struct reiserfs_inode_info *ei;
 	ei = (struct reiserfs_inode_info *)
-	    kmem_cache_alloc(reiserfs_inode_cachep, SLAB_KERNEL);
+	    kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index ddcd9e1ef28..d1b455f9b66 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -555,7 +555,7 @@ static kmem_cache_t * romfs_inode_cachep;
 static struct inode *romfs_alloc_inode(struct super_block *sb)
 {
 	struct romfs_inode_info *ei;
-	ei = (struct romfs_inode_info *)kmem_cache_alloc(romfs_inode_cachep, SLAB_KERNEL);
+	ei = (struct romfs_inode_info *)kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 2c122ee83ad..22161710348 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -55,7 +55,7 @@ static kmem_cache_t *smb_inode_cachep;
 static struct inode *smb_alloc_inode(struct super_block *sb)
 {
 	struct smb_inode_info *ei;
-	ei = (struct smb_inode_info *)kmem_cache_alloc(smb_inode_cachep, SLAB_KERNEL);
+	ei = (struct smb_inode_info *)kmem_cache_alloc(smb_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index 0fb74697abc..3eb1402191b 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -61,7 +61,7 @@ static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
 	struct smb_request *req;
 	unsigned char *buf = NULL;
 
-	req = kmem_cache_alloc(req_cachep, SLAB_KERNEL);
+	req = kmem_cache_alloc(req_cachep, GFP_KERNEL);
 	VERBOSE("allocating request: %p\n", req);
 	if (!req)
 		goto out;
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index d63c5e48b05..a6ca12b747c 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -307,7 +307,7 @@ static struct inode *sysv_alloc_inode(struct super_block *sb)
 {
 	struct sysv_inode_info *si;
 
-	si = kmem_cache_alloc(sysv_inode_cachep, SLAB_KERNEL);
+	si = kmem_cache_alloc(sysv_inode_cachep, GFP_KERNEL);
 	if (!si)
 		return NULL;
 	return &si->vfs_inode;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 1aea6a4f9a4..e50f24221de 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -112,7 +112,7 @@ static kmem_cache_t * udf_inode_cachep;
 static struct inode *udf_alloc_inode(struct super_block *sb)
 {
 	struct udf_inode_info *ei;
-	ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, SLAB_KERNEL);
+	ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index ec79e3091d1..85a88c0c5e6 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1209,7 +1209,7 @@ static kmem_cache_t * ufs_inode_cachep;
 static struct inode *ufs_alloc_inode(struct super_block *sb)
 {
 	struct ufs_inode_info *ei;
-	ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, SLAB_KERNEL);
+	ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	ei->vfs_inode.i_version = 1;
-- 
cgit v1.2.3


From e18b890bb0881bbab6f4f1a6cd20d9c60d66b003 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 6 Dec 2006 20:33:20 -0800
Subject: [PATCH] slab: remove kmem_cache_t

Replace all uses of kmem_cache_t with struct kmem_cache.

The patch was generated using the following script:

	#!/bin/sh
	#
	# Replace one string by another in all the kernel sources.
	#

	set -e

	for file in `find * -name "*.c" -o -name "*.h"|xargs grep -l $1`; do
		quilt add $file
		sed -e "1,\$s/$1/$2/g" $file >/tmp/$$
		mv /tmp/$$ $file
		quilt refresh
	done

The script was run like this

	sh replace kmem_cache_t "struct kmem_cache"

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/adfs/super.c                 |  4 ++--
 fs/affs/super.c                 |  4 ++--
 fs/afs/super.c                  |  6 +++---
 fs/aio.c                        |  4 ++--
 fs/befs/linuxvfs.c              |  4 ++--
 fs/bfs/inode.c                  |  4 ++--
 fs/bio.c                        |  4 ++--
 fs/block_dev.c                  |  4 ++--
 fs/buffer.c                     |  4 ++--
 fs/cifs/cifsfs.c                | 14 +++++++-------
 fs/cifs/transport.c             |  2 +-
 fs/coda/inode.c                 |  4 ++--
 fs/configfs/configfs_internal.h |  2 +-
 fs/configfs/mount.c             |  2 +-
 fs/dcache.c                     |  6 +++---
 fs/dcookies.c                   |  2 +-
 fs/dlm/memory.c                 |  2 +-
 fs/dnotify.c                    |  2 +-
 fs/dquot.c                      |  2 +-
 fs/ecryptfs/main.c              |  2 +-
 fs/efs/super.c                  |  4 ++--
 fs/eventpoll.c                  |  4 ++--
 fs/ext2/super.c                 |  4 ++--
 fs/ext3/super.c                 |  4 ++--
 fs/ext4/super.c                 |  4 ++--
 fs/fat/cache.c                  |  4 ++--
 fs/fat/inode.c                  |  4 ++--
 fs/fcntl.c                      |  2 +-
 fs/freevxfs/vxfs_inode.c        |  2 +-
 fs/fuse/dev.c                   |  2 +-
 fs/fuse/inode.c                 |  4 ++--
 fs/gfs2/main.c                  |  4 ++--
 fs/gfs2/util.c                  |  6 +++---
 fs/gfs2/util.h                  |  6 +++---
 fs/hfs/super.c                  |  4 ++--
 fs/hfsplus/super.c              |  4 ++--
 fs/hpfs/super.c                 |  4 ++--
 fs/hugetlbfs/inode.c            |  4 ++--
 fs/inode.c                      |  4 ++--
 fs/inotify_user.c               |  4 ++--
 fs/isofs/inode.c                |  4 ++--
 fs/jbd/journal.c                |  6 +++---
 fs/jbd/revoke.c                 |  4 ++--
 fs/jbd2/journal.c               |  6 +++---
 fs/jbd2/revoke.c                |  4 ++--
 fs/jffs/inode-v23.c             |  4 ++--
 fs/jffs/jffs_fm.c               |  4 ++--
 fs/jffs2/malloc.c               | 18 +++++++++---------
 fs/jffs2/super.c                |  4 ++--
 fs/jfs/jfs_metapage.c           |  4 ++--
 fs/jfs/super.c                  |  4 ++--
 fs/locks.c                      |  4 ++--
 fs/mbcache.c                    |  2 +-
 fs/minix/inode.c                |  4 ++--
 fs/namespace.c                  |  2 +-
 fs/ncpfs/inode.c                |  4 ++--
 fs/nfs/direct.c                 |  2 +-
 fs/nfs/inode.c                  |  4 ++--
 fs/nfs/pagelist.c               |  2 +-
 fs/nfs/read.c                   |  2 +-
 fs/nfs/write.c                  |  2 +-
 fs/nfsd/nfs4state.c             | 10 +++++-----
 fs/ocfs2/dlm/dlmfs.c            |  4 ++--
 fs/ocfs2/dlm/dlmmaster.c        |  2 +-
 fs/ocfs2/extent_map.c           |  2 +-
 fs/ocfs2/inode.h                |  2 +-
 fs/ocfs2/super.c                |  4 ++--
 fs/ocfs2/uptodate.c             |  2 +-
 fs/openpromfs/inode.c           |  4 ++--
 fs/proc/inode.c                 |  4 ++--
 fs/qnx4/inode.c                 |  4 ++--
 fs/reiserfs/super.c             |  4 ++--
 fs/romfs/inode.c                |  4 ++--
 fs/smbfs/inode.c                |  4 ++--
 fs/smbfs/request.c              |  2 +-
 fs/sysfs/mount.c                |  2 +-
 fs/sysfs/sysfs.h                |  2 +-
 fs/sysv/inode.c                 |  4 ++--
 fs/udf/super.c                  |  4 ++--
 fs/ufs/super.c                  |  4 ++--
 80 files changed, 157 insertions(+), 157 deletions(-)

(limited to 'fs')

diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 52eb10ca654..e5a205c9f42 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -212,7 +212,7 @@ static int adfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static kmem_cache_t *adfs_inode_cachep;
+static struct kmem_cache *adfs_inode_cachep;
 
 static struct inode *adfs_alloc_inode(struct super_block *sb)
 {
@@ -228,7 +228,7 @@ static void adfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
 
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 81c73ec09f6..3de93e79994 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -66,7 +66,7 @@ affs_write_super(struct super_block *sb)
 	pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean);
 }
 
-static kmem_cache_t * affs_inode_cachep;
+static struct kmem_cache * affs_inode_cachep;
 
 static struct inode *affs_alloc_inode(struct super_block *sb)
 {
@@ -83,7 +83,7 @@ static void affs_destroy_inode(struct inode *inode)
 	kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct affs_inode_info *ei = (struct affs_inode_info *) foo;
 
diff --git a/fs/afs/super.c b/fs/afs/super.c
index c6ead009bf7..9a351c4c756 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -35,7 +35,7 @@ struct afs_mount_params {
 	struct afs_volume	*volume;
 };
 
-static void afs_i_init_once(void *foo, kmem_cache_t *cachep,
+static void afs_i_init_once(void *foo, struct kmem_cache *cachep,
 			    unsigned long flags);
 
 static int afs_get_sb(struct file_system_type *fs_type,
@@ -65,7 +65,7 @@ static struct super_operations afs_super_ops = {
 	.put_super	= afs_put_super,
 };
 
-static kmem_cache_t *afs_inode_cachep;
+static struct kmem_cache *afs_inode_cachep;
 static atomic_t afs_count_active_inodes;
 
 /*****************************************************************************/
@@ -384,7 +384,7 @@ static void afs_put_super(struct super_block *sb)
 /*
  * initialise an inode cache slab element prior to any use
  */
-static void afs_i_init_once(void *_vnode, kmem_cache_t *cachep,
+static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
 			    unsigned long flags)
 {
 	struct afs_vnode *vnode = (struct afs_vnode *) _vnode;
diff --git a/fs/aio.c b/fs/aio.c
index 287a1bc7a18..13aa9298abf 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -47,8 +47,8 @@ unsigned long aio_nr;		/* current system wide number of aio requests */
 unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
 /*----end sysctl variables---*/
 
-static kmem_cache_t	*kiocb_cachep;
-static kmem_cache_t	*kioctx_cachep;
+static struct kmem_cache	*kiocb_cachep;
+static struct kmem_cache	*kioctx_cachep;
 
 static struct workqueue_struct *aio_wq;
 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 995348df94a..bce402eee55 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -61,7 +61,7 @@ static const struct super_operations befs_sops = {
 };
 
 /* slab cache for befs_inode_info objects */
-static kmem_cache_t *befs_inode_cachep;
+static struct kmem_cache *befs_inode_cachep;
 
 static const struct file_operations befs_dir_operations = {
 	.read		= generic_read_dir,
@@ -289,7 +289,7 @@ befs_destroy_inode(struct inode *inode)
         kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
         struct befs_inode_info *bi = (struct befs_inode_info *) foo;
 	
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 2e45123c8f7..eac175ed9f4 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -228,7 +228,7 @@ static void bfs_write_super(struct super_block *s)
 	unlock_kernel();
 }
 
-static kmem_cache_t * bfs_inode_cachep;
+static struct kmem_cache * bfs_inode_cachep;
 
 static struct inode *bfs_alloc_inode(struct super_block *sb)
 {
@@ -244,7 +244,7 @@ static void bfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct bfs_inode_info *bi = foo;
 
diff --git a/fs/bio.c b/fs/bio.c
index 50c40ce2cea..7ec737eda72 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -30,7 +30,7 @@
 
 #define BIO_POOL_SIZE 256
 
-static kmem_cache_t *bio_slab __read_mostly;
+static struct kmem_cache *bio_slab __read_mostly;
 
 #define BIOVEC_NR_POOLS 6
 
@@ -44,7 +44,7 @@ mempool_t *bio_split_pool __read_mostly;
 struct biovec_slab {
 	int nr_vecs;
 	char *name; 
-	kmem_cache_t *slab;
+	struct kmem_cache *slab;
 };
 
 /*
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 063506705f2..13816b4d76f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -235,7 +235,7 @@ static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
  */
 
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
-static kmem_cache_t * bdev_cachep __read_mostly;
+static struct kmem_cache * bdev_cachep __read_mostly;
 
 static struct inode *bdev_alloc_inode(struct super_block *sb)
 {
@@ -253,7 +253,7 @@ static void bdev_destroy_inode(struct inode *inode)
 	kmem_cache_free(bdev_cachep, bdi);
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct bdev_inode *ei = (struct bdev_inode *) foo;
 	struct block_device *bdev = &ei->bdev;
diff --git a/fs/buffer.c b/fs/buffer.c
index 35527dca1db..a8ca0ac2148 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2908,7 +2908,7 @@ asmlinkage long sys_bdflush(int func, long data)
 /*
  * Buffer-head allocation
  */
-static kmem_cache_t *bh_cachep;
+static struct kmem_cache *bh_cachep;
 
 /*
  * Once the number of bh's in the machine exceeds this level, we start
@@ -2961,7 +2961,7 @@ void free_buffer_head(struct buffer_head *bh)
 EXPORT_SYMBOL(free_buffer_head);
 
 static void
-init_buffer_head(void *data, kmem_cache_t *cachep, unsigned long flags)
+init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags)
 {
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 			    SLAB_CTOR_CONSTRUCTOR) {
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 84168629cea..e6b5866e500 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -81,7 +81,7 @@ extern mempool_t *cifs_sm_req_poolp;
 extern mempool_t *cifs_req_poolp;
 extern mempool_t *cifs_mid_poolp;
 
-extern kmem_cache_t *cifs_oplock_cachep;
+extern struct kmem_cache *cifs_oplock_cachep;
 
 static int
 cifs_read_super(struct super_block *sb, void *data,
@@ -232,11 +232,11 @@ static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd)
 		return generic_permission(inode, mask, NULL);
 }
 
-static kmem_cache_t *cifs_inode_cachep;
-static kmem_cache_t *cifs_req_cachep;
-static kmem_cache_t *cifs_mid_cachep;
-kmem_cache_t *cifs_oplock_cachep;
-static kmem_cache_t *cifs_sm_req_cachep;
+static struct kmem_cache *cifs_inode_cachep;
+static struct kmem_cache *cifs_req_cachep;
+static struct kmem_cache *cifs_mid_cachep;
+struct kmem_cache *cifs_oplock_cachep;
+static struct kmem_cache *cifs_sm_req_cachep;
 mempool_t *cifs_sm_req_poolp;
 mempool_t *cifs_req_poolp;
 mempool_t *cifs_mid_poolp;
@@ -668,7 +668,7 @@ const struct file_operations cifs_dir_ops = {
 };
 
 static void
-cifs_init_once(void *inode, kmem_cache_t * cachep, unsigned long flags)
+cifs_init_once(void *inode, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct cifsInodeInfo *cifsi = inode;
 
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 1f727765a8e..f80007eaebf 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -34,7 +34,7 @@
 #include "cifs_debug.h"
   
 extern mempool_t *cifs_mid_poolp;
-extern kmem_cache_t *cifs_oplock_cachep;
+extern struct kmem_cache *cifs_oplock_cachep;
 
 static struct mid_q_entry *
 AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 50cedd2617d..b64659fa82d 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -38,7 +38,7 @@ static void coda_clear_inode(struct inode *);
 static void coda_put_super(struct super_block *);
 static int coda_statfs(struct dentry *dentry, struct kstatfs *buf);
 
-static kmem_cache_t * coda_inode_cachep;
+static struct kmem_cache * coda_inode_cachep;
 
 static struct inode *coda_alloc_inode(struct super_block *sb)
 {
@@ -58,7 +58,7 @@ static void coda_destroy_inode(struct inode *inode)
 	kmem_cache_free(coda_inode_cachep, ITOC(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct coda_inode_info *ei = (struct coda_inode_info *) foo;
 
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 3f4ff7a242b..f92cd303d2c 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -49,7 +49,7 @@ struct configfs_dirent {
 #define CONFIGFS_NOT_PINNED	(CONFIGFS_ITEM_ATTR)
 
 extern struct vfsmount * configfs_mount;
-extern kmem_cache_t *configfs_dir_cachep;
+extern struct kmem_cache *configfs_dir_cachep;
 
 extern int configfs_is_root(struct config_item *item);
 
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 68bd5c93ca5..ed678529ebb 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -38,7 +38,7 @@
 
 struct vfsmount * configfs_mount = NULL;
 struct super_block * configfs_sb = NULL;
-kmem_cache_t *configfs_dir_cachep;
+struct kmem_cache *configfs_dir_cachep;
 static int configfs_mnt_count = 0;
 
 static struct super_operations configfs_ops = {
diff --git a/fs/dcache.c b/fs/dcache.c
index fd4a428998e..a7c67cee5d8 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -43,7 +43,7 @@ static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
 EXPORT_SYMBOL(dcache_lock);
 
-static kmem_cache_t *dentry_cache __read_mostly;
+static struct kmem_cache *dentry_cache __read_mostly;
 
 #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
 
@@ -2072,10 +2072,10 @@ static void __init dcache_init(unsigned long mempages)
 }
 
 /* SLAB cache for __getname() consumers */
-kmem_cache_t *names_cachep __read_mostly;
+struct kmem_cache *names_cachep __read_mostly;
 
 /* SLAB cache for file structures */
-kmem_cache_t *filp_cachep __read_mostly;
+struct kmem_cache *filp_cachep __read_mostly;
 
 EXPORT_SYMBOL(d_genocide);
 
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 0c4b0674854..21af1629f9b 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -37,7 +37,7 @@ struct dcookie_struct {
 
 static LIST_HEAD(dcookie_users);
 static DEFINE_MUTEX(dcookie_mutex);
-static kmem_cache_t *dcookie_cache __read_mostly;
+static struct kmem_cache *dcookie_cache __read_mostly;
 static struct list_head *dcookie_hashtable __read_mostly;
 static size_t hash_size __read_mostly;
 
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 989b608fd83..5352b03ff5a 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -15,7 +15,7 @@
 #include "config.h"
 #include "memory.h"
 
-static kmem_cache_t *lkb_cache;
+static struct kmem_cache *lkb_cache;
 
 
 int dlm_memory_init(void)
diff --git a/fs/dnotify.c b/fs/dnotify.c
index e778b1737b7..1f26a2b9eee 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -23,7 +23,7 @@
 
 int dir_notify_enable __read_mostly = 1;
 
-static kmem_cache_t *dn_cache __read_mostly;
+static struct kmem_cache *dn_cache __read_mostly;
 
 static void redo_inode_mask(struct inode *inode)
 {
diff --git a/fs/dquot.c b/fs/dquot.c
index c6ae6c0e0cf..f9cd5e23ebd 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -131,7 +131,7 @@ static struct quota_format_type *quota_formats;	/* List of registered formats */
 static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
 
 /* SLAB cache for dquot structures */
-static kmem_cache_t *dquot_cachep;
+static struct kmem_cache *dquot_cachep;
 
 int register_quota_format(struct quota_format_type *fmt)
 {
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a2c6ccbce30..306f8fbd1a0 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -546,7 +546,7 @@ inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags)
 }
 
 static struct ecryptfs_cache_info {
-	kmem_cache_t **cache;
+	struct kmem_cache **cache;
 	const char *name;
 	size_t size;
 	void (*ctor)(void*, struct kmem_cache *, unsigned long);
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 69b15a996cf..dfebf21289f 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -52,7 +52,7 @@ static struct pt_types sgi_pt_types[] = {
 };
 
 
-static kmem_cache_t * efs_inode_cachep;
+static struct kmem_cache * efs_inode_cachep;
 
 static struct inode *efs_alloc_inode(struct super_block *sb)
 {
@@ -68,7 +68,7 @@ static void efs_destroy_inode(struct inode *inode)
 	kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct efs_inode_info *ei = (struct efs_inode_info *) foo;
 
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index f5c88435c6b..88a6f8d0b88 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -283,10 +283,10 @@ static struct mutex epmutex;
 static struct poll_safewake psw;
 
 /* Slab cache used to allocate "struct epitem" */
-static kmem_cache_t *epi_cache __read_mostly;
+static struct kmem_cache *epi_cache __read_mostly;
 
 /* Slab cache used to allocate "struct eppoll_entry" */
-static kmem_cache_t *pwq_cache __read_mostly;
+static struct kmem_cache *pwq_cache __read_mostly;
 
 /* Virtual fs used to allocate inodes for eventpoll files */
 static struct vfsmount *eventpoll_mnt __read_mostly;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 85c237e7385..3aafb1dd917 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -135,7 +135,7 @@ static void ext2_put_super (struct super_block * sb)
 	return;
 }
 
-static kmem_cache_t * ext2_inode_cachep;
+static struct kmem_cache * ext2_inode_cachep;
 
 static struct inode *ext2_alloc_inode(struct super_block *sb)
 {
@@ -156,7 +156,7 @@ static void ext2_destroy_inode(struct inode *inode)
 	kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 0cf633f0cfa..9856565d4dd 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -436,7 +436,7 @@ static void ext3_put_super (struct super_block * sb)
 	return;
 }
 
-static kmem_cache_t *ext3_inode_cachep;
+static struct kmem_cache *ext3_inode_cachep;
 
 /*
  * Called inside transaction, so use GFP_NOFS
@@ -462,7 +462,7 @@ static void ext3_destroy_inode(struct inode *inode)
 	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c730cbc8403..f2e8c4aa0fd 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -486,7 +486,7 @@ static void ext4_put_super (struct super_block * sb)
 	return;
 }
 
-static kmem_cache_t *ext4_inode_cachep;
+static struct kmem_cache *ext4_inode_cachep;
 
 /*
  * Called inside transaction, so use GFP_NOFS
@@ -513,7 +513,7 @@ static void ext4_destroy_inode(struct inode *inode)
 	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 8c272278455..05c2941c74f 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -34,9 +34,9 @@ static inline int fat_max_cache(struct inode *inode)
 	return FAT_MAX_CACHE;
 }
 
-static kmem_cache_t *fat_cache_cachep;
+static struct kmem_cache *fat_cache_cachep;
 
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct fat_cache *cache = (struct fat_cache *)foo;
 
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index b58fd0c9f3c..a9e4688582a 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -477,7 +477,7 @@ static void fat_put_super(struct super_block *sb)
 	kfree(sbi);
 }
 
-static kmem_cache_t *fat_inode_cachep;
+static struct kmem_cache *fat_inode_cachep;
 
 static struct inode *fat_alloc_inode(struct super_block *sb)
 {
@@ -493,7 +493,7 @@ static void fat_destroy_inode(struct inode *inode)
 	kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
 
diff --git a/fs/fcntl.c b/fs/fcntl.c
index c03dc9cb21c..4740d35e52c 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -553,7 +553,7 @@ int send_sigurg(struct fown_struct *fown)
 }
 
 static DEFINE_RWLOCK(fasync_lock);
-static kmem_cache_t *fasync_cache __read_mostly;
+static struct kmem_cache *fasync_cache __read_mostly;
 
 /*
  * fasync_helper() is used by some character device drivers (mainly mice)
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index d2dd0d70007..0b7ae897cb7 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -46,7 +46,7 @@ extern const struct address_space_operations vxfs_immed_aops;
 
 extern struct inode_operations vxfs_immed_symlink_iops;
 
-kmem_cache_t		*vxfs_inode_cachep;
+struct kmem_cache		*vxfs_inode_cachep;
 
 
 #ifdef DIAGNOSTIC
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8c15139f275..357764d85ff 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -19,7 +19,7 @@
 
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 
-static kmem_cache_t *fuse_req_cachep;
+static struct kmem_cache *fuse_req_cachep;
 
 static struct fuse_conn *fuse_get_conn(struct file *file)
 {
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e039e2047cc..2bdc652b8b4 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -22,7 +22,7 @@ MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
 MODULE_DESCRIPTION("Filesystem in Userspace");
 MODULE_LICENSE("GPL");
 
-static kmem_cache_t *fuse_inode_cachep;
+static struct kmem_cache *fuse_inode_cachep;
 struct list_head fuse_conn_list;
 DEFINE_MUTEX(fuse_mutex);
 
@@ -601,7 +601,7 @@ static struct file_system_type fuse_fs_type = {
 static decl_subsys(fuse, NULL, NULL);
 static decl_subsys(connections, NULL, NULL);
 
-static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep,
+static void fuse_inode_init_once(void *foo, struct kmem_cache *cachep,
 				 unsigned long flags)
 {
 	struct inode * inode = foo;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 9889c1eacec..7c1a9e22a52 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -25,7 +25,7 @@
 #include "util.h"
 #include "glock.h"
 
-static void gfs2_init_inode_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void gfs2_init_inode_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct gfs2_inode *ip = foo;
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
@@ -37,7 +37,7 @@ static void gfs2_init_inode_once(void *foo, kmem_cache_t *cachep, unsigned long
 	}
 }
 
-static void gfs2_init_glock_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct gfs2_glock *gl = foo;
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 196c604faad..e5707a9f78c 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -23,9 +23,9 @@
 #include "lm.h"
 #include "util.h"
 
-kmem_cache_t *gfs2_glock_cachep __read_mostly;
-kmem_cache_t *gfs2_inode_cachep __read_mostly;
-kmem_cache_t *gfs2_bufdata_cachep __read_mostly;
+struct kmem_cache *gfs2_glock_cachep __read_mostly;
+struct kmem_cache *gfs2_inode_cachep __read_mostly;
+struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
 
 void gfs2_assert_i(struct gfs2_sbd *sdp)
 {
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 76a50899fe9..7984dcf89ad 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -146,9 +146,9 @@ int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
 gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__);
 
 
-extern kmem_cache_t *gfs2_glock_cachep;
-extern kmem_cache_t *gfs2_inode_cachep;
-extern kmem_cache_t *gfs2_bufdata_cachep;
+extern struct kmem_cache *gfs2_glock_cachep;
+extern struct kmem_cache *gfs2_inode_cachep;
+extern struct kmem_cache *gfs2_bufdata_cachep;
 
 static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
 					   unsigned int *p)
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index ffc6409132c..a3698796600 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -24,7 +24,7 @@
 #include "hfs_fs.h"
 #include "btree.h"
 
-static kmem_cache_t *hfs_inode_cachep;
+static struct kmem_cache *hfs_inode_cachep;
 
 MODULE_LICENSE("GPL");
 
@@ -430,7 +430,7 @@ static struct file_system_type hfs_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static void hfs_init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
+static void hfs_init_once(void *p, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct hfs_inode_info *i = p;
 
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 4a0c70c76c8..0f513c6bf84 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -434,7 +434,7 @@ MODULE_AUTHOR("Brad Boyer");
 MODULE_DESCRIPTION("Extended Macintosh Filesystem");
 MODULE_LICENSE("GPL");
 
-static kmem_cache_t *hfsplus_inode_cachep;
+static struct kmem_cache *hfsplus_inode_cachep;
 
 static struct inode *hfsplus_alloc_inode(struct super_block *sb)
 {
@@ -467,7 +467,7 @@ static struct file_system_type hfsplus_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static void hfsplus_init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
+static void hfsplus_init_once(void *p, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct hfsplus_inode_info *i = p;
 
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 46ceadd6f16..34d68e21171 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -160,7 +160,7 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static kmem_cache_t * hpfs_inode_cachep;
+static struct kmem_cache * hpfs_inode_cachep;
 
 static struct inode *hpfs_alloc_inode(struct super_block *sb)
 {
@@ -177,7 +177,7 @@ static void hpfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 36e52173a54..0706f5aac6a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -513,7 +513,7 @@ static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo)
 }
 
 
-static kmem_cache_t *hugetlbfs_inode_cachep;
+static struct kmem_cache *hugetlbfs_inode_cachep;
 
 static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 {
@@ -545,7 +545,7 @@ static const struct address_space_operations hugetlbfs_aops = {
 };
 
 
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
 
diff --git a/fs/inode.c b/fs/inode.c
index dd15984d51a..699aa4f7aa7 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -97,7 +97,7 @@ static DEFINE_MUTEX(iprune_mutex);
  */
 struct inodes_stat_t inodes_stat;
 
-static kmem_cache_t * inode_cachep __read_mostly;
+static struct kmem_cache * inode_cachep __read_mostly;
 
 static struct inode *alloc_inode(struct super_block *sb)
 {
@@ -209,7 +209,7 @@ void inode_init_once(struct inode *inode)
 
 EXPORT_SYMBOL(inode_init_once);
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct inode * inode = (struct inode *) foo;
 
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 017cb0f134d..e1956e6f116 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -34,8 +34,8 @@
 
 #include <asm/ioctls.h>
 
-static kmem_cache_t *watch_cachep __read_mostly;
-static kmem_cache_t *event_cachep __read_mostly;
+static struct kmem_cache *watch_cachep __read_mostly;
+static struct kmem_cache *event_cachep __read_mostly;
 
 static struct vfsmount *inotify_mnt __read_mostly;
 
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 4b6381cd2cf..ea55b6c469e 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -57,7 +57,7 @@ static void isofs_put_super(struct super_block *sb)
 static void isofs_read_inode(struct inode *);
 static int isofs_statfs (struct dentry *, struct kstatfs *);
 
-static kmem_cache_t *isofs_inode_cachep;
+static struct kmem_cache *isofs_inode_cachep;
 
 static struct inode *isofs_alloc_inode(struct super_block *sb)
 {
@@ -73,7 +73,7 @@ static void isofs_destroy_inode(struct inode *inode)
 	kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
 }
 
-static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct iso_inode_info *ei = foo;
 
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b85c686b60d..a8774bed20b 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1630,7 +1630,7 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 #define JBD_MAX_SLABS 5
 #define JBD_SLAB_INDEX(size)  (size >> 11)
 
-static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
 static const char *jbd_slab_names[JBD_MAX_SLABS] = {
 	"jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
 };
@@ -1693,7 +1693,7 @@ void jbd_slab_free(void *ptr,  size_t size)
 /*
  * Journal_head storage management
  */
-static kmem_cache_t *journal_head_cache;
+static struct kmem_cache *journal_head_cache;
 #ifdef CONFIG_JBD_DEBUG
 static atomic_t nr_journal_heads = ATOMIC_INIT(0);
 #endif
@@ -1996,7 +1996,7 @@ static void __exit remove_jbd_proc_entry(void)
 
 #endif
 
-kmem_cache_t *jbd_handle_cache;
+struct kmem_cache *jbd_handle_cache;
 
 static int __init journal_init_handle_cache(void)
 {
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index c532429d8d9..d204ab394f3 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -70,8 +70,8 @@
 #include <linux/init.h>
 #endif
 
-static kmem_cache_t *revoke_record_cache;
-static kmem_cache_t *revoke_table_cache;
+static struct kmem_cache *revoke_record_cache;
+static struct kmem_cache *revoke_table_cache;
 
 /* Each revoke record represents one single revoked block.  During
    journal replay, this involves recording the transaction ID of the
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c60f378b0f7..50356019ae3 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1641,7 +1641,7 @@ void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 #define JBD_MAX_SLABS 5
 #define JBD_SLAB_INDEX(size)  (size >> 11)
 
-static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
 static const char *jbd_slab_names[JBD_MAX_SLABS] = {
 	"jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k"
 };
@@ -1704,7 +1704,7 @@ void jbd2_slab_free(void *ptr,  size_t size)
 /*
  * Journal_head storage management
  */
-static kmem_cache_t *jbd2_journal_head_cache;
+static struct kmem_cache *jbd2_journal_head_cache;
 #ifdef CONFIG_JBD_DEBUG
 static atomic_t nr_journal_heads = ATOMIC_INIT(0);
 #endif
@@ -2007,7 +2007,7 @@ static void __exit jbd2_remove_jbd_proc_entry(void)
 
 #endif
 
-kmem_cache_t *jbd2_handle_cache;
+struct kmem_cache *jbd2_handle_cache;
 
 static int __init journal_init_handle_cache(void)
 {
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 380d19917f3..f506646ad0f 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -70,8 +70,8 @@
 #include <linux/init.h>
 #endif
 
-static kmem_cache_t *jbd2_revoke_record_cache;
-static kmem_cache_t *jbd2_revoke_table_cache;
+static struct kmem_cache *jbd2_revoke_record_cache;
+static struct kmem_cache *jbd2_revoke_table_cache;
 
 /* Each revoke record represents one single revoked block.  During
    journal replay, this involves recording the transaction ID of the
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 3f7899ea4cb..9f15bce9202 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -61,8 +61,8 @@ static const struct file_operations jffs_dir_operations;
 static struct inode_operations jffs_dir_inode_operations;
 static const struct address_space_operations jffs_address_operations;
 
-kmem_cache_t     *node_cache = NULL;
-kmem_cache_t     *fm_cache = NULL;
+struct kmem_cache     *node_cache = NULL;
+struct kmem_cache     *fm_cache = NULL;
 
 /* Called by the VFS at mount time to initialize the whole file system.  */
 static int jffs_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c
index 29b68d939bd..077258b2103 100644
--- a/fs/jffs/jffs_fm.c
+++ b/fs/jffs/jffs_fm.c
@@ -29,8 +29,8 @@ static int jffs_mark_obsolete(struct jffs_fmcontrol *fmc, __u32 fm_offset);
 static struct jffs_fm *jffs_alloc_fm(void);
 static void jffs_free_fm(struct jffs_fm *n);
 
-extern kmem_cache_t     *fm_cache;
-extern kmem_cache_t     *node_cache;
+extern struct kmem_cache     *fm_cache;
+extern struct kmem_cache     *node_cache;
 
 #if CONFIG_JFFS_FS_VERBOSE > 0
 void
diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c
index 33f29100501..83f9881ec4c 100644
--- a/fs/jffs2/malloc.c
+++ b/fs/jffs2/malloc.c
@@ -19,16 +19,16 @@
 
 /* These are initialised to NULL in the kernel startup code.
    If you're porting to other operating systems, beware */
-static kmem_cache_t *full_dnode_slab;
-static kmem_cache_t *raw_dirent_slab;
-static kmem_cache_t *raw_inode_slab;
-static kmem_cache_t *tmp_dnode_info_slab;
-static kmem_cache_t *raw_node_ref_slab;
-static kmem_cache_t *node_frag_slab;
-static kmem_cache_t *inode_cache_slab;
+static struct kmem_cache *full_dnode_slab;
+static struct kmem_cache *raw_dirent_slab;
+static struct kmem_cache *raw_inode_slab;
+static struct kmem_cache *tmp_dnode_info_slab;
+static struct kmem_cache *raw_node_ref_slab;
+static struct kmem_cache *node_frag_slab;
+static struct kmem_cache *inode_cache_slab;
 #ifdef CONFIG_JFFS2_FS_XATTR
-static kmem_cache_t *xattr_datum_cache;
-static kmem_cache_t *xattr_ref_cache;
+static struct kmem_cache *xattr_datum_cache;
+static struct kmem_cache *xattr_ref_cache;
 #endif
 
 int __init jffs2_create_slab_caches(void)
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 77be534ce42..7deb7825402 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -28,7 +28,7 @@
 
 static void jffs2_put_super(struct super_block *);
 
-static kmem_cache_t *jffs2_inode_cachep;
+static struct kmem_cache *jffs2_inode_cachep;
 
 static struct inode *jffs2_alloc_inode(struct super_block *sb)
 {
@@ -44,7 +44,7 @@ static void jffs2_destroy_inode(struct inode *inode)
 	kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
 }
 
-static void jffs2_i_init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void jffs2_i_init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo;
 
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 0cccd1c39d7..b1a1c729601 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -74,7 +74,7 @@ static inline void lock_metapage(struct metapage *mp)
 }
 
 #define METAPOOL_MIN_PAGES 32
-static kmem_cache_t *metapage_cache;
+static struct kmem_cache *metapage_cache;
 static mempool_t *metapage_mempool;
 
 #define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)
@@ -180,7 +180,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
 
 #endif
 
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct metapage *mp = (struct metapage *)foo;
 
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 9c1c6e0e633..ca3e1912853 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -44,7 +44,7 @@ MODULE_DESCRIPTION("The Journaled Filesystem (JFS)");
 MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM");
 MODULE_LICENSE("GPL");
 
-static kmem_cache_t * jfs_inode_cachep;
+static struct kmem_cache * jfs_inode_cachep;
 
 static struct super_operations jfs_super_operations;
 static struct export_operations jfs_export_operations;
@@ -748,7 +748,7 @@ static struct file_system_type jfs_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
 
diff --git a/fs/locks.c b/fs/locks.c
index a7b97d50c1e..1cb0c57fedb 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -142,7 +142,7 @@ int lease_break_time = 45;
 static LIST_HEAD(file_lock_list);
 static LIST_HEAD(blocked_list);
 
-static kmem_cache_t *filelock_cache __read_mostly;
+static struct kmem_cache *filelock_cache __read_mostly;
 
 /* Allocate an empty lock structure. */
 static struct file_lock *locks_alloc_lock(void)
@@ -199,7 +199,7 @@ EXPORT_SYMBOL(locks_init_lock);
  * Initialises the fields of the file lock which are invariant for
  * free file_locks.
  */
-static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cache, unsigned long flags)
 {
 	struct file_lock *lock = (struct file_lock *) foo;
 
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 0ff71256e65..deeb9dc062d 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -85,7 +85,7 @@ struct mb_cache {
 #ifndef MB_CACHE_INDEXES_COUNT
 	int				c_indexes_count;
 #endif
-	kmem_cache_t			*c_entry_cache;
+	struct kmem_cache			*c_entry_cache;
 	struct list_head		*c_block_hash;
 	struct list_head		*c_indexes_hash[0];
 };
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index ce532c2deda..629e09b38c5 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -51,7 +51,7 @@ static void minix_put_super(struct super_block *sb)
 	return;
 }
 
-static kmem_cache_t * minix_inode_cachep;
+static struct kmem_cache * minix_inode_cachep;
 
 static struct inode *minix_alloc_inode(struct super_block *sb)
 {
@@ -67,7 +67,7 @@ static void minix_destroy_inode(struct inode *inode)
 	kmem_cache_free(minix_inode_cachep, minix_i(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct minix_inode_info *ei = (struct minix_inode_info *) foo;
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 55442a6cf22..b00ac84ebbd 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -36,7 +36,7 @@ static int event;
 
 static struct list_head *mount_hashtable __read_mostly;
 static int hash_mask __read_mostly, hash_bits __read_mostly;
-static kmem_cache_t *mnt_cache __read_mostly;
+static struct kmem_cache *mnt_cache __read_mostly;
 static struct rw_semaphore namespace_sem;
 
 /* /sys/fs */
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index ed84d899220..fae53243bb9 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -40,7 +40,7 @@ static void ncp_delete_inode(struct inode *);
 static void ncp_put_super(struct super_block *);
 static int  ncp_statfs(struct dentry *, struct kstatfs *);
 
-static kmem_cache_t * ncp_inode_cachep;
+static struct kmem_cache * ncp_inode_cachep;
 
 static struct inode *ncp_alloc_inode(struct super_block *sb)
 {
@@ -56,7 +56,7 @@ static void ncp_destroy_inode(struct inode *inode)
 	kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
 
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 769fd0a0c77..2f488e1d9b6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -58,7 +58,7 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
-static kmem_cache_t *nfs_direct_cachep;
+static struct kmem_cache *nfs_direct_cachep;
 
 /*
  * This represents a set of asynchronous requests that we're waiting on
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6b53aae4ed2..15afa460e62 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -55,7 +55,7 @@ static int nfs_update_inode(struct inode *, struct nfs_fattr *);
 
 static void nfs_zap_acl_cache(struct inode *);
 
-static kmem_cache_t * nfs_inode_cachep;
+static struct kmem_cache * nfs_inode_cachep;
 
 static inline unsigned long
 nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
@@ -1111,7 +1111,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
 #endif
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct nfs_inode *nfsi = (struct nfs_inode *) foo;
 
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a1561a820ab..3fbfc2f0330 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -20,7 +20,7 @@
 
 #define NFS_PARANOIA 1
 
-static kmem_cache_t *nfs_page_cachep;
+static struct kmem_cache *nfs_page_cachep;
 
 static inline struct nfs_page *
 nfs_page_alloc(void)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 56f66f0ccb6..244a8c45b68 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -38,7 +38,7 @@ static int nfs_pagein_one(struct list_head *, struct inode *);
 static const struct rpc_call_ops nfs_read_partial_ops;
 static const struct rpc_call_ops nfs_read_full_ops;
 
-static kmem_cache_t *nfs_rdata_cachep;
+static struct kmem_cache *nfs_rdata_cachep;
 static mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ	(32)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f7dd0d00595..41b07288f99 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -85,7 +85,7 @@ static const struct rpc_call_ops nfs_write_partial_ops;
 static const struct rpc_call_ops nfs_write_full_ops;
 static const struct rpc_call_ops nfs_commit_ops;
 
-static kmem_cache_t *nfs_wdata_cachep;
+static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
 static mempool_t *nfs_commit_mempool;
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e431e93ab50..640c92b2a9f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -84,10 +84,10 @@ static void nfs4_set_recdir(char *recdir);
  */
 static DEFINE_MUTEX(client_mutex);
 
-static kmem_cache_t *stateowner_slab = NULL;
-static kmem_cache_t *file_slab = NULL;
-static kmem_cache_t *stateid_slab = NULL;
-static kmem_cache_t *deleg_slab = NULL;
+static struct kmem_cache *stateowner_slab = NULL;
+static struct kmem_cache *file_slab = NULL;
+static struct kmem_cache *stateid_slab = NULL;
+static struct kmem_cache *deleg_slab = NULL;
 
 void
 nfs4_lock_state(void)
@@ -1003,7 +1003,7 @@ alloc_init_file(struct inode *ino)
 }
 
 static void
-nfsd4_free_slab(kmem_cache_t **slab)
+nfsd4_free_slab(struct kmem_cache **slab)
 {
 	if (*slab == NULL)
 		return;
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 01f91501f70..941acf14e61 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -66,7 +66,7 @@ static struct file_operations dlmfs_file_operations;
 static struct inode_operations dlmfs_dir_inode_operations;
 static struct inode_operations dlmfs_root_inode_operations;
 static struct inode_operations dlmfs_file_inode_operations;
-static kmem_cache_t *dlmfs_inode_cache;
+static struct kmem_cache *dlmfs_inode_cache;
 
 struct workqueue_struct *user_dlm_worker;
 
@@ -257,7 +257,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
 }
 
 static void dlmfs_init_once(void *foo,
-			    kmem_cache_t *cachep,
+			    struct kmem_cache *cachep,
 			    unsigned long flags)
 {
 	struct dlmfs_inode_private *ip =
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index f784177b624..856012b4fa4 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -221,7 +221,7 @@ EXPORT_SYMBOL_GPL(dlm_dump_all_mles);
 #endif  /*  0  */
 
 
-static kmem_cache_t *dlm_mle_cache = NULL;
+static struct kmem_cache *dlm_mle_cache = NULL;
 
 
 static void dlm_mle_release(struct kref *kref);
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index fcd4475d1f8..80ac69f11d9 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -61,7 +61,7 @@ struct ocfs2_em_insert_context {
 	struct ocfs2_extent_map_entry *right_ent;
 };
 
-static kmem_cache_t *ocfs2_em_ent_cachep = NULL;
+static struct kmem_cache *ocfs2_em_ent_cachep = NULL;
 
 
 static struct ocfs2_extent_map_entry *
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 46a378fbc40..1a7dd2945b3 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -106,7 +106,7 @@ static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
 #define INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags & OCFS2_INODE_JOURNAL)
 #define SET_INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags |= OCFS2_INODE_JOURNAL)
 
-extern kmem_cache_t *ocfs2_inode_cache;
+extern struct kmem_cache *ocfs2_inode_cache;
 
 extern const struct address_space_operations ocfs2_aops;
 
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 7574d26ee0f..0524f8a04ad 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -68,7 +68,7 @@
 
 #include "buffer_head_io.h"
 
-static kmem_cache_t *ocfs2_inode_cachep = NULL;
+static struct kmem_cache *ocfs2_inode_cachep = NULL;
 
 /* OCFS2 needs to schedule several differnt types of work which
  * require cluster locking, disk I/O, recovery waits, etc. Since these
@@ -914,7 +914,7 @@ bail:
 }
 
 static void ocfs2_inode_init_once(void *data,
-				  kmem_cache_t *cachep,
+				  struct kmem_cache *cachep,
 				  unsigned long flags)
 {
 	struct ocfs2_inode_info *oi = data;
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 9707ed7a320..39814b900fc 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -69,7 +69,7 @@ struct ocfs2_meta_cache_item {
 	sector_t	c_block;
 };
 
-static kmem_cache_t *ocfs2_uptodate_cachep = NULL;
+static struct kmem_cache *ocfs2_uptodate_cachep = NULL;
 
 void ocfs2_metadata_cache_init(struct inode *inode)
 {
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 911d1bcfc56..26f44e0074e 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -330,7 +330,7 @@ out:
 	return 0;
 }
 
-static kmem_cache_t *op_inode_cachep;
+static struct kmem_cache *op_inode_cachep;
 
 static struct inode *openprom_alloc_inode(struct super_block *sb)
 {
@@ -415,7 +415,7 @@ static struct file_system_type openprom_fs_type = {
 	.kill_sb	= kill_anon_super,
 };
 
-static void op_inode_init_once(void *data, kmem_cache_t * cachep, unsigned long flags)
+static void op_inode_init_once(void *data, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct op_inode_info *oi = (struct op_inode_info *) data;
 
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b24cdb2f17c..e26945ba685 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -81,7 +81,7 @@ static void proc_read_inode(struct inode * inode)
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 }
 
-static kmem_cache_t * proc_inode_cachep;
+static struct kmem_cache * proc_inode_cachep;
 
 static struct inode *proc_alloc_inode(struct super_block *sb)
 {
@@ -105,7 +105,7 @@ static void proc_destroy_inode(struct inode *inode)
 	kmem_cache_free(proc_inode_cachep, PROC_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct proc_inode *ei = (struct proc_inode *) foo;
 
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 5b943eb11d7..c047dc654d5 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -515,7 +515,7 @@ static void qnx4_read_inode(struct inode *inode)
 	brelse(bh);
 }
 
-static kmem_cache_t *qnx4_inode_cachep;
+static struct kmem_cache *qnx4_inode_cachep;
 
 static struct inode *qnx4_alloc_inode(struct super_block *sb)
 {
@@ -531,7 +531,7 @@ static void qnx4_destroy_inode(struct inode *inode)
 	kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
 }
 
-static void init_once(void *foo, kmem_cache_t * cachep,
+static void init_once(void *foo, struct kmem_cache * cachep,
 		      unsigned long flags)
 {
 	struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 32332516d65..745bc714ba9 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -490,7 +490,7 @@ static void reiserfs_put_super(struct super_block *s)
 	return;
 }
 
-static kmem_cache_t *reiserfs_inode_cachep;
+static struct kmem_cache *reiserfs_inode_cachep;
 
 static struct inode *reiserfs_alloc_inode(struct super_block *sb)
 {
@@ -507,7 +507,7 @@ static void reiserfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
 }
 
-static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
 
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index d1b455f9b66..c5af088d4a4 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -550,7 +550,7 @@ romfs_read_inode(struct inode *i)
 	}
 }
 
-static kmem_cache_t * romfs_inode_cachep;
+static struct kmem_cache * romfs_inode_cachep;
 
 static struct inode *romfs_alloc_inode(struct super_block *sb)
 {
@@ -566,7 +566,7 @@ static void romfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct romfs_inode_info *ei = (struct romfs_inode_info *) foo;
 
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 22161710348..4af4cd729a5 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -50,7 +50,7 @@ static void smb_put_super(struct super_block *);
 static int  smb_statfs(struct dentry *, struct kstatfs *);
 static int  smb_show_options(struct seq_file *, struct vfsmount *);
 
-static kmem_cache_t *smb_inode_cachep;
+static struct kmem_cache *smb_inode_cachep;
 
 static struct inode *smb_alloc_inode(struct super_block *sb)
 {
@@ -66,7 +66,7 @@ static void smb_destroy_inode(struct inode *inode)
 	kmem_cache_free(smb_inode_cachep, SMB_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct smb_inode_info *ei = (struct smb_inode_info *) foo;
 	unsigned long flagmask = SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR;
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index 3eb1402191b..a4bcae8a9af 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -25,7 +25,7 @@
 #define ROUND_UP(x) (((x)+3) & ~3)
 
 /* cache for request structures */
-static kmem_cache_t *req_cachep;
+static struct kmem_cache *req_cachep;
 
 static int smb_request_send_req(struct smb_request *req);
 
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 20551a1b8a5..e503f858fba 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -16,7 +16,7 @@
 
 struct vfsmount *sysfs_mount;
 struct super_block * sysfs_sb = NULL;
-kmem_cache_t *sysfs_dir_cachep;
+struct kmem_cache *sysfs_dir_cachep;
 
 static struct super_operations sysfs_ops = {
 	.statfs		= simple_statfs,
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 6f3d6bd5288..bd7cec295da 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -1,6 +1,6 @@
 
 extern struct vfsmount * sysfs_mount;
-extern kmem_cache_t *sysfs_dir_cachep;
+extern struct kmem_cache *sysfs_dir_cachep;
 
 extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
 extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index a6ca12b747c..ead9864567e 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -301,7 +301,7 @@ static void sysv_delete_inode(struct inode *inode)
 	unlock_kernel();
 }
 
-static kmem_cache_t *sysv_inode_cachep;
+static struct kmem_cache *sysv_inode_cachep;
 
 static struct inode *sysv_alloc_inode(struct super_block *sb)
 {
@@ -318,7 +318,7 @@ static void sysv_destroy_inode(struct inode *inode)
 	kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
 }
 
-static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *p, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct sysv_inode_info *si = (struct sysv_inode_info *)p;
 
diff --git a/fs/udf/super.c b/fs/udf/super.c
index e50f24221de..397f54aaf66 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -107,7 +107,7 @@ static struct file_system_type udf_fstype = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static kmem_cache_t * udf_inode_cachep;
+static struct kmem_cache * udf_inode_cachep;
 
 static struct inode *udf_alloc_inode(struct super_block *sb)
 {
@@ -130,7 +130,7 @@ static void udf_destroy_inode(struct inode *inode)
 	kmem_cache_free(udf_inode_cachep, UDF_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct udf_inode_info *ei = (struct udf_inode_info *) foo;
 
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 85a88c0c5e6..0b18d2cc2ef 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1204,7 +1204,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static kmem_cache_t * ufs_inode_cachep;
+static struct kmem_cache * ufs_inode_cachep;
 
 static struct inode *ufs_alloc_inode(struct super_block *sb)
 {
@@ -1221,7 +1221,7 @@ static void ufs_destroy_inode(struct inode *inode)
 	kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
 
-- 
cgit v1.2.3


From 7dfb71030f7636a0d65200158113c37764552f93 Mon Sep 17 00:00:00 2001
From: Nigel Cunningham <ncunningham@linuxmail.org>
Date: Wed, 6 Dec 2006 20:34:23 -0800
Subject: [PATCH] Add include/linux/freezer.h and move definitions from sched.h

Move process freezing functions from include/linux/sched.h to freezer.h, so
that modifications to the freezer or the kernel configuration don't require
recompiling just about everything.

[akpm@osdl.org: fix ueagle driver]
Signed-off-by: Nigel Cunningham <nigel@suspend2.net>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/afs/kafsasyncd.c          | 1 +
 fs/afs/kafstimod.c           | 1 +
 fs/cifs/cifsfs.c             | 1 +
 fs/cifs/connect.c            | 1 +
 fs/jbd/journal.c             | 2 +-
 fs/jbd2/journal.c            | 2 +-
 fs/jffs/intrep.c             | 1 +
 fs/jffs2/background.c        | 1 +
 fs/jfs/jfs_logmgr.c          | 2 +-
 fs/jfs/jfs_txnmgr.c          | 2 +-
 fs/lockd/clntproc.c          | 1 +
 fs/xfs/linux-2.6/xfs_buf.c   | 1 +
 fs/xfs/linux-2.6/xfs_super.c | 1 +
 13 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
index f09a794f248..615df2407cb 100644
--- a/fs/afs/kafsasyncd.c
+++ b/fs/afs/kafsasyncd.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/completion.h>
+#include <linux/freezer.h>
 #include "cell.h"
 #include "server.h"
 #include "volume.h"
diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c
index 65bc05ab818..694344e4d3c 100644
--- a/fs/afs/kafstimod.c
+++ b/fs/afs/kafstimod.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/completion.h>
+#include <linux/freezer.h>
 #include "cell.h"
 #include "volume.h"
 #include "kafstimod.h"
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index e6b5866e500..71bc87a37fc 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -34,6 +34,7 @@
 #include <linux/mempool.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
+#include <linux/freezer.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
 #define DECLARE_GLOBALS_HERE
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 71f77914ce9..2caca06b4ba 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -31,6 +31,7 @@
 #include <linux/delay.h>
 #include <linux/completion.h>
 #include <linux/pagevec.h>
+#include <linux/freezer.h>
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include "cifspdu.h"
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index a8774bed20b..10fff944393 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -31,7 +31,7 @@
 #include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/pagemap.h>
 #include <linux/kthread.h>
 #include <linux/poison.h>
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 50356019ae3..44fc32bfd7f 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -31,7 +31,7 @@
 #include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/pagemap.h>
 #include <linux/kthread.h>
 #include <linux/poison.h>
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 4a543e11497..478a74e2e9d 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -66,6 +66,7 @@
 #include <linux/smp_lock.h>
 #include <linux/time.h>
 #include <linux/ctype.h>
+#include <linux/freezer.h>
 
 #include "intrep.h"
 #include "jffs_fm.h"
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index ff2a872e80e..6eb3daebd56 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -16,6 +16,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/completion.h>
 #include <linux/sched.h>
+#include <linux/freezer.h>
 #include "nodelist.h"
 
 
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index b89c9aba046..5065baa530b 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -67,7 +67,7 @@
 #include <linux/kthread.h>
 #include <linux/buffer_head.h>		/* for sync_blockdev() */
 #include <linux/bio.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
 #include "jfs_incore.h"
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 81f6f04af19..d558e51b0df 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -46,7 +46,7 @@
 #include <linux/vmalloc.h>
 #include <linux/smp_lock.h>
 #include <linux/completion.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/kthread.h>
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 3d84f600b63..50643b6a555 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -13,6 +13,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/utsname.h>
 #include <linux/smp_lock.h>
+#include <linux/freezer.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index eef4a0ba11e..b971237c5a9 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -32,6 +32,7 @@
 #include <linux/kthread.h>
 #include <linux/migrate.h>
 #include <linux/backing-dev.h>
+#include <linux/freezer.h>
 
 STATIC kmem_zone_t *xfs_buf_zone;
 STATIC kmem_shaker_t xfs_buf_shake;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index de05abbbe7f..b93265b7c79 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -56,6 +56,7 @@
 #include <linux/mempool.h>
 #include <linux/writeback.h>
 #include <linux/kthread.h>
+#include <linux/freezer.h>
 
 STATIC struct quotactl_ops xfs_quotactl_operations;
 STATIC struct super_operations xfs_super_operations;
-- 
cgit v1.2.3


From 58e14b148ddb56f0bf999965d6279932ed4a00bc Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:50 -0800
Subject: [PATCH] Use freezeable workqueues in XFS

Make the workqueues used by XFS freezeable, so their worker threads don't
submit any I/O after the suspend image has been created.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Nigel Cunningham <nigel@suspend2.net>
Cc: David Chinner <dgc@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/xfs/linux-2.6/xfs_buf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index b971237c5a9..4fb01ffdfd1 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1827,11 +1827,11 @@ xfs_buf_init(void)
 	if (!xfs_buf_zone)
 		goto out_free_trace_buf;
 
-	xfslogd_workqueue = create_workqueue("xfslogd");
+	xfslogd_workqueue = create_freezeable_workqueue("xfslogd");
 	if (!xfslogd_workqueue)
 		goto out_free_buf_zone;
 
-	xfsdatad_workqueue = create_workqueue("xfsdatad");
+	xfsdatad_workqueue = create_freezeable_workqueue("xfsdatad");
 	if (!xfsdatad_workqueue)
 		goto out_destroy_xfslogd_workqueue;
 
-- 
cgit v1.2.3


From 5127d002f9769ba6b1691de78dd3a5c14635e183 Mon Sep 17 00:00:00 2001
From: Suzuki Kp <suzuki@in.ibm.com>
Date: Wed, 6 Dec 2006 20:35:14 -0800
Subject: [PATCH] fix rescan_partitions to return errors properly

The current rescan_partition implementation ignores the errors that comes from
the lower layer.  It reports success for unknown partitions as well as I/O
error cases while reading the partition information.

The unknown partition is not (and will not be) considered as an error in the
kernel, since there are legal users of it (e.g, members of a RAID5 MD Device
or a new disk which is not partitioned at all ).  Changing this behaviour
would scare the user about a serious problem with their disk and is not
recommended.  Thus for both "unknown partitions" to the Linux (eg., DEC
VMS,Novell Netware) and the legal users of NULL partition, would still be
reported as "SUCCESS".

The patch attached here, scares the user about something which he does need to
worry about.  i.e, returning -EIO on disk I/O errors while reading the
partition information.

Signed-off-by: Suzuki K P <suzuki@in.ibm.com>
Cc: Erik Mouw <erik@harddisk-recovery.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/partitions/check.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6fb4b6150d7..0b6113ba3b7 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -177,7 +177,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
 	else if (warn_no_part)
 		printk(" unable to read partition table\n");
 	kfree(state);
-	return NULL;
+	return ERR_PTR(res);
 }
 
 /*
@@ -494,6 +494,8 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 		disk->fops->revalidate_disk(disk);
 	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
 		return 0;
+	if (IS_ERR(state))	/* I/O error reading the partition table */
+		return PTR_ERR(state);
 	for (p = 1; p < state->limit; p++) {
 		sector_t size = state->parts[p].size;
 		sector_t from = state->parts[p].from;
-- 
cgit v1.2.3


From 57881dd9df40b76dc7fc6a0d13fd75f337accb32 Mon Sep 17 00:00:00 2001
From: Suzuki K P <suzuki@in.ibm.com>
Date: Wed, 6 Dec 2006 20:35:16 -0800
Subject: [PATCH] Fix check_partition routines

check_partition() stops its probe once it hits an I/O error from the
partition checkers.  This would prevent the actual partition checker
getting a chance to verify the partition.

So this patch lets check_partition() continue probing untill it hits a
success while recording the I/O error which might have been reported by the
checking routines.

Also, it does some cleanup of the partition methods for ibm, atari and
amiga to return -1 upon hitting an I/O error.

Signed-off-by: Suzuki K P <suzuki@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/partitions/amiga.c |  2 ++
 fs/partitions/atari.c |  2 +-
 fs/partitions/check.c | 15 +++++++++++++--
 fs/partitions/ibm.c   | 29 +++++++++++++++++------------
 4 files changed, 33 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/partitions/amiga.c b/fs/partitions/amiga.c
index 3068528890a..9917a8c360f 100644
--- a/fs/partitions/amiga.c
+++ b/fs/partitions/amiga.c
@@ -43,6 +43,7 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev)
 			if (warn_no_part)
 				printk("Dev %s: unable to read RDB block %d\n",
 				       bdevname(bdev, b), blk);
+			res = -1;
 			goto rdb_done;
 		}
 		if (*(__be32 *)data != cpu_to_be32(IDNAME_RIGIDDISK))
@@ -79,6 +80,7 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev)
 			if (warn_no_part)
 				printk("Dev %s: unable to read partition block %d\n",
 				       bdevname(bdev, b), blk);
+			res = -1;
 			goto rdb_done;
 		}
 		pb  = (struct PartitionBlock *)data;
diff --git a/fs/partitions/atari.c b/fs/partitions/atari.c
index 192a6adfdef..1f3572d5b75 100644
--- a/fs/partitions/atari.c
+++ b/fs/partitions/atari.c
@@ -88,7 +88,7 @@ int atari_partition(struct parsed_partitions *state, struct block_device *bdev)
 			if (!xrs) {
 				printk (" block %ld read failed\n", partsect);
 				put_dev_sector(sect);
-				return 0;
+				return -1;
 			}
 
 			/* ++roman: sanity check: bit 0 of flg field must be set */
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 0b6113ba3b7..1901137f4ec 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -153,7 +153,7 @@ static struct parsed_partitions *
 check_partition(struct gendisk *hd, struct block_device *bdev)
 {
 	struct parsed_partitions *state;
-	int i, res;
+	int i, res, err;
 
 	state = kmalloc(sizeof(struct parsed_partitions), GFP_KERNEL);
 	if (!state)
@@ -165,13 +165,24 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
 		sprintf(state->name, "p");
 
 	state->limit = hd->minors;
-	i = res = 0;
+	i = res = err = 0;
 	while (!res && check_part[i]) {
 		memset(&state->parts, 0, sizeof(state->parts));
 		res = check_part[i++](state, bdev);
+		if (res < 0) {
+			/* We have hit an I/O error which we don't report now.
+		 	* But record it, and let the others do their job.
+		 	*/
+			err = res;
+			res = 0;
+		}
+
 	}
 	if (res > 0)
 		return state;
+	if (!err)
+	/* The partition is unrecognized. So report I/O errors if there were any */
+		res = err;
 	if (!res)
 		printk(" unknown partition table\n");
 	else if (warn_no_part)
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index d352a7381fe..9f7ad4244f6 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -43,7 +43,7 @@ cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) {
 int
 ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 {
-	int blocksize, offset, size;
+	int blocksize, offset, size,res;
 	loff_t i_size;
 	dasd_information_t *info;
 	struct hd_geometry *geo;
@@ -56,15 +56,16 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 	unsigned char *data;
 	Sector sect;
 
+	res = 0;
 	blocksize = bdev_hardsect_size(bdev);
 	if (blocksize <= 0)
-		return 0;
+		goto out_exit;
 	i_size = i_size_read(bdev->bd_inode);
 	if (i_size == 0)
-		return 0;
+		goto out_exit;
 
 	if ((info = kmalloc(sizeof(dasd_information_t), GFP_KERNEL)) == NULL)
-		goto out_noinfo;
+		goto out_exit;
 	if ((geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL)) == NULL)
 		goto out_nogeo;
 	if ((label = kmalloc(sizeof(union label_t), GFP_KERNEL)) == NULL)
@@ -72,7 +73,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 
 	if (ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)info) != 0 ||
 	    ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0)
-		goto out_noioctl;
+		goto out_freeall;
 
 	/*
 	 * Get volume label, extract name and type.
@@ -92,6 +93,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 	EBCASC(type, 4);
 	EBCASC(name, 6);
 
+	res = 1;
+
 	/*
 	 * Three different types: CMS1, VOL1 and LNX1/unlabeled
 	 */
@@ -156,6 +159,9 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 			counter++;
 			blk++;
 		}
+		if (!data)
+		/* Are we not supposed to report this ? */
+			goto out_readerr;
 	} else {
 		/*
 		 * Old style LNX1 or unlabeled disk
@@ -171,18 +177,17 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 	}
 
 	printk("\n");
-	kfree(label);
-	kfree(geo);
-	kfree(info);
-	return 1;
+	goto out_freeall;
+
 
 out_readerr:
-out_noioctl:
+	res = -1;
+out_freeall:
 	kfree(label);
 out_nolab:
 	kfree(geo);
 out_nogeo:
 	kfree(info);
-out_noinfo:
-	return 0;
+out_exit:
+	return res;
 }
-- 
cgit v1.2.3


From 317a40ac2237732aba531eee2c7b5e39dd40e959 Mon Sep 17 00:00:00 2001
From: Stas Sergeev <stsp@aknet.ru>
Date: Wed, 6 Dec 2006 20:35:25 -0800
Subject: [PATCH] honour MNT_NOEXEC for access()

Make access(X_OK) take the "noexec" mount option into account.

Signed-off-by: Stas Sergeev <stsp@aknet.ru>
Cc: Jakub Jelinek <jakub@redhat.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namei.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 28d49b301d5..61f99c1967d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -249,9 +249,11 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 
 	/*
 	 * MAY_EXEC on regular files requires special handling: We override
-	 * filesystem execute permissions if the mode bits aren't set.
+	 * filesystem execute permissions if the mode bits aren't set or
+	 * the fs is mounted with the "noexec" flag.
 	 */
-	if ((mask & MAY_EXEC) && S_ISREG(mode) && !(mode & S_IXUGO))
+	if ((mask & MAY_EXEC) && S_ISREG(mode) && (!(mode & S_IXUGO) ||
+			(nd && nd->mnt && (nd->mnt->mnt_flags & MNT_NOEXEC))))
 		return -EACCES;
 
 	/* Ordinary permission routines do not understand MAY_APPEND. */
-- 
cgit v1.2.3


From e4fca01ea2b41c41a82f4ca3537f6ebc237adde5 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Wed, 6 Dec 2006 20:35:27 -0800
Subject: [PATCH] ext2: fsid for statvfs

Update ext2_statfs to return an FSID that is a 64 bit XOR of the 128 bit
filesystem UUID as suggested by Andreas Dilger.  See the following Bugzilla
entry for details:

  http://bugzilla.kernel.org/show_bug.cgi?id=136

Cc: Andreas Dilger <adilger@clusterfs.com>
Cc: Stephen Tweedie <sct@redhat.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/super.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 3aafb1dd917..255cef5f742 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1090,8 +1090,10 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 {
 	struct super_block *sb = dentry->d_sb;
 	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	struct ext2_super_block *es = sbi->s_es;
 	unsigned long overhead;
 	int i;
+	u64 fsid;
 
 	if (test_opt (sb, MINIX_DF))
 		overhead = 0;
@@ -1104,7 +1106,7 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 		 * All of the blocks before first_data_block are
 		 * overhead
 		 */
-		overhead = le32_to_cpu(sbi->s_es->s_first_data_block);
+		overhead = le32_to_cpu(es->s_first_data_block);
 
 		/*
 		 * Add the overhead attributed to the superblock and
@@ -1125,14 +1127,18 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 
 	buf->f_type = EXT2_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
-	buf->f_blocks = le32_to_cpu(sbi->s_es->s_blocks_count) - overhead;
+	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead;
 	buf->f_bfree = ext2_count_free_blocks(sb);
-	buf->f_bavail = buf->f_bfree - le32_to_cpu(sbi->s_es->s_r_blocks_count);
-	if (buf->f_bfree < le32_to_cpu(sbi->s_es->s_r_blocks_count))
+	buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
+	if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
 		buf->f_bavail = 0;
-	buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count);
-	buf->f_ffree = ext2_count_free_inodes (sb);
+	buf->f_files = le32_to_cpu(es->s_inodes_count);
+	buf->f_ffree = ext2_count_free_inodes(sb);
 	buf->f_namelen = EXT2_NAME_LEN;
+	fsid = le64_to_cpup((void *)es->s_uuid) ^
+	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
+	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
+	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 50ee0a32b192902e32a2b596df7ec3496c4bf485 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Wed, 6 Dec 2006 20:35:28 -0800
Subject: [PATCH] ext3: fsid for statvfs

Update ext3_statfs to return an FSID that is a 64 bit XOR of the 128 bit
filesystem UUID as suggested by Andreas Dilger.  See the following Bugzilla
entry for details:

  http://bugzilla.kernel.org/show_bug.cgi?id=136

Cc: Andreas Dilger <adilger@clusterfs.com>
Cc: Stephen Tweedie <sct@redhat.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/super.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 9856565d4dd..8ab19813335 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2387,6 +2387,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
 	struct ext3_super_block *es = sbi->s_es;
 	ext3_fsblk_t overhead;
 	int i;
+	u64 fsid;
 
 	if (test_opt (sb, MINIX_DF))
 		overhead = 0;
@@ -2433,6 +2434,10 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
 	buf->f_files = le32_to_cpu(es->s_inodes_count);
 	buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter);
 	buf->f_namelen = EXT3_NAME_LEN;
+	fsid = le64_to_cpup((void *)es->s_uuid) ^
+	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
+	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
+	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 960cc398a7a2acfe455b2ec33c64dc6018c83aab Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Wed, 6 Dec 2006 20:35:29 -0800
Subject: [PATCH] ext4: fsid for statvfs

Update ext4_statfs to return an FSID that is a 64 bit XOR of the 128 bit
filesystem UUID as suggested by Andreas Dilger.  See the following Bugzilla
entry for details:

  http://bugzilla.kernel.org/show_bug.cgi?id=136

Cc: Andreas Dilger <adilger@clusterfs.com>
Cc: Stephen Tweedie <sct@redhat.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/super.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f2e8c4aa0fd..2ede7e2c701 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2460,6 +2460,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
 	struct ext4_super_block *es = sbi->s_es;
 	ext4_fsblk_t overhead;
 	int i;
+	u64 fsid;
 
 	if (test_opt (sb, MINIX_DF))
 		overhead = 0;
@@ -2506,6 +2507,10 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
 	buf->f_files = le32_to_cpu(es->s_inodes_count);
 	buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter);
 	buf->f_namelen = EXT4_NAME_LEN;
+	fsid = le64_to_cpup((void *)es->s_uuid) ^
+	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
+	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
+	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
 	return 0;
 }
 
-- 
cgit v1.2.3


From bdcf25080438ba71bb24b885e7c102de72c25c9d Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Wed, 6 Dec 2006 20:35:41 -0800
Subject: [PATCH] fuse: minor cleanup in fuse_dentry_revalidate

Remove unneeded code from fuse_dentry_revalidate().  This made some sense
while the validity time could wrap around, but now it's a very obvious no-op.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c71a6c092ad..677f3ed7f98 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -141,9 +141,6 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 		struct fuse_req *forget_req;
 		struct dentry *parent;
 
-		/* Doesn't hurt to "reset" the validity timeout */
-		fuse_invalidate_entry_cache(entry);
-
 		/* For negative dentries, always do a fresh lookup */
 		if (!inode)
 			return 0;
-- 
cgit v1.2.3


From d6392f873f1d09974d5c92c52715fa422ad7c625 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Wed, 6 Dec 2006 20:35:44 -0800
Subject: [PATCH] fuse: add support for block device based filesystems

I never intended this, but people started using fuse to implement block device
based "real" filesystems (ntfs-3g, zfs).

The following four patches add better support for these kinds of filesystems.
Unlike "normal" fuse filesystems, using this feature should require superuser
privileges (enforced by the fusermount utility).

Thanks to Szabolcs Szakacsits for the input and testing.

This patch adds a 'fuseblk' filesystem type, which is only different from the
'fuse' filesystem type in how the 'dev_name' mount argument is interpreted.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/inode.c | 48 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 37 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 2bdc652b8b4..38cf97d6469 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -591,6 +591,14 @@ static int fuse_get_sb(struct file_system_type *fs_type,
 	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
 }
 
+static int fuse_get_sb_blk(struct file_system_type *fs_type,
+			   int flags, const char *dev_name,
+			   void *raw_data, struct vfsmount *mnt)
+{
+	return get_sb_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super,
+			   mnt);
+}
+
 static struct file_system_type fuse_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fuse",
@@ -598,6 +606,14 @@ static struct file_system_type fuse_fs_type = {
 	.kill_sb	= kill_anon_super,
 };
 
+static struct file_system_type fuseblk_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "fuseblk",
+	.get_sb		= fuse_get_sb_blk,
+	.kill_sb	= kill_block_super,
+	.fs_flags	= FS_REQUIRES_DEV,
+};
+
 static decl_subsys(fuse, NULL, NULL);
 static decl_subsys(connections, NULL, NULL);
 
@@ -617,24 +633,34 @@ static int __init fuse_fs_init(void)
 
 	err = register_filesystem(&fuse_fs_type);
 	if (err)
-		printk("fuse: failed to register filesystem\n");
-	else {
-		fuse_inode_cachep = kmem_cache_create("fuse_inode",
-						      sizeof(struct fuse_inode),
-						      0, SLAB_HWCACHE_ALIGN,
-						      fuse_inode_init_once, NULL);
-		if (!fuse_inode_cachep) {
-			unregister_filesystem(&fuse_fs_type);
-			err = -ENOMEM;
-		}
-	}
+		goto out;
+
+	err = register_filesystem(&fuseblk_fs_type);
+	if (err)
+		goto out_unreg;
+
+	fuse_inode_cachep = kmem_cache_create("fuse_inode",
+					      sizeof(struct fuse_inode),
+					      0, SLAB_HWCACHE_ALIGN,
+					      fuse_inode_init_once, NULL);
+	err = -ENOMEM;
+	if (!fuse_inode_cachep)
+		goto out_unreg2;
+
+	return 0;
 
+ out_unreg2:
+	unregister_filesystem(&fuseblk_fs_type);
+ out_unreg:
+	unregister_filesystem(&fuse_fs_type);
+ out:
 	return err;
 }
 
 static void fuse_fs_cleanup(void)
 {
 	unregister_filesystem(&fuse_fs_type);
+	unregister_filesystem(&fuseblk_fs_type);
 	kmem_cache_destroy(fuse_inode_cachep);
 }
 
-- 
cgit v1.2.3


From d809161402e9f99aefe8848c4e701597ac367269 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Wed, 6 Dec 2006 20:35:48 -0800
Subject: [PATCH] fuse: add blksize option

Add 'blksize' option for block device based filesystems.  During
initialization this is used to set the block size on the device and the super
block.  The default block size is 512bytes.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/inode.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 38cf97d6469..1baaaeb2e85 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -39,6 +39,7 @@ struct fuse_mount_data {
 	unsigned group_id_present : 1;
 	unsigned flags;
 	unsigned max_read;
+	unsigned blksize;
 };
 
 static struct inode *fuse_alloc_inode(struct super_block *sb)
@@ -274,6 +275,7 @@ enum {
 	OPT_DEFAULT_PERMISSIONS,
 	OPT_ALLOW_OTHER,
 	OPT_MAX_READ,
+	OPT_BLKSIZE,
 	OPT_ERR
 };
 
@@ -285,14 +287,16 @@ static match_table_t tokens = {
 	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
 	{OPT_ALLOW_OTHER,		"allow_other"},
 	{OPT_MAX_READ,			"max_read=%u"},
+	{OPT_BLKSIZE,			"blksize=%u"},
 	{OPT_ERR,			NULL}
 };
 
-static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
+static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
 {
 	char *p;
 	memset(d, 0, sizeof(struct fuse_mount_data));
 	d->max_read = ~0;
+	d->blksize = 512;
 
 	while ((p = strsep(&opt, ",")) != NULL) {
 		int token;
@@ -345,6 +349,12 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
 			d->max_read = value;
 			break;
 
+		case OPT_BLKSIZE:
+			if (!is_bdev || match_int(&args[0], &value))
+				return 0;
+			d->blksize = value;
+			break;
+
 		default:
 			return 0;
 		}
@@ -500,15 +510,21 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	struct dentry *root_dentry;
 	struct fuse_req *init_req;
 	int err;
+	int is_bdev = sb->s_bdev != NULL;
 
 	if (sb->s_flags & MS_MANDLOCK)
 		return -EINVAL;
 
-	if (!parse_fuse_opt((char *) data, &d))
+	if (!parse_fuse_opt((char *) data, &d, is_bdev))
 		return -EINVAL;
 
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	if (is_bdev) {
+		if (!sb_set_blocksize(sb, d.blksize))
+			return -EINVAL;
+	} else {
+		sb->s_blocksize = PAGE_CACHE_SIZE;
+		sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	}
 	sb->s_magic = FUSE_SUPER_MAGIC;
 	sb->s_op = &fuse_super_operations;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
-- 
cgit v1.2.3


From b2d2272fae1e1df26ec8f93a6d5baea891dcce37 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Wed, 6 Dec 2006 20:35:51 -0800
Subject: [PATCH] fuse: add bmap support

Add support for the BMAP operation for block device based filesystems.  This
is needed to support swap-files and lilo.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c    |  2 ++
 fs/fuse/file.c   | 37 +++++++++++++++++++++++++++++++++++++
 fs/fuse/fuse_i.h |  3 +++
 3 files changed, 42 insertions(+)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 677f3ed7f98..1cabdb229ad 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1024,6 +1024,8 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
 	if (attr->ia_valid & ATTR_SIZE) {
 		unsigned long limit;
 		is_truncate = 1;
+		if (IS_SWAPFILE(inode))
+			return -ETXTBSY;
 		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
 		if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
 			send_sig(SIGXFSZ, current, 0);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 763a50daf1c..128f79c4080 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -754,6 +754,42 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
 	return err;
 }
 
+static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
+{
+	struct inode *inode = mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	struct fuse_bmap_in inarg;
+	struct fuse_bmap_out outarg;
+	int err;
+
+	if (!inode->i_sb->s_bdev || fc->no_bmap)
+		return 0;
+
+	req = fuse_get_req(fc);
+	if (IS_ERR(req))
+		return 0;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.block = block;
+	inarg.blocksize = inode->i_sb->s_blocksize;
+	req->in.h.opcode = FUSE_BMAP;
+	req->in.h.nodeid = get_node_id(inode);
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (err == -ENOSYS)
+		fc->no_bmap = 1;
+
+	return err ? 0 : outarg.block;
+}
+
 static const struct file_operations fuse_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
@@ -787,6 +823,7 @@ static const struct address_space_operations fuse_file_aops  = {
 	.commit_write	= fuse_commit_write,
 	.readpages	= fuse_readpages,
 	.set_page_dirty	= fuse_set_page_dirty,
+	.bmap		= fuse_bmap,
 };
 
 void fuse_init_file_inode(struct inode *inode)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 91edb8932d9..58d482d9f6b 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -339,6 +339,9 @@ struct fuse_conn {
 	/** Is interrupt not implemented by fs? */
 	unsigned no_interrupt : 1;
 
+	/** Is bmap not implemented by fs? */
+	unsigned no_bmap : 1;
+
 	/** The number of requests waiting for completion */
 	atomic_t num_waiting;
 
-- 
cgit v1.2.3


From 0ec7ca41f6f0f74a394a7d686bc0ee8afef84887 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Wed, 6 Dec 2006 20:35:52 -0800
Subject: [PATCH] fuse: add DESTROY operation

Add a DESTROY operation for block device based filesystems.  With the help of
this operation, such a filesystem can flush dirty data to the device
synchronously before the umount returns.

This is needed in situations where the filesystem is assumed to be clean
immediately after unmount (e.g.  ejecting removable media).

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/fuse_i.h |  6 ++++++
 fs/fuse/inode.c  | 22 ++++++++++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'fs')

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 58d482d9f6b..b98b20de740 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -298,6 +298,9 @@ struct fuse_conn {
 	    reply, before any other request, and never cleared */
 	unsigned conn_error : 1;
 
+	/** Connection successful.  Only set in INIT */
+	unsigned conn_init : 1;
+
 	/** Do readpages asynchronously?  Only set in INIT */
 	unsigned async_read : 1;
 
@@ -368,6 +371,9 @@ struct fuse_conn {
 
 	/** Key for lock owner ID scrambling */
 	u32 scramble_key[4];
+
+	/** Reserved request for the DESTROY message */
+	struct fuse_req *destroy_req;
 };
 
 static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1baaaeb2e85..437d61c6526 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -206,10 +206,23 @@ static void fuse_umount_begin(struct vfsmount *vfsmnt, int flags)
 		fuse_abort_conn(get_fuse_conn_super(vfsmnt->mnt_sb));
 }
 
+static void fuse_send_destroy(struct fuse_conn *fc)
+{
+	struct fuse_req *req = fc->destroy_req;
+	if (req && fc->conn_init) {
+		fc->destroy_req = NULL;
+		req->in.h.opcode = FUSE_DESTROY;
+		req->force = 1;
+		request_send(fc, req);
+		fuse_put_request(fc, req);
+	}
+}
+
 static void fuse_put_super(struct super_block *sb)
 {
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
 
+	fuse_send_destroy(fc);
 	spin_lock(&fc->lock);
 	fc->connected = 0;
 	fc->blocked = 0;
@@ -410,6 +423,8 @@ static struct fuse_conn *new_conn(void)
 void fuse_conn_put(struct fuse_conn *fc)
 {
 	if (atomic_dec_and_test(&fc->count)) {
+		if (fc->destroy_req)
+			fuse_request_free(fc->destroy_req);
 		mutex_destroy(&fc->inst_mutex);
 		kfree(fc);
 	}
@@ -466,6 +481,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 		fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
 		fc->minor = arg->minor;
 		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
+		fc->conn_init = 1;
 	}
 	fuse_put_request(fc, req);
 	fc->blocked = 0;
@@ -563,6 +579,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (!init_req)
 		goto err_put_root;
 
+	if (is_bdev) {
+		fc->destroy_req = fuse_request_alloc();
+		if (!fc->destroy_req)
+			goto err_put_root;
+	}
+
 	mutex_lock(&fuse_mutex);
 	err = -EINVAL;
 	if (file->private_data)
-- 
cgit v1.2.3


From 875d95ec9eb69ffb334116fb44d04d9a64dcbfbb Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Wed, 6 Dec 2006 20:35:54 -0800
Subject: [PATCH] fuse: fix compile without CONFIG_BLOCK

Randy Dunlap wote:
> Should FUSE depend on BLOCK?  Without that and with BLOCK=n, I get:
>
> inode.c:(.text+0x3acc5): undefined reference to `sb_set_blocksize'
> inode.c:(.text+0x3a393): undefined reference to `get_sb_bdev'
> fs/built-in.o:(.data+0xd718): undefined reference to `kill_block_super

Most fuse filesystems work fine without block device support, so I
think a better solution is to disable the 'fuseblk' filesystem type if
BLOCK=n.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/inode.c | 43 +++++++++++++++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 437d61c6526..12450d2b320 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -535,8 +535,10 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 		return -EINVAL;
 
 	if (is_bdev) {
+#ifdef CONFIG_BLOCK
 		if (!sb_set_blocksize(sb, d.blksize))
 			return -EINVAL;
+#endif
 	} else {
 		sb->s_blocksize = PAGE_CACHE_SIZE;
 		sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
@@ -629,6 +631,14 @@ static int fuse_get_sb(struct file_system_type *fs_type,
 	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
 }
 
+static struct file_system_type fuse_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "fuse",
+	.get_sb		= fuse_get_sb,
+	.kill_sb	= kill_anon_super,
+};
+
+#ifdef CONFIG_BLOCK
 static int fuse_get_sb_blk(struct file_system_type *fs_type,
 			   int flags, const char *dev_name,
 			   void *raw_data, struct vfsmount *mnt)
@@ -637,13 +647,6 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type,
 			   mnt);
 }
 
-static struct file_system_type fuse_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "fuse",
-	.get_sb		= fuse_get_sb,
-	.kill_sb	= kill_anon_super,
-};
-
 static struct file_system_type fuseblk_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fuseblk",
@@ -652,6 +655,26 @@ static struct file_system_type fuseblk_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
+static inline int register_fuseblk(void)
+{
+	return register_filesystem(&fuseblk_fs_type);
+}
+
+static inline void unregister_fuseblk(void)
+{
+	unregister_filesystem(&fuseblk_fs_type);
+}
+#else
+static inline int register_fuseblk(void)
+{
+	return 0;
+}
+
+static inline void unregister_fuseblk(void)
+{
+}
+#endif
+
 static decl_subsys(fuse, NULL, NULL);
 static decl_subsys(connections, NULL, NULL);
 
@@ -673,7 +696,7 @@ static int __init fuse_fs_init(void)
 	if (err)
 		goto out;
 
-	err = register_filesystem(&fuseblk_fs_type);
+	err = register_fuseblk();
 	if (err)
 		goto out_unreg;
 
@@ -688,7 +711,7 @@ static int __init fuse_fs_init(void)
 	return 0;
 
  out_unreg2:
-	unregister_filesystem(&fuseblk_fs_type);
+	unregister_fuseblk();
  out_unreg:
 	unregister_filesystem(&fuse_fs_type);
  out:
@@ -698,7 +721,7 @@ static int __init fuse_fs_init(void)
 static void fuse_fs_cleanup(void)
 {
 	unregister_filesystem(&fuse_fs_type);
-	unregister_filesystem(&fuseblk_fs_type);
+	unregister_fuseblk();
 	kmem_cache_destroy(fuse_inode_cachep);
 }
 
-- 
cgit v1.2.3


From 593be07ae8f6f4a1b1b98813fabb155328f8bc0c Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Wed, 6 Dec 2006 20:36:01 -0800
Subject: [PATCH] file: kill unnecessary timer in fdtable_defer

free_fdtable_rc() schedules timer to reschedule fddef->wq if
schedule_work() on it returns 0.  However, schedule_work() guarantees that
the target work is executed at least once after the scheduling regardless
of its return value.  0 return simply means that the work was already
pending and thus no further action was required.

Another problem is that it used contant '5' as @expires argument to
mod_timer().

Kill unnecessary fddef->timer.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/file.c | 29 ++---------------------------
 1 file changed, 2 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 3787e82f54c..51aef675470 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -21,7 +21,6 @@
 struct fdtable_defer {
 	spinlock_t lock;
 	struct work_struct wq;
-	struct timer_list timer;
 	struct fdtable *next;
 };
 
@@ -75,22 +74,6 @@ static void __free_fdtable(struct fdtable *fdt)
 	kfree(fdt);
 }
 
-static void fdtable_timer(unsigned long data)
-{
-	struct fdtable_defer *fddef = (struct fdtable_defer *)data;
-
-	spin_lock(&fddef->lock);
-	/*
-	 * If someone already emptied the queue return.
-	 */
-	if (!fddef->next)
-		goto out;
-	if (!schedule_work(&fddef->wq))
-		mod_timer(&fddef->timer, 5);
-out:
-	spin_unlock(&fddef->lock);
-}
-
 static void free_fdtable_work(struct work_struct *work)
 {
 	struct fdtable_defer *f =
@@ -144,13 +127,8 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 		spin_lock(&fddef->lock);
 		fdt->next = fddef->next;
 		fddef->next = fdt;
-		/*
-		 * vmallocs are handled from the workqueue context.
-		 * If the per-cpu workqueue is running, then we
-		 * defer work scheduling through a timer.
-		 */
-		if (!schedule_work(&fddef->wq))
-			mod_timer(&fddef->timer, 5);
+		/* vmallocs are handled from the workqueue context */
+		schedule_work(&fddef->wq);
 		spin_unlock(&fddef->lock);
 		put_cpu_var(fdtable_defer_list);
 	}
@@ -354,9 +332,6 @@ static void __devinit fdtable_defer_list_init(int cpu)
 	struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
 	spin_lock_init(&fddef->lock);
 	INIT_WORK(&fddef->wq, free_fdtable_work);
-	init_timer(&fddef->timer);
-	fddef->timer.data = (unsigned long)fddef;
-	fddef->timer.function = fdtable_timer;
 	fddef->next = NULL;
 }
 
-- 
cgit v1.2.3


From 87b4126f10cce2d49687df227f6228fa5a9ac6c6 Mon Sep 17 00:00:00 2001
From: Suzuki K P <suzuki@in.ibm.com>
Date: Wed, 6 Dec 2006 20:36:10 -0800
Subject: [PATCH] fix reiserfs bad path release panic

One of our test team hit a reiserfs_panic while running fsstress tests on
2.6.19-rc1.  The message looks like :

  REISERFS: panic(device Null superblock):
  reiserfs[5676]: assertion !(p->path_length != 1 ) failed at
  fs/reiserfs/stree.c:397:reiserfs_check_path: path not properly relsed.

The backtrace looked :

  kernel BUG in reiserfs_panic at fs/reiserfs/prints.c:361!
	.reiserfs_check_path+0x58/0x74
	.reiserfs_get_block+0x1444/0x1508
	.__block_prepare_write+0x1c8/0x558
	.block_prepare_write+0x34/0x64
	.reiserfs_prepare_write+0x118/0x1d0
	.generic_file_buffered_write+0x314/0x82c
	.__generic_file_aio_write_nolock+0x350/0x3e0
	.__generic_file_write_nolock+0x78/0xb0
	.generic_file_write+0x60/0xf0
	.reiserfs_file_write+0x198/0x2038
	.vfs_write+0xd0/0x1b4
	.sys_write+0x4c/0x8c
	syscall_exit+0x0/0x4

Upon debugging I found that the restart_transaction was not releasing
the path if the th->refcount was > 1.

/*static*/
int restart_transaction(struct reiserfs_transaction_handle *th,
                           			struct inode *inode, struct path *path)
{
	[...]

         /* we cannot restart while nested */
         if (th->t_refcount > 1) { <<- Path is not released in this case!
                 return 0;
         }

         pathrelse(path); <<- Path released here.
	[...]

This could happen in such a situation :

In reiserfs/inode.c: reiserfs_get_block() ::

      if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
          /* restart the transaction to give the journal a chance to free
           ** some blocks.  releases the path, so we have to go back to
           ** research if we succeed on the second try
           */
          SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;

        -->>  retval = restart_transaction(th, inode, &path); <<--

  We are supposed to release the path, no matter we succeed or fail. But
if the th->refcount is > 1, the path is still valid. And,

          if (retval)
                   goto failure;
          repeat =
              _allocate_block(th, block, inode,
                             &allocated_block_nr, NULL, create);

If the above allocate_block fails with NO_DISK_SPACE or QUOTA_EXCEEDED,
we would have path which is not released.

         if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
                   goto research;
         }
         if (repeat == QUOTA_EXCEEDED)
                   retval = -EDQUOT;
         else
                   retval = -ENOSPC;
         goto failure;
	[...]

       failure:
	[...]
         reiserfs_check_path(&path); << Panics here !

Attached here is a patch which could fix the issue.

fix reiserfs/inode.c : restart_transaction() to release the path in all
cases.

The restart_transaction() doesn't release the path when the the journal
handle has a refcount > 1.  This would trigger a reiserfs_panic() if we
encounter an -ENOSPC / -EDQUOT in reiserfs_get_block().

Signed-off-by: Suzuki K P <suzuki@in.ibm.com>
Cc: "Vladimir V. Saveliev" <vs@namesys.com>
Cc: <reiserfs-dev@namesys.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/inode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9c69bcacad2..a625688de45 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -216,11 +216,12 @@ static int file_capable(struct inode *inode, long block)
 	BUG_ON(!th->t_trans_id);
 	BUG_ON(!th->t_refcount);
 
+	pathrelse(path);
+
 	/* we cannot restart while nested */
 	if (th->t_refcount > 1) {
 		return 0;
 	}
-	pathrelse(path);
 	reiserfs_update_sd(th, inode);
 	err = journal_end(th, s, len);
 	if (!err) {
-- 
cgit v1.2.3


From ed2908f31398049c4371de9b100700e80704e95f Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 6 Dec 2006 20:36:16 -0800
Subject: [PATCH] Remove superfluous lock_super() in extN xattr code

lock_super() is unnecessary for setting super-block feature flags.  Use the
provided *_SET_COMPAT_FEATURE() macros as well.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/xattr.c | 5 +----
 fs/ext3/xattr.c | 5 +----
 fs/ext4/xattr.c | 5 +----
 3 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index af52a7f8b29..247efd0b51d 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -342,12 +342,9 @@ static void ext2_xattr_update_super_block(struct super_block *sb)
 	if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR))
 		return;
 
-	lock_super(sb);
-	EXT2_SB(sb)->s_es->s_feature_compat |=
-		cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR);
+	EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR);
 	sb->s_dirt = 1;
 	mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
-	unlock_super(sb);
 }
 
 /*
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index f86f2482f01..99857a400f4 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -459,14 +459,11 @@ static void ext3_xattr_update_super_block(handle_t *handle,
 	if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR))
 		return;
 
-	lock_super(sb);
 	if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) {
-		EXT3_SB(sb)->s_es->s_feature_compat |=
-			cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR);
+		EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR);
 		sb->s_dirt = 1;
 		ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
 	}
-	unlock_super(sb);
 }
 
 /*
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 63233cd946a..dc969c357aa 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -459,14 +459,11 @@ static void ext4_xattr_update_super_block(handle_t *handle,
 	if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR))
 		return;
 
-	lock_super(sb);
 	if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
-		EXT4_SB(sb)->s_es->s_feature_compat |=
-			cpu_to_le32(EXT4_FEATURE_COMPAT_EXT_ATTR);
+		EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
 		sb->s_dirt = 1;
 		ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
 	}
-	unlock_super(sb);
 }
 
 /*
-- 
cgit v1.2.3


From 59287c0913cc9a6c75712a775f6c1c1ef418ef3b Mon Sep 17 00:00:00 2001
From: Marcus Meissner <meissner@suse.de>
Date: Wed, 6 Dec 2006 20:36:24 -0800
Subject: [PATCH] binfmt_elf: randomize PIE binaries (2nd try)

Randomizes -pie compiled binaries from 64k (0x10000) up to ELF_ET_DYN_BASE.

0 -> 64k is excluded to allow NULL ptr accesses to fail.

Signed-off-by: Marcus Meissner <meissner@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Dave Jones <davej@codemonkey.org.uk>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index cc72bb43061..b2efbaead6c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -856,7 +856,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 			 * default mmap base, as well as whatever program they
 			 * might try to exec.  This is because the brk will
 			 * follow the loader, and is not movable.  */
-			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
+			if (current->flags & PF_RANDOMIZE)
+				load_bias = randomize_range(0x10000,
+							    ELF_ET_DYN_BASE,
+							    0);
+			else
+				load_bias = ELF_ET_DYN_BASE;
+			load_bias = ELF_PAGESTART(load_bias - vaddr);
 		}
 
 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
-- 
cgit v1.2.3


From 40b851348fe9bf49c26025b34261d25142269b60 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Wed, 6 Dec 2006 20:36:26 -0800
Subject: [PATCH] handle ext3 directory corruption better

I've been using Steve Grubb's purely evil "fsfuzzer" tool, at
http://people.redhat.com/sgrubb/files/fsfuzzer-0.4.tar.gz

Basically it makes a filesystem, splats some random bits over it, then
tries to mount it and do some simple filesystem actions.

At best, the filesystem catches the corruption gracefully.  At worst,
things spin out of control.

As you might guess, we found a couple places in ext3 where things spin out
of control :)

First, we had a corrupted directory that was never checked for
consistency...  it was corrupt, and pointed to another bad "entry" of
length 0.  The for() loop looped forever, since the length of
ext3_next_entry(de) was 0, and we kept looking at the same pointer over and
over and over and over...  I modeled this check and subsequent action on
what is done for other directory types in ext3_readdir...

(adding this check adds some computational expense; I am testing a followup
patch to reduce the number of times we check and re-check these directory
entries, in all cases.  Thanks for the idea, Andreas).

Next we had a root directory inode which had a corrupted size, claimed to
be > 200M on a 4M filesystem.  There was only really 1 block in the
directory, but because the size was so large, readdir kept coming back for
more, spewing thousands of printk's along the way.

Per Andreas' suggestion, if we're in this read error condition and we're
trying to read an offset which is greater than i_blocks worth of bytes,
stop trying, and break out of the loop.

With these two changes fsfuzz test survives quite well on ext3.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/dir.c   | 3 +++
 fs/ext3/namei.c | 9 +++++++++
 2 files changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index d0b54f30b91..5a9313ecd4e 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -154,6 +154,9 @@ static int ext3_readdir(struct file * filp,
 			ext3_error (sb, "ext3_readdir",
 				"directory #%lu contains a hole at offset %lu",
 				inode->i_ino, (unsigned long)filp->f_pos);
+			/* corrupt size?  Maybe no more blocks to read */
+			if (filp->f_pos > inode->i_blocks << 9)
+				break;
 			filp->f_pos += sb->s_blocksize - offset;
 			continue;
 		}
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 906731a20f1..60d2f9dbdb0 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -552,6 +552,15 @@ static int htree_dirblock_to_tree(struct file *dir_file,
 					   dir->i_sb->s_blocksize -
 					   EXT3_DIR_REC_LEN(0));
 	for (; de < top; de = ext3_next_entry(de)) {
+		if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
+					(block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
+						+((char *)de - bh->b_data))) {
+			/* On error, skip the f_pos to the next block. */
+			dir_file->f_pos = (dir_file->f_pos |
+					(dir->i_sb->s_blocksize - 1)) + 1;
+			brelse (bh);
+			return count;
+		}
 		ext3fs_dirhash(de->name, de->name_len, hinfo);
 		if ((hinfo->hash < start_hash) ||
 		    ((hinfo->hash == start_hash) &&
-- 
cgit v1.2.3


From e6c4021190c828d7fa24a464db589f86c6708341 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Wed, 6 Dec 2006 20:36:28 -0800
Subject: [PATCH] handle ext4 directory corruption better

I've been using Steve Grubb's purely evil "fsfuzzer" tool, at
http://people.redhat.com/sgrubb/files/fsfuzzer-0.4.tar.gz

Basically it makes a filesystem, splats some random bits over it, then
tries to mount it and do some simple filesystem actions.

At best, the filesystem catches the corruption gracefully.  At worst,
things spin out of control.

As you might guess, we found a couple places in ext4 where things spin out
of control :)

First, we had a corrupted directory that was never checked for
consistency...  it was corrupt, and pointed to another bad "entry" of
length 0.  The for() loop looped forever, since the length of
ext4_next_entry(de) was 0, and we kept looking at the same pointer over and
over and over and over...  I modeled this check and subsequent action on
what is done for other directory types in ext4_readdir...

(adding this check adds some computational expense; I am testing a followup
patch to reduce the number of times we check and re-check these directory
entries, in all cases.  Thanks for the idea, Andreas).

Next we had a root directory inode which had a corrupted size, claimed to
be > 200M on a 4M filesystem.  There was only really 1 block in the
directory, but because the size was so large, readdir kept coming back for
more, spewing thousands of printk's along the way.

Per Andreas' suggestion, if we're in this read error condition and we're
trying to read an offset which is greater than i_blocks worth of bytes,
stop trying, and break out of the loop.

With these two changes fsfuzz test survives quite well on ext4.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/dir.c   | 3 +++
 fs/ext4/namei.c | 9 +++++++++
 2 files changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index f8595787a70..f2ed3e7fb9f 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -153,6 +153,9 @@ static int ext4_readdir(struct file * filp,
 			ext4_error (sb, "ext4_readdir",
 				"directory #%lu contains a hole at offset %lu",
 				inode->i_ino, (unsigned long)filp->f_pos);
+			/* corrupt size?  Maybe no more blocks to read */
+			if (filp->f_pos > inode->i_blocks << 9)
+				break;
 			filp->f_pos += sb->s_blocksize - offset;
 			continue;
 		}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 8b1bd03d20f..859990eac50 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -552,6 +552,15 @@ static int htree_dirblock_to_tree(struct file *dir_file,
 					   dir->i_sb->s_blocksize -
 					   EXT4_DIR_REC_LEN(0));
 	for (; de < top; de = ext4_next_entry(de)) {
+		if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
+					(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
+						+((char *)de - bh->b_data))) {
+			/* On error, skip the f_pos to the next block. */
+			dir_file->f_pos = (dir_file->f_pos |
+					(dir->i_sb->s_blocksize - 1)) + 1;
+			brelse (bh);
+			return count;
+		}
 		ext4fs_dirhash(de->name, de->name_len, hinfo);
 		if ((hinfo->hash < start_hash) ||
 		    ((hinfo->hash == start_hash) &&
-- 
cgit v1.2.3


From 736c4b8572ac24b1e6fd58d00872305a120ac700 Mon Sep 17 00:00:00 2001
From: Mika Kukkonen <mikukkon@gmail.com>
Date: Wed, 6 Dec 2006 20:36:29 -0800
Subject: [PATCH] Function v9fs_get_idpool returns int, not u32 as called twice
 in fs/9p/vfs_inode.c

Function v9fs_get_idpool returns int, not u32.  Actually it returns -1 on
errors, and these two callers check if the value is smaller than 0, which
was caught by gcc with extra warning flags.  Compile tested only but should
be OK, as the value computed in v9fs_get_idpool() is also int.

Signed-of-by: Mika Kukkonen <mikukkon@iki.fi>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@lanl.gov>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/vfs_inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 5241c600ce2..18f26cdfd88 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -256,7 +256,7 @@ static int
 v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm,
 	u8 mode, char *extension, u32 *fidp, struct v9fs_qid *qid, u32 *iounit)
 {
-	u32 fid;
+	int fid;
 	int err;
 	struct v9fs_fcall *fcall;
 
@@ -310,7 +310,7 @@ static struct v9fs_fid*
 v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry)
 {
 	int err;
-	u32 nfid;
+	int nfid;
 	struct v9fs_fid *ret;
 	struct v9fs_fcall *fcall;
 
-- 
cgit v1.2.3


From 841d5fb7c75260f76ae682648b28a3dca724940d Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 6 Dec 2006 20:36:35 -0800
Subject: [PATCH] binfmt: fix uaccess handling

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b2efbaead6c..68e20d5bfe1 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -243,8 +243,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	if (interp_aout) {
 		argv = sp + 2;
 		envp = argv + argc + 1;
-		__put_user((elf_addr_t)(unsigned long)argv, sp++);
-		__put_user((elf_addr_t)(unsigned long)envp, sp++);
+		if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
+		    __put_user((elf_addr_t)(unsigned long)envp, sp++))
+			return -EFAULT;
 	} else {
 		argv = sp;
 		envp = argv + argc + 1;
@@ -254,7 +255,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	p = current->mm->arg_end = current->mm->arg_start;
 	while (argc-- > 0) {
 		size_t len;
-		__put_user((elf_addr_t)p, argv++);
+		if (__put_user((elf_addr_t)p, argv++))
+			return -EFAULT;
 		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
 			return 0;
@@ -265,7 +267,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	current->mm->arg_end = current->mm->env_start = p;
 	while (envc-- > 0) {
 		size_t len;
-		__put_user((elf_addr_t)p, envp++);
+		if (__put_user((elf_addr_t)p, envp++))
+			return -EFAULT;
 		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
 			return 0;
-- 
cgit v1.2.3


From 7116e994b47f3988389be4ceee67dac64b56e0d0 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 6 Dec 2006 20:36:36 -0800
Subject: [PATCH] compat: fix uaccess handling

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c       | 35 +++++++++++++++++++----------------
 fs/compat_ioctl.c | 33 ++++++++++++++++++++-------------
 2 files changed, 39 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index 7aef5412f49..a7e3f162fb1 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1144,7 +1144,9 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
 	lastdirent = buf.previous;
 	if (lastdirent) {
 		typeof(lastdirent->d_off) d_off = file->f_pos;
-		__put_user_unaligned(d_off, &lastdirent->d_off);
+		error = -EFAULT;
+		if (__put_user_unaligned(d_off, &lastdirent->d_off))
+			goto out_putf;
 		error = count - buf.count;
 	}
 
@@ -1611,14 +1613,14 @@ int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
 		nr &= ~1UL;
 		while (nr) {
 			unsigned long h, l;
-			__get_user(l, ufdset);
-			__get_user(h, ufdset+1);
+			if (__get_user(l, ufdset) || __get_user(h, ufdset+1))
+				return -EFAULT;
 			ufdset += 2;
 			*fdset++ = h << 32 | l;
 			nr -= 2;
 		}
-		if (odd)
-			__get_user(*fdset, ufdset);
+		if (odd && __get_user(*fdset, ufdset))
+			return -EFAULT;
 	} else {
 		/* Tricky, must clear full unsigned long in the
 		 * kernel fdset at the end, this makes sure that
@@ -1630,14 +1632,14 @@ int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
 }
 
 static
-void compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
-			unsigned long *fdset)
+int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
+		      unsigned long *fdset)
 {
 	unsigned long odd;
 	nr = ROUND_UP(nr, __COMPAT_NFDBITS);
 
 	if (!ufdset)
-		return;
+		return 0;
 
 	odd = nr & 1UL;
 	nr &= ~1UL;
@@ -1645,13 +1647,14 @@ void compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
 		unsigned long h, l;
 		l = *fdset++;
 		h = l >> 32;
-		__put_user(l, ufdset);
-		__put_user(h, ufdset+1);
+		if (__put_user(l, ufdset) || __put_user(h, ufdset+1))
+			return -EFAULT;
 		ufdset += 2;
 		nr -= 2;
 	}
-	if (odd)
-		__put_user(*fdset, ufdset);
+	if (odd && __put_user(*fdset, ufdset))
+		return -EFAULT;
+	return 0;
 }
 
 
@@ -1726,10 +1729,10 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
 		ret = 0;
 	}
 
-	compat_set_fd_set(n, inp, fds.res_in);
-	compat_set_fd_set(n, outp, fds.res_out);
-	compat_set_fd_set(n, exp, fds.res_ex);
-
+	if (compat_set_fd_set(n, inp, fds.res_in) ||
+	    compat_set_fd_set(n, outp, fds.res_out) ||
+	    compat_set_fd_set(n, exp, fds.res_ex))
+		ret = -EFAULT;
 out:
 	kfree(bits);
 out_nofds:
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a91f2628c98..6e317768555 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -211,8 +211,10 @@ static int do_video_stillpicture(unsigned int fd, unsigned int cmd, unsigned lon
 	up_native =
 		compat_alloc_user_space(sizeof(struct video_still_picture));
 
-	put_user(compat_ptr(fp), &up_native->iFrame);
-	put_user(size, &up_native->size);
+	err =  put_user(compat_ptr(fp), &up_native->iFrame);
+	err |= put_user(size, &up_native->size);
+	if (err)
+		return -EFAULT;
 
 	err = sys_ioctl(fd, cmd, (unsigned long) up_native);
 
@@ -236,8 +238,10 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, unsigned
 	err |= get_user(length, &up->length);
 
 	up_native = compat_alloc_user_space(sizeof(struct video_spu_palette));
-	put_user(compat_ptr(palp), &up_native->palette);
-	put_user(length, &up_native->length);
+	err  = put_user(compat_ptr(palp), &up_native->palette);
+	err |= put_user(length, &up_native->length);
+	if (err)
+		return -EFAULT;
 
 	err = sys_ioctl(fd, cmd, (unsigned long) up_native);
 
@@ -2043,16 +2047,19 @@ static int serial_struct_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
         struct serial_struct ss;
         mm_segment_t oldseg = get_fs();
         __u32 udata;
+	unsigned int base;
 
         if (cmd == TIOCSSERIAL) {
                 if (!access_ok(VERIFY_READ, ss32, sizeof(SS32)))
                         return -EFAULT;
                 if (__copy_from_user(&ss, ss32, offsetof(SS32, iomem_base)))
 			return -EFAULT;
-                __get_user(udata, &ss32->iomem_base);
+                if (__get_user(udata, &ss32->iomem_base))
+			return -EFAULT;
                 ss.iomem_base = compat_ptr(udata);
-                __get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift);
-                __get_user(ss.port_high, &ss32->port_high);
+                if (__get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) ||
+		    __get_user(ss.port_high, &ss32->port_high))
+			return -EFAULT;
                 ss.iomap_base = 0UL;
         }
         set_fs(KERNEL_DS);
@@ -2063,12 +2070,12 @@ static int serial_struct_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
                         return -EFAULT;
                 if (__copy_to_user(ss32,&ss,offsetof(SS32,iomem_base)))
 			return -EFAULT;
-                __put_user((unsigned long)ss.iomem_base  >> 32 ?
-                            0xffffffff : (unsigned)(unsigned long)ss.iomem_base,
-                            &ss32->iomem_base);
-                __put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift);
-                __put_user(ss.port_high, &ss32->port_high);
-
+		base = (unsigned long)ss.iomem_base  >> 32 ?
+			0xffffffff : (unsigned)(unsigned long)ss.iomem_base;
+		if (__put_user(base, &ss32->iomem_base) ||
+		    __put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) ||
+		    __put_user(ss.port_high, &ss32->port_high))
+			return -EFAULT;
         }
         return err;
 }
-- 
cgit v1.2.3


From 4a6e617a4bec9fb2ee4a16cf59565b2af5049e12 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 6 Dec 2006 20:37:04 -0800
Subject: [PATCH] fs/*: trivial vsnprintf() conversion

It would very lame to get buffer overflow via one of the following.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Dave Kleikamp <shaggy@austin.ibm.com>
Cc: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/adfs/super.c    | 2 +-
 fs/affs/amigaffs.c | 4 ++--
 fs/jfs/super.c     | 2 +-
 fs/ocfs2/super.c   | 4 ++--
 fs/udf/super.c     | 4 ++--
 fs/ufs/super.c     | 6 +++---
 6 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index e5a205c9f42..5023351a7af 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -36,7 +36,7 @@ void __adfs_error(struct super_block *sb, const char *function, const char *fmt,
 	va_list args;
 
 	va_start(args, fmt);
-	vsprintf(error_buf, fmt, args);
+	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
 	va_end(args);
 
 	printk(KERN_CRIT "ADFS-fs error (device %s)%s%s: %s\n",
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index ccd624ef427..f4de4b98004 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -445,7 +445,7 @@ affs_error(struct super_block *sb, const char *function, const char *fmt, ...)
 	va_list	 args;
 
 	va_start(args,fmt);
-	vsprintf(ErrorBuffer,fmt,args);
+	vsnprintf(ErrorBuffer,sizeof(ErrorBuffer),fmt,args);
 	va_end(args);
 
 	printk(KERN_CRIT "AFFS error (device %s): %s(): %s\n", sb->s_id,
@@ -461,7 +461,7 @@ affs_warning(struct super_block *sb, const char *function, const char *fmt, ...)
 	va_list	 args;
 
 	va_start(args,fmt);
-	vsprintf(ErrorBuffer,fmt,args);
+	vsnprintf(ErrorBuffer,sizeof(ErrorBuffer),fmt,args);
 	va_end(args);
 
 	printk(KERN_WARNING "AFFS warning (device %s): %s(): %s\n", sb->s_id,
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index ca3e1912853..846ac8f3451 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -93,7 +93,7 @@ void jfs_error(struct super_block *sb, const char * function, ...)
 	va_list args;
 
 	va_start(args, function);
-	vsprintf(error_buf, function, args);
+	vsnprintf(error_buf, sizeof(error_buf), function, args);
 	va_end(args);
 
 	printk(KERN_ERR "ERROR: (device %s): %s\n", sb->s_id, error_buf);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0524f8a04ad..4bf39540e65 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1674,7 +1674,7 @@ void __ocfs2_error(struct super_block *sb,
 	va_list args;
 
 	va_start(args, fmt);
-	vsprintf(error_buf, fmt, args);
+	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
 	va_end(args);
 
 	/* Not using mlog here because we want to show the actual
@@ -1695,7 +1695,7 @@ void __ocfs2_abort(struct super_block* sb,
 	va_list args;
 
 	va_start(args, fmt);
-	vsprintf(error_buf, fmt, args);
+	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
 	va_end(args);
 
 	printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n",
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 397f54aaf66..1dbc2955f02 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1709,7 +1709,7 @@ void udf_error(struct super_block *sb, const char *function,
 		sb->s_dirt = 1;
 	}
 	va_start(args, fmt);
-	vsprintf(error_buf, fmt, args);
+	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
 	va_end(args);
 	printk (KERN_CRIT "UDF-fs error (device %s): %s: %s\n",
 		sb->s_id, function, error_buf);
@@ -1721,7 +1721,7 @@ void udf_warning(struct super_block *sb, const char *function,
 	va_list args;
 
 	va_start (args, fmt);
-	vsprintf(error_buf, fmt, args);
+	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
 	va_end(args);
 	printk(KERN_WARNING "UDF-fs warning (device %s): %s: %s\n",
 		sb->s_id, function, error_buf);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 0b18d2cc2ef..8a8e9382ec0 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -224,7 +224,7 @@ void ufs_error (struct super_block * sb, const char * function,
 		sb->s_flags |= MS_RDONLY;
 	}
 	va_start (args, fmt);
-	vsprintf (error_buf, fmt, args);
+	vsnprintf (error_buf, sizeof(error_buf), fmt, args);
 	va_end (args);
 	switch (UFS_SB(sb)->s_mount_opt & UFS_MOUNT_ONERROR) {
 	case UFS_MOUNT_ONERROR_PANIC:
@@ -255,7 +255,7 @@ void ufs_panic (struct super_block * sb, const char * function,
 		sb->s_dirt = 1;
 	}
 	va_start (args, fmt);
-	vsprintf (error_buf, fmt, args);
+	vsnprintf (error_buf, sizeof(error_buf), fmt, args);
 	va_end (args);
 	sb->s_flags |= MS_RDONLY;
 	printk (KERN_CRIT "UFS-fs panic (device %s): %s: %s\n",
@@ -268,7 +268,7 @@ void ufs_warning (struct super_block * sb, const char * function,
 	va_list args;
 
 	va_start (args, fmt);
-	vsprintf (error_buf, fmt, args);
+	vsnprintf (error_buf, sizeof(error_buf), fmt, args);
 	va_end (args);
 	printk (KERN_WARNING "UFS-fs warning (device %s): %s: %s\n",
 		sb->s_id, function, error_buf);
-- 
cgit v1.2.3


From 352d94d040053d93bf1cf4acb4be9635e69d9200 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 6 Dec 2006 20:37:04 -0800
Subject: [PATCH] hpfs: bring hpfs_error() into shape

 - switch to error message buffer in .bss
 - missing va_end() (htf it worked before?)
 - use vsnprintf()
 - rename variables to understandable "fmt", "args".
 - "const char *fmt", yes.
 - add __attribute__((format ...

Still, put that coffee down before reading more.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hpfs/hpfs_fn.h |  3 ++-
 fs/hpfs/super.c   | 23 +++++++++--------------
 2 files changed, 11 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 32ab51e42b9..1c07aa82d32 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -317,7 +317,8 @@ static inline struct hpfs_sb_info *hpfs_sb(struct super_block *sb)
 
 /* super.c */
 
-void hpfs_error(struct super_block *, char *, ...);
+void hpfs_error(struct super_block *, const char *, ...)
+	__attribute__((format (printf, 2, 3)));
 int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *);
 unsigned hpfs_count_one_bitmap(struct super_block *, secno);
 
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 34d68e21171..d4abc1a1d56 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -46,21 +46,17 @@ static void unmark_dirty(struct super_block *s)
 }
 
 /* Filesystem error... */
+static char err_buf[1024];
 
-#define ERR_BUF_SIZE 1024
-
-void hpfs_error(struct super_block *s, char *m,...)
+void hpfs_error(struct super_block *s, const char *fmt, ...)
 {
-	char *buf;
-	va_list l;
-	va_start(l, m);
-	if (!(buf = kmalloc(ERR_BUF_SIZE, GFP_KERNEL)))
-		printk("HPFS: No memory for error message '%s'\n",m);
-	else if (vsprintf(buf, m, l) >= ERR_BUF_SIZE)
-		printk("HPFS: Grrrr... Kernel memory corrupted ... going on, but it'll crash very soon :-(\n");
-	printk("HPFS: filesystem error: ");
-	if (buf) printk("%s", buf);
-	else printk("%s\n",m);
+	va_list args;
+
+	va_start(args, fmt);
+	vsnprintf(err_buf, sizeof(err_buf), fmt, args);
+	va_end(args);
+
+	printk("HPFS: filesystem error: %s", err_buf);
 	if (!hpfs_sb(s)->sb_was_error) {
 		if (hpfs_sb(s)->sb_err == 2) {
 			printk("; crashing the system because you wanted it\n");
@@ -76,7 +72,6 @@ void hpfs_error(struct super_block *s, char *m,...)
 		} else if (s->s_flags & MS_RDONLY) printk("; going on - but anything won't be destroyed because it's read-only\n");
 		else printk("; corrupted filesystem mounted read/write - your computer will explode within 20 seconds ... but you wanted it so!\n");
 	} else printk("\n");
-	kfree(buf);
 	hpfs_sb(s)->sb_was_error = 1;
 }
 
-- 
cgit v1.2.3


From 18debbbcce1306f0bbb1c71cf587fd90413acab6 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 6 Dec 2006 20:37:05 -0800
Subject: [PATCH] hpfs: fix printk format warnings

Fix hpfs printk warnings:

  fs/hpfs/dir.c:87: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'long unsigned int'
  fs/hpfs/dir.c:147: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'long int'
  fs/hpfs/dir.c:148: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'long int'
  fs/hpfs/dnode.c:537: warning: format '%08x' expects type 'unsigned int', but argument 5 has type 'long unsigned int'
  fs/hpfs/dnode.c:854: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'loff_t'
  fs/hpfs/ea.c:247: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'long unsigned int'
  fs/hpfs/inode.c:254: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'long unsigned int'
  fs/hpfs/map.c:129: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'ino_t'
  fs/hpfs/map.c:135: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'ino_t'
  fs/hpfs/map.c:140: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'ino_t'
  fs/hpfs/map.c:147: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'ino_t'
  fs/hpfs/map.c:154: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'ino_t'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hpfs/dir.c   | 10 +++++++---
 fs/hpfs/dnode.c | 13 +++++++++----
 fs/hpfs/ea.c    |  5 +++--
 fs/hpfs/inode.c |  5 ++++-
 fs/hpfs/map.c   | 20 ++++++++++++++------
 5 files changed, 37 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index ecc9180645a..594f9c428fc 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -84,7 +84,8 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		}
 		if (!fno->dirflag) {
 			e = 1;
-			hpfs_error(inode->i_sb, "not a directory, fnode %08x",inode->i_ino);
+			hpfs_error(inode->i_sb, "not a directory, fnode %08lx",
+					(unsigned long)inode->i_ino);
 		}
 		if (hpfs_inode->i_dno != fno->u.external[0].disk_secno) {
 			e = 1;
@@ -144,8 +145,11 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		}
 		if (de->first || de->last) {
 			if (hpfs_sb(inode->i_sb)->sb_chk) {
-				if (de->first && !de->last && (de->namelen != 2 || de ->name[0] != 1 || de->name[1] != 1)) hpfs_error(inode->i_sb, "hpfs_readdir: bad ^A^A entry; pos = %08x", old_pos);
-				if (de->last && (de->namelen != 1 || de ->name[0] != 255)) hpfs_error(inode->i_sb, "hpfs_readdir: bad \\377 entry; pos = %08x", old_pos);
+				if (de->first && !de->last && (de->namelen != 2
+				    || de ->name[0] != 1 || de->name[1] != 1))
+					hpfs_error(inode->i_sb, "hpfs_readdir: bad ^A^A entry; pos = %08lx", old_pos);
+				if (de->last && (de->namelen != 1 || de ->name[0] != 255))
+					hpfs_error(inode->i_sb, "hpfs_readdir: bad \\377 entry; pos = %08lx", old_pos);
 			}
 			hpfs_brelse4(&qbh);
 			goto again;
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index 229ff2fb180..fe83c2b7d2d 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -533,10 +533,13 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
 			struct buffer_head *bh;
 			struct dnode *d1;
 			struct quad_buffer_head qbh1;
-			if (hpfs_sb(i->i_sb)->sb_chk) if (up != i->i_ino) {
-				hpfs_error(i->i_sb, "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08x", dno, up, i->i_ino);
+			if (hpfs_sb(i->i_sb)->sb_chk)
+			    if (up != i->i_ino) {
+				hpfs_error(i->i_sb,
+					"bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx",
+					dno, up, (unsigned long)i->i_ino);
 				return;
-			}
+			    }
 			if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
 				d1->up = up;
 				d1->root_dnode = 1;
@@ -851,7 +854,9 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
 	/* Going to the next dirent */
 	if ((d = de_next_de(de)) < dnode_end_de(dnode)) {
 		if (!(++*posp & 077)) {
-			hpfs_error(inode->i_sb, "map_pos_dirent: pos crossed dnode boundary; pos = %08x", *posp);
+			hpfs_error(inode->i_sb,
+				"map_pos_dirent: pos crossed dnode boundary; pos = %08llx",
+				(unsigned long long)*posp);
 			goto bail;
 		}
 		/* We're going down the tree */
diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c
index 66339dc030e..547a8384571 100644
--- a/fs/hpfs/ea.c
+++ b/fs/hpfs/ea.c
@@ -243,8 +243,9 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, char *key, char *data
 		fnode->ea_offs = 0xc4;
 	}
 	if (fnode->ea_offs < 0xc4 || fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200) {
-		hpfs_error(s, "fnode %08x: ea_offs == %03x, ea_size_s == %03x",
-			inode->i_ino, fnode->ea_offs, fnode->ea_size_s);
+		hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x",
+			(unsigned long)inode->i_ino,
+			fnode->ea_offs, fnode->ea_size_s);
 		return;
 	}
 	if ((fnode->ea_size_s || !fnode->ea_size_l) &&
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 7faef8544f3..85d3e1d9ac0 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -251,7 +251,10 @@ void hpfs_write_inode_nolock(struct inode *i)
 			de->file_size = 0;
 			hpfs_mark_4buffers_dirty(&qbh);
 			hpfs_brelse4(&qbh);
-		} else hpfs_error(i->i_sb, "directory %08x doesn't have '.' entry", i->i_ino);
+		} else
+			hpfs_error(i->i_sb,
+				"directory %08lx doesn't have '.' entry",
+				(unsigned long)i->i_ino);
 	}
 	mark_buffer_dirty(bh);
 	brelse(bh);
diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c
index 0fecdac22e4..c4724589b2e 100644
--- a/fs/hpfs/map.c
+++ b/fs/hpfs/map.c
@@ -126,32 +126,40 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
 			struct extended_attribute *ea;
 			struct extended_attribute *ea_end;
 			if (fnode->magic != FNODE_MAGIC) {
-				hpfs_error(s, "bad magic on fnode %08x", ino);
+				hpfs_error(s, "bad magic on fnode %08lx",
+					(unsigned long)ino);
 				goto bail;
 			}
 			if (!fnode->dirflag) {
 				if ((unsigned)fnode->btree.n_used_nodes + (unsigned)fnode->btree.n_free_nodes !=
 				    (fnode->btree.internal ? 12 : 8)) {
-					hpfs_error(s, "bad number of nodes in fnode %08x", ino);
+					hpfs_error(s,
+					   "bad number of nodes in fnode %08lx",
+					    (unsigned long)ino);
 					goto bail;
 				}
 				if (fnode->btree.first_free !=
 				    8 + fnode->btree.n_used_nodes * (fnode->btree.internal ? 8 : 12)) {
-					hpfs_error(s, "bad first_free pointer in fnode %08x", ino);
+					hpfs_error(s,
+					    "bad first_free pointer in fnode %08lx",
+					    (unsigned long)ino);
 					goto bail;
 				}
 			}
 			if (fnode->ea_size_s && ((signed int)fnode->ea_offs < 0xc4 ||
 			   (signed int)fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200)) {
-				hpfs_error(s, "bad EA info in fnode %08x: ea_offs == %04x ea_size_s == %04x",
-					ino, fnode->ea_offs, fnode->ea_size_s);
+				hpfs_error(s,
+					"bad EA info in fnode %08lx: ea_offs == %04x ea_size_s == %04x",
+					(unsigned long)ino,
+					fnode->ea_offs, fnode->ea_size_s);
 				goto bail;
 			}
 			ea = fnode_ea(fnode);
 			ea_end = fnode_end_ea(fnode);
 			while (ea != ea_end) {
 				if (ea > ea_end) {
-					hpfs_error(s, "bad EA in fnode %08x", ino);
+					hpfs_error(s, "bad EA in fnode %08lx",
+						(unsigned long)ino);
 					goto bail;
 				}
 				ea = next_ea(ea);
-- 
cgit v1.2.3


From dc168427e6250a5a24c59f34afed6538092dab42 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@sw.ru>
Date: Wed, 6 Dec 2006 20:37:07 -0800
Subject: [PATCH] VFS: extra check inside dentry_unhash()

d_count check after dget() is always true.

Signed-off-by:	Vasily Averin <vvs@sw.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namei.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 61f99c1967d..db1bca26d88 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1998,8 +1998,7 @@ asmlinkage long sys_mkdir(const char __user *pathname, int mode)
 void dentry_unhash(struct dentry *dentry)
 {
 	dget(dentry);
-	if (atomic_read(&dentry->d_count))
-		shrink_dcache_parent(dentry);
+	shrink_dcache_parent(dentry);
 	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
 	if (atomic_read(&dentry->d_count) == 2)
-- 
cgit v1.2.3


From 072330584404392dae44cd0793ac9b316cff045b Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 6 Dec 2006 20:37:12 -0800
Subject: [PATCH] vfs_getattr(): remove dead code

As Mikulas points out, (1 << anything) won't be evaluating to zero.  This code
is long-dead.

Cc: Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/stat.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'fs')

diff --git a/fs/stat.c b/fs/stat.c
index bca07eb2003..a0ebfc7f8a6 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -51,13 +51,6 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 		return inode->i_op->getattr(mnt, dentry, stat);
 
 	generic_fillattr(inode, stat);
-	if (!stat->blksize) {
-		struct super_block *s = inode->i_sb;
-		unsigned blocks;
-		blocks = (stat->size+s->s_blocksize-1) >> s->s_blocksize_bits;
-		stat->blocks = (s->s_blocksize / 512) * blocks;
-		stat->blksize = s->s_blocksize;
-	}
 	return 0;
 }
 
-- 
cgit v1.2.3


From 3a229b39eb8497ae5f8077f81f7c8c3e1aacd624 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 6 Dec 2006 20:37:14 -0800
Subject: [PATCH] ext3: uninline large functions

Saves nearly 4kbytes on x86.

Cc: Arnaldo Carvalho de Melo <acme@mandriva.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/Makefile   |  2 +-
 fs/ext3/ext3_jbd.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 fs/ext3/ext3_jbd.c

(limited to 'fs')

diff --git a/fs/ext3/Makefile b/fs/ext3/Makefile
index 704cd44a40c..e77766a8b3f 100644
--- a/fs/ext3/Makefile
+++ b/fs/ext3/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_EXT3_FS) += ext3.o
 
 ext3-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-	   ioctl.o namei.o super.o symlink.o hash.o resize.o
+	   ioctl.o namei.o super.o symlink.o hash.o resize.o ext3_jbd.o
 
 ext3-$(CONFIG_EXT3_FS_XATTR)	 += xattr.o xattr_user.o xattr_trusted.o
 ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
diff --git a/fs/ext3/ext3_jbd.c b/fs/ext3/ext3_jbd.c
new file mode 100644
index 00000000000..e1f91fd26a9
--- /dev/null
+++ b/fs/ext3/ext3_jbd.c
@@ -0,0 +1,59 @@
+/*
+ * Interface between ext3 and JBD
+ */
+
+#include <linux/ext3_jbd.h>
+
+int __ext3_journal_get_undo_access(const char *where, handle_t *handle,
+				struct buffer_head *bh)
+{
+	int err = journal_get_undo_access(handle, bh);
+	if (err)
+		ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext3_journal_get_write_access(const char *where, handle_t *handle,
+				struct buffer_head *bh)
+{
+	int err = journal_get_write_access(handle, bh);
+	if (err)
+		ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext3_journal_forget(const char *where, handle_t *handle,
+				struct buffer_head *bh)
+{
+	int err = journal_forget(handle, bh);
+	if (err)
+		ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext3_journal_revoke(const char *where, handle_t *handle,
+				unsigned long blocknr, struct buffer_head *bh)
+{
+	int err = journal_revoke(handle, blocknr, bh);
+	if (err)
+		ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext3_journal_get_create_access(const char *where,
+				handle_t *handle, struct buffer_head *bh)
+{
+	int err = journal_get_create_access(handle, bh);
+	if (err)
+		ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext3_journal_dirty_metadata(const char *where,
+				handle_t *handle, struct buffer_head *bh)
+{
+	int err = journal_dirty_metadata(handle, bh);
+	if (err)
+		ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
-- 
cgit v1.2.3


From 8984d137df669a6e94dbce7b87095e4ce80b9e67 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 6 Dec 2006 20:37:15 -0800
Subject: [PATCH] ext4: uninline large functions

Saves nearly 4kbytes on x86.

Cc: Arnaldo Carvalho de Melo <acme@mandriva.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/Makefile    |  3 ++-
 fs/ext4/ext4_jbd2.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100644 fs/ext4/ext4_jbd2.c

(limited to 'fs')

diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index a6acb96ebeb..ae6e7e502ac 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -5,7 +5,8 @@
 obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
 
 ext4dev-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-	   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o
+		   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
+		   ext4_jbd2.o
 
 ext4dev-$(CONFIG_EXT4DEV_FS_XATTR)	+= xattr.o xattr_user.o xattr_trusted.o
 ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL)	+= acl.o
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
new file mode 100644
index 00000000000..d6afe4e2734
--- /dev/null
+++ b/fs/ext4/ext4_jbd2.c
@@ -0,0 +1,59 @@
+/*
+ * Interface between ext4 and JBD
+ */
+
+#include <linux/ext4_jbd2.h>
+
+int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
+				struct buffer_head *bh)
+{
+	int err = jbd2_journal_get_undo_access(handle, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext4_journal_get_write_access(const char *where, handle_t *handle,
+				struct buffer_head *bh)
+{
+	int err = jbd2_journal_get_write_access(handle, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext4_journal_forget(const char *where, handle_t *handle,
+				struct buffer_head *bh)
+{
+	int err = jbd2_journal_forget(handle, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext4_journal_revoke(const char *where, handle_t *handle,
+				ext4_fsblk_t blocknr, struct buffer_head *bh)
+{
+	int err = jbd2_journal_revoke(handle, blocknr, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext4_journal_get_create_access(const char *where,
+				handle_t *handle, struct buffer_head *bh)
+{
+	int err = jbd2_journal_get_create_access(handle, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext4_journal_dirty_metadata(const char *where,
+				handle_t *handle, struct buffer_head *bh)
+{
+	int err = jbd2_journal_dirty_metadata(handle, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
-- 
cgit v1.2.3


From 8bb0269160df2a60764013994d0bc5165406cf4a Mon Sep 17 00:00:00 2001
From: Phillip Lougher <phillip@lougher.org.uk>
Date: Wed, 6 Dec 2006 20:37:20 -0800
Subject: [PATCH] corrupted cramfs filesystems cause kernel oops

Steve Grubb's fzfuzzer tool (http://people.redhat.com/sgrubb/files/
fsfuzzer-0.6.tar.gz) generates corrupt Cramfs filesystems which cause
Cramfs to kernel oops in cramfs_uncompress_block().  The cause of the oops
is an unchecked corrupted block length field read by cramfs_readpage().

This patch adds a sanity check to cramfs_readpage() which checks that the
block length field is sensible.  The (PAGE_CACHE_SIZE << 1) size check is
intentional, even though the uncompressed data is not going to be larger
than PAGE_CACHE_SIZE, gzip sometimes generates compressed data larger than
the original source data.  Mkcramfs checks that the compressed size is
always less than or equal to PAGE_CACHE_SIZE << 1.  Of course Cramfs could
use the original uncompressed data in this case, but it doesn't.

Signed-off-by: Phillip Lougher <phillip@lougher.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cramfs/inode.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index a624c3ec818..0509cedd415 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -481,6 +481,8 @@ static int cramfs_readpage(struct file *file, struct page * page)
 		pgdata = kmap(page);
 		if (compr_len == 0)
 			; /* hole */
+		else if (compr_len > (PAGE_CACHE_SIZE << 1))
+			printk(KERN_ERR "cramfs: bad compressed blocksize %u\n", compr_len);
 		else {
 			mutex_lock(&read_mutex);
 			bytes_filled = cramfs_uncompress_block(pgdata,
-- 
cgit v1.2.3


From 6fb50ea79cb869667adaa71ed32cc15dd73986de Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:37:25 -0800
Subject: [PATCH] ext4_ext_split(): remove dead code

The Coverity checker noted that this was dead code, since in all places
above in this function, "err" is immediately checked.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/extents.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2608dce18f3..1442ccbaea7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -800,9 +800,6 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	}
 
 	/* insert new index */
-	if (err)
-		goto cleanup;
-
 	err = ext4_ext_insert_index(handle, inode, path + at,
 				    le32_to_cpu(border), newblock);
 
-- 
cgit v1.2.3


From 93f210dd9e614ddab7ecef0b4c9ba6ad3720d860 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 6 Dec 2006 20:37:33 -0800
Subject: [PATCH] protect ext2 ioctl modifying append_only immutable etc with
 i_mutex

Port commit a090d9132c1e53e3517111123680c15afb25c0a4 into ext2:

All modifications of ->i_flags in inodes that might be visible to somebody
else must be under ->i_mutex.  That patch fixes ext2 ioctl() setting S_APPEND.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/ioctl.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 1dfba77eab1..e3cf8c81507 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -44,6 +44,7 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		if (!S_ISDIR(inode->i_mode))
 			flags &= ~EXT2_DIRSYNC_FL;
 
+		mutex_lock(&inode->i_mutex);
 		oldflags = ei->i_flags;
 
 		/*
@@ -53,13 +54,16 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		 * This test looks nicer. Thanks to Pauline Middelink
 		 */
 		if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
-			if (!capable(CAP_LINUX_IMMUTABLE))
+			if (!capable(CAP_LINUX_IMMUTABLE)) {
+				mutex_unlock(&inode->i_mutex);
 				return -EPERM;
+			}
 		}
 
 		flags = flags & EXT2_FL_USER_MODIFIABLE;
 		flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE;
 		ei->i_flags = flags;
+		mutex_unlock(&inode->i_mutex);
 
 		ext2_set_inode_flags(inode);
 		inode->i_ctime = CURRENT_TIME_SEC;
-- 
cgit v1.2.3


From e92a4d595b464c4aae64be39ca61a9ffe9c8b278 Mon Sep 17 00:00:00 2001
From: Andrey Savochkin <saw@sw.ru>
Date: Wed, 6 Dec 2006 20:37:34 -0800
Subject: [PATCH] retries in ext3_prepare_write() violate ordering requirements

In journal=ordered or journal=data mode retry in ext3_prepare_write()
breaks the requirements of journaling of data with respect to metadata.
The fix is to call commit_write to commit allocated zero blocks before
retry.

Signed-off-by: Kirill Korotaev <dev@openvz.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ken Chen <kenneth.w.chen@intel.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/inode.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 75 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 03ba5bcab18..beaf25f5112 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1148,37 +1148,102 @@ static int do_journal_get_write_access(handle_t *handle,
 	return ext3_journal_get_write_access(handle, bh);
 }
 
+/*
+ * The idea of this helper function is following:
+ * if prepare_write has allocated some blocks, but not all of them, the
+ * transaction must include the content of the newly allocated blocks.
+ * This content is expected to be set to zeroes by block_prepare_write().
+ * 2006/10/14  SAW
+ */
+static int ext3_prepare_failure(struct file *file, struct page *page,
+				unsigned from, unsigned to)
+{
+	struct address_space *mapping;
+	struct buffer_head *bh, *head, *next;
+	unsigned block_start, block_end;
+	unsigned blocksize;
+	int ret;
+	handle_t *handle = ext3_journal_current_handle();
+
+	mapping = page->mapping;
+	if (ext3_should_writeback_data(mapping->host)) {
+		/* optimization: no constraints about data */
+skip:
+		return ext3_journal_stop(handle);
+	}
+
+	head = page_buffers(page);
+	blocksize = head->b_size;
+	for (	bh = head, block_start = 0;
+		bh != head || !block_start;
+	    	block_start = block_end, bh = next)
+	{
+		next = bh->b_this_page;
+		block_end = block_start + blocksize;
+		if (block_end <= from)
+			continue;
+		if (block_start >= to) {
+			block_start = to;
+			break;
+		}
+		if (!buffer_mapped(bh))
+		/* prepare_write failed on this bh */
+			break;
+		if (ext3_should_journal_data(mapping->host)) {
+			ret = do_journal_get_write_access(handle, bh);
+			if (ret) {
+				ext3_journal_stop(handle);
+				return ret;
+			}
+		}
+	/*
+	 * block_start here becomes the first block where the current iteration
+	 * of prepare_write failed.
+	 */
+	}
+	if (block_start <= from)
+		goto skip;
+
+	/* commit allocated and zeroed buffers */
+	return mapping->a_ops->commit_write(file, page, from, block_start);
+}
+
 static int ext3_prepare_write(struct file *file, struct page *page,
 			      unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
-	int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
+	int ret, ret2;
+	int needed_blocks = ext3_writepage_trans_blocks(inode);
 	handle_t *handle;
 	int retries = 0;
 
 retry:
 	handle = ext3_journal_start(inode, needed_blocks);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out;
-	}
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
 	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
 		ret = nobh_prepare_write(page, from, to, ext3_get_block);
 	else
 		ret = block_prepare_write(page, from, to, ext3_get_block);
 	if (ret)
-		goto prepare_write_failed;
+		goto failure;
 
 	if (ext3_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
 				from, to, NULL, do_journal_get_write_access);
+		if (ret)
+			/* fatal error, just put the handle and return */
+			journal_stop(handle);
 	}
-prepare_write_failed:
-	if (ret)
-		ext3_journal_stop(handle);
+	return ret;
+
+failure:
+	ret2 = ext3_prepare_failure(file, page, from, to);
+	if (ret2 < 0)
+		return ret2;
 	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
-out:
+	/* retry number exceeded, or other error like -EDQUOT */
 	return ret;
 }
 
-- 
cgit v1.2.3


From b46be05004abb419e303e66e143eed9f8a6e9f3f Mon Sep 17 00:00:00 2001
From: Andrey Savochkin <saw@sw.ru>
Date: Wed, 6 Dec 2006 20:37:36 -0800
Subject: [PATCH] retries in ext4_prepare_write() violate ordering requirements

In journal=ordered or journal=data mode retry in ext4_prepare_write()
breaks the requirements of journaling of data with respect to metadata.
The fix is to call commit_write to commit allocated zero blocks before
retry.

Signed-off-by: Kirill Korotaev <dev@openvz.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ken Chen <kenneth.w.chen@intel.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/inode.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 75 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0a60ec5a16d..1d85d4ec959 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1147,37 +1147,102 @@ static int do_journal_get_write_access(handle_t *handle,
 	return ext4_journal_get_write_access(handle, bh);
 }
 
+/*
+ * The idea of this helper function is following:
+ * if prepare_write has allocated some blocks, but not all of them, the
+ * transaction must include the content of the newly allocated blocks.
+ * This content is expected to be set to zeroes by block_prepare_write().
+ * 2006/10/14  SAW
+ */
+static int ext4_prepare_failure(struct file *file, struct page *page,
+				unsigned from, unsigned to)
+{
+	struct address_space *mapping;
+	struct buffer_head *bh, *head, *next;
+	unsigned block_start, block_end;
+	unsigned blocksize;
+	int ret;
+	handle_t *handle = ext4_journal_current_handle();
+
+	mapping = page->mapping;
+	if (ext4_should_writeback_data(mapping->host)) {
+		/* optimization: no constraints about data */
+skip:
+		return ext4_journal_stop(handle);
+	}
+
+	head = page_buffers(page);
+	blocksize = head->b_size;
+	for (	bh = head, block_start = 0;
+		bh != head || !block_start;
+	    	block_start = block_end, bh = next)
+	{
+		next = bh->b_this_page;
+		block_end = block_start + blocksize;
+		if (block_end <= from)
+			continue;
+		if (block_start >= to) {
+			block_start = to;
+			break;
+		}
+		if (!buffer_mapped(bh))
+		/* prepare_write failed on this bh */
+			break;
+		if (ext4_should_journal_data(mapping->host)) {
+			ret = do_journal_get_write_access(handle, bh);
+			if (ret) {
+				ext4_journal_stop(handle);
+				return ret;
+			}
+		}
+	/*
+	 * block_start here becomes the first block where the current iteration
+	 * of prepare_write failed.
+	 */
+	}
+	if (block_start <= from)
+		goto skip;
+
+	/* commit allocated and zeroed buffers */
+	return mapping->a_ops->commit_write(file, page, from, block_start);
+}
+
 static int ext4_prepare_write(struct file *file, struct page *page,
 			      unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
-	int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
+	int ret, ret2;
+	int needed_blocks = ext4_writepage_trans_blocks(inode);
 	handle_t *handle;
 	int retries = 0;
 
 retry:
 	handle = ext4_journal_start(inode, needed_blocks);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out;
-	}
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
 	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
 		ret = nobh_prepare_write(page, from, to, ext4_get_block);
 	else
 		ret = block_prepare_write(page, from, to, ext4_get_block);
 	if (ret)
-		goto prepare_write_failed;
+		goto failure;
 
 	if (ext4_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
 				from, to, NULL, do_journal_get_write_access);
+		if (ret)
+			/* fatal error, just put the handle and return */
+			journal_stop(handle);
 	}
-prepare_write_failed:
-	if (ret)
-		ext4_journal_stop(handle);
+	return ret;
+
+failure:
+	ret2 = ext4_prepare_failure(file, page, from, to);
+	if (ret2 < 0)
+		return ret2;
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
-out:
+	/* retry number exceeded, or other error like -EDQUOT */
 	return ret;
 }
 
-- 
cgit v1.2.3


From c36264dfb2d6fa6383082de0a1bba8e12b477da1 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Wed, 6 Dec 2006 20:37:42 -0800
Subject: [PATCH] remove the syslog interface when printk is disabled

Attempts to read() from the non-existent dmesg buffer will return zero and
userspace tends to get stuck in a busyloop.

So just remove /dev/kmsg altogether if CONFIG_PRINTK=n.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/Makefile    | 3 ++-
 fs/proc/proc_misc.c | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 7431d7ba2d0..f6c77627257 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,8 +8,9 @@ proc-y			:= nommu.o task_nommu.o
 proc-$(CONFIG_MMU)	:= mmu.o task_mmu.o
 
 proc-y       += inode.o root.o base.o generic.o array.o \
-		kmsg.o proc_tty.o proc_misc.o
+		proc_tty.o proc_misc.o
 
 proc-$(CONFIG_PROC_KCORE)	+= kcore.o
 proc-$(CONFIG_PROC_VMCORE)	+= vmcore.o
 proc-$(CONFIG_PROC_DEVICETREE)	+= proc_devtree.o
+proc-$(CONFIG_PRINTK)	+= kmsg.o
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 93c43b676e5..51815cece6f 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -696,9 +696,11 @@ void __init proc_misc_init(void)
 	proc_symlink("mounts", NULL, "self/mounts");
 
 	/* And now for trickier ones */
+#ifdef CONFIG_PRINTK
 	entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
 	if (entry)
 		entry->proc_fops = &proc_kmsg_operations;
+#endif
 	create_seq_entry("devices", 0, &proc_devinfo_operations);
 	create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
 #ifdef CONFIG_BLOCK
-- 
cgit v1.2.3


From 386d9a7edd9f3492c99124b0a659e9ed7abb30f9 Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus@valinux.co.jp>
Date: Wed, 6 Dec 2006 20:37:53 -0800
Subject: [PATCH] elf: Always define elf_addr_t in linux/elf.h

Define elf_addr_t in linux/elf.h.  The size of the type is determined using
ELF_CLASS.  This allows us to remove the defines that today are spread all
over .c and .h files.

Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Cc: Daniel Jacobowitz <drow@false.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Jakub Jelinek <jakub@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c       | 4 ----
 fs/binfmt_elf_fdpic.c | 3 ---
 2 files changed, 7 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 68e20d5bfe1..14ea630a857 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -47,10 +47,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
 static int load_elf_library(struct file *);
 static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
 
-#ifndef elf_addr_t
-#define elf_addr_t unsigned long
-#endif
-
 /*
  * If we don't support core dumping, then supply a NULL so we
  * don't even try.
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index f86d5c9ce5e..ed9a61c6beb 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -40,9 +40,6 @@
 #include <asm/pgalloc.h>
 
 typedef char *elf_caddr_t;
-#ifndef elf_addr_t
-#define elf_addr_t unsigned long
-#endif
 
 #if 0
 #define kdebug(fmt, ...) printk("FDPIC "fmt"\n" ,##__VA_ARGS__ )
-- 
cgit v1.2.3


From 360276042d7a8369ce912acff99c1c4de394b312 Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus@valinux.co.jp>
Date: Wed, 6 Dec 2006 20:38:00 -0800
Subject: [PATCH] elf: fix kcore note size calculation

 - Define "CORE" string as CORE_STR in single common place.
 - Include terminating zero in CORE_STR length calculation for elf_buflen.
 - Use roundup(,4) to include alignment in elf_buflen calculation.

[akpm@osdl.org: simplification suggested by Roland]
Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Cc: Daniel Jacobowitz <drow@false.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Jakub Jelinek <jakub@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/kcore.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 1294eda4aca..1be73082edd 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -22,6 +22,7 @@
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
+#define CORE_STR "CORE"
 
 static int open_kcore(struct inode * inode, struct file * filp)
 {
@@ -82,10 +83,11 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
 	}
 	*elf_buflen =	sizeof(struct elfhdr) + 
 			(*nphdr + 2)*sizeof(struct elf_phdr) + 
-			3 * (sizeof(struct elf_note) + 4) +
-			sizeof(struct elf_prstatus) +
-			sizeof(struct elf_prpsinfo) +
-			sizeof(struct task_struct);
+			3 * ((sizeof(struct elf_note)) +
+			     roundup(sizeof(CORE_STR), 4)) +
+			roundup(sizeof(struct elf_prstatus), 4) +
+			roundup(sizeof(struct elf_prpsinfo), 4) +
+			roundup(sizeof(struct task_struct), 4);
 	*elf_buflen = PAGE_ALIGN(*elf_buflen);
 	return size + *elf_buflen;
 }
@@ -210,7 +212,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 	nhdr->p_offset	= offset;
 
 	/* set up the process status */
-	notes[0].name = "CORE";
+	notes[0].name = CORE_STR;
 	notes[0].type = NT_PRSTATUS;
 	notes[0].datasz = sizeof(struct elf_prstatus);
 	notes[0].data = &prstatus;
@@ -221,7 +223,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 	bufp = storenote(&notes[0], bufp);
 
 	/* set up the process info */
-	notes[1].name	= "CORE";
+	notes[1].name	= CORE_STR;
 	notes[1].type	= NT_PRPSINFO;
 	notes[1].datasz	= sizeof(struct elf_prpsinfo);
 	notes[1].data	= &prpsinfo;
@@ -238,7 +240,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 	bufp = storenote(&notes[1], bufp);
 
 	/* set up the task structure */
-	notes[2].name	= "CORE";
+	notes[2].name	= CORE_STR;
 	notes[2].type	= NT_TASKSTRUCT;
 	notes[2].datasz	= sizeof(struct task_struct);
 	notes[2].data	= current;
-- 
cgit v1.2.3


From de21c57b90b3716f6f951e88e039d00ab6729ce9 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@openvz.org>
Date: Wed, 6 Dec 2006 20:38:02 -0800
Subject: [PATCH] reiserfs: add missing D-cache flushing

Looks like, reiserfs_prepare_file_region_for_write() doesn't contain
several flush_dcache_page() calls.

Found with help from Dmitriy Monakhov <dmonakhov@openvz.org>

[akpm@osdl.org: small speedup]
Signed-off-by: Alexey Dobriyan <adobriyan@openvz.org>
Cc: Dmitriy Monakhov <dmonakhov@openvz.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/file.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index ac14318c81b..6526498949d 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1045,6 +1045,7 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode
 			char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0);
 			memset(kaddr, 0, from);
 			kunmap_atomic(kaddr, KM_USER0);
+			flush_dcache_page(prepared_pages[0]);
 		}
 		if (to != PAGE_CACHE_SIZE) {	/* Last page needs to be partially zeroed */
 			char *kaddr =
@@ -1052,6 +1053,7 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode
 					KM_USER0);
 			memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);
 			kunmap_atomic(kaddr, KM_USER0);
+			flush_dcache_page(prepared_pages[num_pages - 1]);
 		}
 
 		/* Since all blocks are new - use already calculated value */
@@ -1185,6 +1187,7 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode
 					memset(kaddr + block_start, 0,
 					       from - block_start);
 					kunmap_atomic(kaddr, KM_USER0);
+					flush_dcache_page(prepared_pages[0]);
 					set_buffer_uptodate(bh);
 				}
 			}
@@ -1222,6 +1225,7 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode
 							KM_USER0);
 					memset(kaddr + to, 0, block_end - to);
 					kunmap_atomic(kaddr, KM_USER0);
+					flush_dcache_page(prepared_pages[num_pages - 1]);
 					set_buffer_uptodate(bh);
 				}
 			}
-- 
cgit v1.2.3


From 02316067852187b8bec781bec07410e91af79627 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 6 Dec 2006 20:38:17 -0800
Subject: [PATCH] hotplug CPU: clean up hotcpu_notifier() use

There was lots of #ifdef noise in the kernel due to hotcpu_notifier(fn,
prio) not correctly marking 'fn' as used in the !HOTPLUG_CPU case, and thus
generating compiler warnings of unused symbols, hence forcing people to add
#ifdefs.

the compiler can skip truly unused functions just fine:

    text    data     bss     dec     hex filename
 1624412  728710 3674856 6027978  5bfaca vmlinux.before
 1624412  728710 3674856 6027978  5bfaca vmlinux.after

[akpm@osdl.org: topology.c fix]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index a8ca0ac2148..517860f2d75 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2972,7 +2972,6 @@ init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags)
 	}
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static void buffer_exit_cpu(int cpu)
 {
 	int i;
@@ -2994,7 +2993,6 @@ static int buffer_cpu_notify(struct notifier_block *self,
 		buffer_exit_cpu((unsigned long)hcpu);
 	return NOTIFY_OK;
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 
 void __init buffer_init(void)
 {
-- 
cgit v1.2.3


From 2bd94bd79e5bfa217714f78e5d6d7b6517ca546f Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 6 Dec 2006 20:38:18 -0800
Subject: [PATCH] ext3: fix reservation extension

Hugh Dickins wrote:
> Not found anything relevant, but I keep noticing these lines
> in ext2_try_to_allocate_with_rsv(), ext3 and ext4 similar:
>
> 		} else if (grp_goal > 0 &&
> 				(my_rsv->rsv_end - grp_goal + 1) < *count)
> 			try_to_extend_reservation(my_rsv, sb,
> 					*count-my_rsv->rsv_end + grp_goal - 1);
>
> They're wrong, a no-op in most groups, aren't they?  rsv_end is an
> absolute block number, whereas grp_goal is group-relative, so the
> calculation ought to bring in group_first_block?  Or I'm confused.
>

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Cc: "linux-ext4@vger.kernel.org" <linux-ext4@vger.kernel.org>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index b41a7d7e20f..f96c40a90e2 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1307,10 +1307,14 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 			if (!goal_in_my_reservation(&my_rsv->rsv_window,
 							grp_goal, group, sb))
 				grp_goal = -1;
-		} else if (grp_goal > 0 &&
-			  (my_rsv->rsv_end-grp_goal+1) < *count)
-			try_to_extend_reservation(my_rsv, sb,
-					*count-my_rsv->rsv_end + grp_goal - 1);
+		} else if (grp_goal > 0) {
+			int curr = my_rsv->rsv_end -
+					(grp_goal + group_first_block) + 1;
+
+			if (curr < *count)
+				try_to_extend_reservation(my_rsv, sb,
+							*count - curr);
+		}
 
 		if ((my_rsv->rsv_start > group_last_block) ||
 				(my_rsv->rsv_end < group_first_block)) {
-- 
cgit v1.2.3


From 1df1e63b9e9340015c01b85817568fb9afde10bc Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 6 Dec 2006 20:38:19 -0800
Subject: [PATCH] ext4: fix reservation extension

Hugh Dickins wrote:
> Not found anything relevant, but I keep noticing these lines
> in ext2_try_to_allocate_with_rsv(), ext3 and ext4 similar:
>
> 		} else if (grp_goal > 0 &&
> 				(my_rsv->rsv_end - grp_goal + 1) < *count)
> 			try_to_extend_reservation(my_rsv, sb,
> 					*count-my_rsv->rsv_end + grp_goal - 1);
>
> They're wrong, a no-op in most groups, aren't they?  rsv_end is an
> absolute block number, whereas grp_goal is group-relative, so the
> calculation ought to bring in group_first_block?  Or I'm confused.
>

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Cc: "linux-ext4@vger.kernel.org" <linux-ext4@vger.kernel.org>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 5d45582f951..6bd0bd5bbdc 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1324,10 +1324,14 @@ ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 			if (!goal_in_my_reservation(&my_rsv->rsv_window,
 							grp_goal, group, sb))
 				grp_goal = -1;
-		} else if (grp_goal > 0 &&
-			  (my_rsv->rsv_end-grp_goal+1) < *count)
-			try_to_extend_reservation(my_rsv, sb,
-					*count-my_rsv->rsv_end + grp_goal - 1);
+		} else if (grp_goal > 0) {
+			int curr = my_rsv->rsv_end -
+					(grp_goal + group_first_block) + 1;
+
+			if (curr < *count)
+				try_to_extend_reservation(my_rsv, sb,
+							*count - curr);
+		}
 
 		if ((my_rsv->rsv_start > group_last_block) ||
 				(my_rsv->rsv_end < group_first_block)) {
-- 
cgit v1.2.3


From 8487f2e4067839ad3d009c240d51b682264320ae Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:38:25 -0800
Subject: [PATCH] make ecryptfs_version_str_map[] static

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 306f8fbd1a0..3ede12b2593 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -691,7 +691,7 @@ static ssize_t version_show(struct ecryptfs_obj *obj, char *buff)
 
 static struct ecryptfs_attribute sysfs_attr_version = __ATTR_RO(version);
 
-struct ecryptfs_version_str_map_elem {
+static struct ecryptfs_version_str_map_elem {
 	u32 flag;
 	char *str;
 } ecryptfs_version_str_map[] = {
-- 
cgit v1.2.3


From d394e122bc1adba0f3eb1ebec1cedb8a8c524741 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:38:26 -0800
Subject: [PATCH] make fs/jbd/transaction.c:__journal_temp_unlink_buffer()
 static

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/transaction.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 4f82bcd63e4..d38e0d575e4 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -27,6 +27,8 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 
+static void __journal_temp_unlink_buffer(struct journal_head *jh);
+
 /*
  * get_transaction: obtain a new transaction_t object.
  *
@@ -1499,7 +1501,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
  *
  * Called under j_list_lock.  The journal may not be locked.
  */
-void __journal_temp_unlink_buffer(struct journal_head *jh)
+static void __journal_temp_unlink_buffer(struct journal_head *jh)
 {
 	struct journal_head **list = NULL;
 	transaction_t *transaction;
-- 
cgit v1.2.3


From 7ddae86095794cce4364740edd8463c77654a265 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:38:27 -0800
Subject: [PATCH] make
 fs/jbd2/transaction.c:__kbd2_journal_temp_unlink_buffer() static

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd2/transaction.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index c051a94c8a9..3a8700153cb 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -27,6 +27,8 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 
+static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
+
 /*
  * jbd2_get_transaction: obtain a new transaction_t object.
  *
-- 
cgit v1.2.3


From c585646dd1d98caf0a5f2e85c794c1441df6fac1 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:38:29 -0800
Subject: [PATCH] fs/lockd/host.c: make 2 functions static

Make the following needlessly global functions static:

 - nlm_lookup_host()
 - nsm_find()

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/lockd/host.c | 53 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 28 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index fb24a973034..3d4610c2a26 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -36,34 +36,14 @@ static DEFINE_MUTEX(nlm_host_mutex);
 static void			nlm_gc_hosts(void);
 static struct nsm_handle *	__nsm_find(const struct sockaddr_in *,
 					const char *, int, int);
-
-/*
- * Find an NLM server handle in the cache. If there is none, create it.
- */
-struct nlm_host *
-nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
-			const char *hostname, int hostname_len)
-{
-	return nlm_lookup_host(0, sin, proto, version,
-			       hostname, hostname_len);
-}
-
-/*
- * Find an NLM client handle in the cache. If there is none, create it.
- */
-struct nlm_host *
-nlmsvc_lookup_host(struct svc_rqst *rqstp,
-			const char *hostname, int hostname_len)
-{
-	return nlm_lookup_host(1, &rqstp->rq_addr,
-			       rqstp->rq_prot, rqstp->rq_vers,
-			       hostname, hostname_len);
-}
+static struct nsm_handle *	nsm_find(const struct sockaddr_in *sin,
+					 const char *hostname,
+					 int hostname_len);
 
 /*
  * Common host lookup routine for server & client
  */
-struct nlm_host *
+static struct nlm_host *
 nlm_lookup_host(int server, const struct sockaddr_in *sin,
 					int proto, int version,
 					const char *hostname,
@@ -194,6 +174,29 @@ nlm_destroy_host(struct nlm_host *host)
 	kfree(host);
 }
 
+/*
+ * Find an NLM server handle in the cache. If there is none, create it.
+ */
+struct nlm_host *
+nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
+			const char *hostname, int hostname_len)
+{
+	return nlm_lookup_host(0, sin, proto, version,
+			       hostname, hostname_len);
+}
+
+/*
+ * Find an NLM client handle in the cache. If there is none, create it.
+ */
+struct nlm_host *
+nlmsvc_lookup_host(struct svc_rqst *rqstp,
+			const char *hostname, int hostname_len)
+{
+	return nlm_lookup_host(1, &rqstp->rq_addr,
+			       rqstp->rq_prot, rqstp->rq_vers,
+			       hostname, hostname_len);
+}
+
 /*
  * Create the NLM RPC client for an NLM peer
  */
@@ -495,7 +498,7 @@ out:
 	return nsm;
 }
 
-struct nsm_handle *
+static struct nsm_handle *
 nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len)
 {
 	return __nsm_find(sin, hostname, hostname_len, 1);
-- 
cgit v1.2.3


From 9711ef9945ce33b2b4ecb51a4db3f65f7d784876 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:38:31 -0800
Subject: [PATCH] make fs/proc/base.c:proc_pid_instantiate() static

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/base.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 05ace70d051..b859fc749c0 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1885,8 +1885,9 @@ out:
 	return;
 }
 
-struct dentry *proc_pid_instantiate(struct inode *dir,
-	struct dentry * dentry, struct task_struct *task, void *ptr)
+static struct dentry *proc_pid_instantiate(struct inode *dir,
+					   struct dentry * dentry,
+					   struct task_struct *task, void *ptr)
 {
 	struct dentry *error = ERR_PTR(-ENOENT);
 	struct inode *inode;
-- 
cgit v1.2.3


From d18de5a2721f84ffd6a5d637915746ed47becc1c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 6 Dec 2006 20:38:45 -0800
Subject: [PATCH] don't insert pipe dentries into dentry_hashtable.

We currently insert pipe dentries into the global dentry hashtable.  This
is suboptimal because there is currently no way these entries can be used
for a lookup().  (/proc/xxx/fd/xxx uses a different mechanism).  Inserting
them in dentry hashtable slows dcache lookups.

To let __dpath() still work correctly (ie not adding a " (deleted)") after
dentry name, we do :

 - Right after d_alloc(), pretend they are hashed by clearing the
   DCACHE_UNHASHED bit.

 - Call d_instantiate() instead of d_add() : dentry is not inserted in
   hash table.

__dpath() & friends work as intended during dentry lifetime.

 - At dismantle time, once dput() must clear the dentry, setting again
   DCACHE_UNHASHED bit inside the custom d_delete() function provided by
   pipe code, so that dput() can just kill_it.

This patch, combined with (avoid RCU for never hashed dentries) reduced
time of { pipe(p); close(p[0]); close(p[1]);} on my UP machine (1.6GHz
Pentium-M) from 3.23 us to 2.86 us (But this patch does not depend on other
patches, only bench results)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: David Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/pipe.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index b1626f269a3..ae36b89b1a3 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -830,7 +830,14 @@ void free_pipe_info(struct inode *inode)
 static struct vfsmount *pipe_mnt __read_mostly;
 static int pipefs_delete_dentry(struct dentry *dentry)
 {
-	return 1;
+	/*
+	 * At creation time, we pretended this dentry was hashed
+	 * (by clearing DCACHE_UNHASHED bit in d_flags)
+	 * At delete time, we restore the truth : not hashed.
+	 * (so that dput() can proceed correctly)
+	 */
+	dentry->d_flags |= DCACHE_UNHASHED;
+	return 0;
 }
 
 static struct dentry_operations pipefs_dentry_operations = {
@@ -891,17 +898,22 @@ struct file *create_write_pipe(void)
 	if (!inode)
 		goto err_file;
 
-	sprintf(name, "[%lu]", inode->i_ino);
+	this.len = sprintf(name, "[%lu]", inode->i_ino);
 	this.name = name;
-	this.len = strlen(name);
-	this.hash = inode->i_ino; /* will go */
+	this.hash = 0;
 	err = -ENOMEM;
 	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
 	if (!dentry)
 		goto err_inode;
 
 	dentry->d_op = &pipefs_dentry_operations;
-	d_add(dentry, inode);
+	/*
+	 * We dont want to publish this dentry into global dentry hash table.
+	 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
+	 * This permits a working /proc/$pid/fd/XXX on pipes
+	 */
+	dentry->d_flags &= ~DCACHE_UNHASHED;
+	d_instantiate(dentry, inode);
 	f->f_vfsmnt = mntget(pipe_mnt);
 	f->f_dentry = dentry;
 	f->f_mapping = inode->i_mapping;
-- 
cgit v1.2.3


From b3423415fbc2e5461605826317da1c8dbbf21f97 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 6 Dec 2006 20:38:48 -0800
Subject: [PATCH] dcache: avoid RCU for never-hashed dentries

Some dentries don't need to be globally visible in dentry hashtable.
(pipes & sockets)

Such dentries dont need to wait for a RCU grace period at delete time.
Being able to free them permits a better CPU cache use (hot cache)

This patch combined with (dont insert pipe dentries into dentry_hashtable)
reduced time of { pipe(p); close(p[0]); close(p[1]);} on my UP machine (1.6
GHz Pentium-M) from 3.23 us to 2.86 us (But this patch does not depend on
other patches, only bench results)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Maneesh Soni <maneesh@in.ibm.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Acked-by: David Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index a7c67cee5d8..d68631f18df 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -68,15 +68,19 @@ struct dentry_stat_t dentry_stat = {
 	.age_limit = 45,
 };
 
-static void d_callback(struct rcu_head *head)
+static void __d_free(struct dentry *dentry)
 {
-	struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
-
 	if (dname_external(dentry))
 		kfree(dentry->d_name.name);
 	kmem_cache_free(dentry_cache, dentry); 
 }
 
+static void d_callback(struct rcu_head *head)
+{
+	struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
+	__d_free(dentry);
+}
+
 /*
  * no dcache_lock, please.  The caller must decrement dentry_stat.nr_dentry
  * inside dcache_lock.
@@ -85,7 +89,11 @@ static void d_free(struct dentry *dentry)
 {
 	if (dentry->d_op && dentry->d_op->d_release)
 		dentry->d_op->d_release(dentry);
- 	call_rcu(&dentry->d_u.d_rcu, d_callback);
+	/* if dentry was never inserted into hash, immediate free is OK */
+	if (dentry->d_hash.pprev == NULL)
+		__d_free(dentry);
+	else
+		call_rcu(&dentry->d_u.d_rcu, d_callback);
 }
 
 /*
-- 
cgit v1.2.3


From 01afb2134ed079fa4551b4d26f62423df6790c09 Mon Sep 17 00:00:00 2001
From: Yan Burman <burman.yan@gmail.com>
Date: Wed, 6 Dec 2006 20:39:01 -0800
Subject: [PATCH] reiser: replace kmalloc+memset with kzalloc

Replace kmalloc+memset with kzalloc

Signed-off-by: Yan Burman <burman.yan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/file.c  | 3 +--
 fs/reiserfs/inode.c | 7 ++-----
 fs/reiserfs/super.c | 3 +--
 3 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 6526498949d..970ecd9dbe6 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -317,12 +317,11 @@ static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handl
 			/* area filled with zeroes, to supply as list of zero blocknumbers
 			   We allocate it outside of loop just in case loop would spin for
 			   several iterations. */
-			char *zeros = kmalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC);	// We cannot insert more than MAX_ITEM_LEN bytes anyway.
+			char *zeros = kzalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC);	// We cannot insert more than MAX_ITEM_LEN bytes anyway.
 			if (!zeros) {
 				res = -ENOMEM;
 				goto error_exit_free_blocks;
 			}
-			memset(zeros, 0, to_paste * UNFM_P_SIZE);
 			do {
 				to_paste =
 				    min_t(__u64, hole_size,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a625688de45..254239e6f9e 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -929,15 +929,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 			if (blocks_needed == 1) {
 				un = &unf_single;
 			} else {
-				un = kmalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC);	// We need to avoid scheduling.
+				un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC);	// We need to avoid scheduling.
 				if (!un) {
 					un = &unf_single;
 					blocks_needed = 1;
 					max_to_insert = 0;
-				} else
-					memset(un, 0,
-					       UNFM_P_SIZE * min(blocks_needed,
-								 max_to_insert));
+				}
 			}
 			if (blocks_needed <= max_to_insert) {
 				/* we are going to add target block to the file. Use allocated
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 745bc714ba9..7fb5fb036f9 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1549,13 +1549,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 	struct reiserfs_sb_info *sbi;
 	int errval = -EINVAL;
 
-	sbi = kmalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
+	sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
 	if (!sbi) {
 		errval = -ENOMEM;
 		goto error;
 	}
 	s->s_fs_info = sbi;
-	memset(sbi, 0, sizeof(struct reiserfs_sb_info));
 	/* Set default values for options: non-aggressive tails, RO on errors */
 	REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL);
 	REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO);
-- 
cgit v1.2.3


From c55747682e938c57a9a859d3b26f2c4c83cea011 Mon Sep 17 00:00:00 2001
From: "Vladimir V. Saveliev" <vs@namesys.com>
Date: Wed, 6 Dec 2006 20:39:12 -0800
Subject: [PATCH] reiserfs: do not add save links for O_DIRECT writes

We add a save link for O_DIRECT writes to protect the i_size against the
crashes before we actually finish the I/O.  If we hit an -ENOSPC in
aops->prepare_write(), we would do a truncate() to release the blocks which
might have got initialized.  Now the truncate would add another save link
for the same inode causing a reiserfs panic for having multiple save links
for the same inode.

Signed-off-by: Vladimir V. Saveliev <vs@namesys.com>
Signed-off-by: Amit Arora <amitarora@in.ibm.com>
Signed-off-by: Suzuki K P <suzuki@in.ibm.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <mason@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/file.c | 54 ++++--------------------------------------------------
 1 file changed, 4 insertions(+), 50 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 970ecd9dbe6..373d862c3f8 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -406,6 +406,8 @@ static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handl
 				   we restart it. This will also free the path. */
 				if (journal_transaction_should_end
 				    (th, th->t_blocks_allocated)) {
+					inode->i_size = cpu_key_k_offset(&key) +
+						(to_paste << inode->i_blkbits);
 					res =
 					    restart_transaction(th, inode,
 								&path);
@@ -1310,56 +1312,8 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 			count = MAX_NON_LFS - (unsigned long)*ppos;
 	}
 
-	if (file->f_flags & O_DIRECT) {	// Direct IO needs treatment
-		ssize_t result, after_file_end = 0;
-		if ((*ppos + count >= inode->i_size)
-		    || (file->f_flags & O_APPEND)) {
-			/* If we are appending a file, we need to put this savelink in here.
-			   If we will crash while doing direct io, finish_unfinished will
-			   cut the garbage from the file end. */
-			reiserfs_write_lock(inode->i_sb);
-			err =
-			    journal_begin(&th, inode->i_sb,
-					  JOURNAL_PER_BALANCE_CNT);
-			if (err) {
-				reiserfs_write_unlock(inode->i_sb);
-				return err;
-			}
-			reiserfs_update_inode_transaction(inode);
-			add_save_link(&th, inode, 1 /* Truncate */ );
-			after_file_end = 1;
-			err =
-			    journal_end(&th, inode->i_sb,
-					JOURNAL_PER_BALANCE_CNT);
-			reiserfs_write_unlock(inode->i_sb);
-			if (err)
-				return err;
-		}
-		result = do_sync_write(file, buf, count, ppos);
-
-		if (after_file_end) {	/* Now update i_size and remove the savelink */
-			struct reiserfs_transaction_handle th;
-			reiserfs_write_lock(inode->i_sb);
-			err = journal_begin(&th, inode->i_sb, 1);
-			if (err) {
-				reiserfs_write_unlock(inode->i_sb);
-				return err;
-			}
-			reiserfs_update_inode_transaction(inode);
-			mark_inode_dirty(inode);
-			err = journal_end(&th, inode->i_sb, 1);
-			if (err) {
-				reiserfs_write_unlock(inode->i_sb);
-				return err;
-			}
-			err = remove_save_link(inode, 1 /* truncate */ );
-			reiserfs_write_unlock(inode->i_sb);
-			if (err)
-				return err;
-		}
-
-		return result;
-	}
+	if (file->f_flags & O_DIRECT)
+		return do_sync_write(file, buf, count, ppos);
 
 	if (unlikely((ssize_t) count < 0))
 		return -EINVAL;
-- 
cgit v1.2.3


From 126039256cf164f69d8cfa20f1952d00fa61f52f Mon Sep 17 00:00:00 2001
From: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Date: Wed, 6 Dec 2006 20:39:17 -0800
Subject: [PATCH] jbd2: wait for already submitted t_sync_datalist buffer to
 complete

In the current jbd code, if a buffer on BJ_SyncData list is dirty and not
locked, the buffer is refiled to BJ_Locked list, submitted to the IO and
waited for IO completion.

But the fsstress test showed the case that when a buffer was already
submitted to the IO just before the buffer_dirty(bh) check, the buffer was
not waited for IO completion.

Following patch solves this problem.  If it is assumed that a buffer is
submitted to the IO before the buffer_dirty(bh) check and still being
written to disk, this buffer is refiled to BJ_Locked list.

Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Cc: Jan Kara <jack@ucw.cz>
Cc: "Stephen C. Tweedie" <sct@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd2/commit.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 70b2ae1ef28..6bd8005e3d3 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -248,8 +248,12 @@ write_out_data:
 				bufs = 0;
 				goto write_out_data;
 			}
-		}
-		else {
+		} else if (!locked && buffer_locked(bh)) {
+			__jbd2_journal_file_buffer(jh, commit_transaction,
+						BJ_Locked);
+			jbd_unlock_bh_state(bh);
+			put_bh(bh);
+		} else {
 			BUFFER_TRACE(bh, "writeout complete: unfile");
 			__jbd2_journal_unfile_buffer(jh);
 			jbd_unlock_bh_state(bh);
-- 
cgit v1.2.3


From cd16c8f72aaaf2af06969d1441051b90010f7041 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:39:18 -0800
Subject: [PATCH] ext4 balloc: reset windowsz when full

ext4_new_blocks should reset the reservation window size to 0 when squeezing
the last blocks out of an almost full filesystem, so the retry doesn't skip
any groups with less than half that free, reporting ENOSPC too soon.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 6bd0bd5bbdc..af3acf3c37b 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1566,6 +1566,7 @@ retry_alloc:
 	 */
 	if (my_rsv) {
 		my_rsv = NULL;
+		windowsz = 0;
 		group_no = goal_group;
 		goto retry_alloc;
 	}
-- 
cgit v1.2.3


From e7dc95db2695dc92b223cdc49227ac57e63406d2 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:39:19 -0800
Subject: [PATCH] ext4 balloc: fix off-by-one against grp_goal

grp_goal 0 is a genuine goal (unlike -1), so ext4_try_to_allocate_with_rsv
should treat it as such.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index af3acf3c37b..2bcca5261ee 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1288,7 +1288,7 @@ ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 	}
 	/*
 	 * grp_goal is a group relative block number (if there is a goal)
-	 * 0 < grp_goal < EXT4_BLOCKS_PER_GROUP(sb)
+	 * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb)
 	 * first block is a filesystem wide block number
 	 * first block is the block number of the first block in this group
 	 */
@@ -1324,7 +1324,7 @@ ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 			if (!goal_in_my_reservation(&my_rsv->rsv_window,
 							grp_goal, group, sb))
 				grp_goal = -1;
-		} else if (grp_goal > 0) {
+		} else if (grp_goal >= 0) {
 			int curr = my_rsv->rsv_end -
 					(grp_goal + group_first_block) + 1;
 
-- 
cgit v1.2.3


From b2f2c76d17b68869914a1ec3ab04c7674668f60d Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:39:20 -0800
Subject: [PATCH] ext4 balloc: fix off-by-one against rsv_end

rsv_end is the last block within the reservation, so alloc_new_reservation
should accept start_block == rsv_end as success.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 2bcca5261ee..c5589b3f58e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1165,7 +1165,7 @@ retry:
 	 * check if the first free block is within the
 	 * free space we just reserved
 	 */
-	if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end)
+	if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
 		return 0;		/* success */
 	/*
 	 * if the first free bit we found is out of the reservable space
-- 
cgit v1.2.3


From b78a657f0a64134b3813bbdf4e1853d1420eb8d4 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:39:21 -0800
Subject: [PATCH] ext4 balloc: say rb_entry not list_entry

The reservations tree is an rb_tree not a list, so it's less confusing to use
rb_entry() than list_entry() - though they're both just container_of().

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index c5589b3f58e..8e7249e3e39 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -165,7 +165,7 @@ restart:
 
 	printk("Block Allocation Reservation Windows Map (%s):\n", fn);
 	while (n) {
-		rsv = list_entry(n, struct ext4_reserve_window_node, rsv_node);
+		rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
 		if (verbose)
 			printk("reservation window 0x%p "
 			       "start:  %llu, end:  %llu\n",
@@ -966,7 +966,7 @@ static int find_next_reservable_window(
 
 		prev = rsv;
 		next = rb_next(&rsv->rsv_node);
-		rsv = list_entry(next,struct ext4_reserve_window_node,rsv_node);
+		rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node);
 
 		/*
 		 * Reached the last reservation, we can just append to the
@@ -1210,7 +1210,7 @@ static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
 	if (!next)
 		my_rsv->rsv_end += size;
 	else {
-		next_rsv = list_entry(next, struct ext4_reserve_window_node, rsv_node);
+		next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node);
 
 		if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
 			my_rsv->rsv_end += size;
-- 
cgit v1.2.3


From 341cee438593ff9c4520fb73c561460074f29a9a Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:39:24 -0800
Subject: [PATCH] ext4 balloc: use io_error label

ext4_new_blocks has a nice io_error label for setting -EIO, so goto that in
the one place that doesn't already use it.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 8e7249e3e39..1c8ef0e35f4 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1529,10 +1529,8 @@ retry_alloc:
 		if (group_no >= ngroups)
 			group_no = 0;
 		gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
-		if (!gdp) {
-			*errp = -EIO;
-			goto out;
-		}
+		if (!gdp)
+			goto io_error;
 		free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
 		/*
 		 * skip this group if the number of
-- 
cgit v1.2.3


From ec0837f230e57afde65db72539e748d2a75abed0 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:39:26 -0800
Subject: [PATCH] ext4 balloc: fix _with_rsv freeze

Port fix to the off-by-one in find_next_usable_block's memscan from ext2 to
ext4; but it didn't cause a serious problem for ext4 because the additional
ext4_test_allocatable check rescued it from the error.

[akpm@osdl.org: build fix]
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1c8ef0e35f4..c4dd1103ccf 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -747,7 +747,7 @@ find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
 		here = 0;
 
 	p = ((char *)bh->b_data) + (here >> 3);
-	r = memscan(p, 0, (maxblocks - here + 7) >> 3);
+	r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
 	next = (r - ((char *)bh->b_data)) << 3;
 
 	if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh))
-- 
cgit v1.2.3


From c949d4eb40ce021b1abc3b63af23aa535662cb17 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Wed, 6 Dec 2006 20:39:38 -0800
Subject: [PATCH] autofs: fix error code path in autofs_fill_sb()

When kernel is compiled with old version of autofs (CONFIG_AUTOFS_FS), and
new (observed at least with 5.x.x) automount deamon is started, kernel
correctly reports incompatible version of kernel and userland daemon, but
then screws things up instead of correct handling of the error:

 autofs: kernel does not match daemon version
 =====================================
 [ BUG: bad unlock balance detected! ]
 -------------------------------------
 automount/4199 is trying to release lock (&type->s_umount_key) at:
 [<c0163b9e>] get_sb_nodev+0x76/0xa4
 but there are no more locks to release!

 other info that might help us debug this:
 no locks held by automount/4199.

 stack backtrace:
  [<c0103b15>] dump_trace+0x68/0x1b2
  [<c0103c77>] show_trace_log_lvl+0x18/0x2c
  [<c01041db>] show_trace+0xf/0x11
  [<c010424d>] dump_stack+0x12/0x14
  [<c012e02c>] print_unlock_inbalance_bug+0xe7/0xf3
  [<c012fd4f>] lock_release+0x8d/0x164
  [<c012b452>] up_write+0x14/0x27
  [<c0163b9e>] get_sb_nodev+0x76/0xa4
  [<c0163689>] vfs_kern_mount+0x83/0xf6
  [<c016373e>] do_kern_mount+0x2d/0x3e
  [<c017513f>] do_mount+0x607/0x67a
  [<c0175224>] sys_mount+0x72/0xa4
  [<c0102b96>] sysenter_past_esp+0x5f/0x99
 DWARF2 unwinder stuck at sysenter_past_esp+0x5f/0x99
 Leftover inexact backtrace:
  =======================

and then deadlock comes.

The problem: autofs_fill_super() returns EINVAL to get_sb_nodev(), but
before that, it calls kill_anon_super() to destroy the superblock which
won't be needed.  This is however way too soon to call kill_anon_super(),
because get_sb_nodev() has to perform its own cleanup of the superblock
first (deactivate_super(), etc.).  The correct time to call
kill_anon_super() is in the autofs_kill_sb() callback, which is called by
deactivate_super() at proper time, when the superblock is ready to be
killed.

I can see the same faulty codepath also in autofs4.  This patch solves
issues in both filesystems in a same way - it postpones the
kill_anon_super() until the proper time is signalized by deactivate_super()
calling the kill_sb() callback.

[raven@themaw.net: update comment]
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Acked-by: Ian Kent <raven@themaw.net>
Cc: <stable@kernel.org>
Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs/inode.c  | 7 ++++---
 fs/autofs4/inode.c | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 38ede5c9d6f..f968d134280 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -28,10 +28,11 @@ void autofs_kill_sb(struct super_block *sb)
 	/*
 	 * In the event of a failure in get_sb_nodev the superblock
 	 * info is not present so nothing else has been setup, so
-	 * just exit when we are called from deactivate_super.
+	 * just call kill_anon_super when we are called from
+	 * deactivate_super.
 	 */
 	if (!sbi)
-		return;
+		goto out_kill_sb;
 
 	if ( !sbi->catatonic )
 		autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */
@@ -44,6 +45,7 @@ void autofs_kill_sb(struct super_block *sb)
 
 	kfree(sb->s_fs_info);
 
+out_kill_sb:
 	DPRINTK(("autofs: shutting down\n"));
 	kill_anon_super(sb);
 }
@@ -209,7 +211,6 @@ fail_iput:
 fail_free:
 	kfree(sbi);
 	s->s_fs_info = NULL;
-	kill_anon_super(s);
 fail_unlock:
 	return -EINVAL;
 }
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index ce7c0f1dd52..9c48250fd72 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -152,10 +152,11 @@ void autofs4_kill_sb(struct super_block *sb)
 	/*
 	 * In the event of a failure in get_sb_nodev the superblock
 	 * info is not present so nothing else has been setup, so
-	 * just exit when we are called from deactivate_super.
+	 * just call kill_anon_super when we are called from
+	 * deactivate_super.
 	 */
 	if (!sbi)
-		return;
+		goto out_kill_sb;
 
 	sb->s_fs_info = NULL;
 
@@ -167,6 +168,7 @@ void autofs4_kill_sb(struct super_block *sb)
 
 	kfree(sbi);
 
+out_kill_sb:
 	DPRINTK("shutting down");
 	kill_anon_super(sb);
 }
@@ -426,7 +428,6 @@ fail_ino:
 fail_free:
 	kfree(sbi);
 	s->s_fs_info = NULL;
-	kill_anon_super(s);
 fail_unlock:
 	return -EINVAL;
 }
-- 
cgit v1.2.3


From 3982cd99c30285ebc71d405cd8530a3dfb7265de Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:40:01 -0800
Subject: [PATCH] fs/sysv/: doc cleanup

Remove two different changelog files from fs/sysv/ and merges the INTRO
file into Documentation/filesystems/sysv-fs.txt

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/sysv/CHANGES   |  60 ------------------
 fs/sysv/ChangeLog | 106 -------------------------------
 fs/sysv/INTRO     | 182 ------------------------------------------------------
 3 files changed, 348 deletions(-)
 delete mode 100644 fs/sysv/CHANGES
 delete mode 100644 fs/sysv/ChangeLog
 delete mode 100644 fs/sysv/INTRO

(limited to 'fs')

diff --git a/fs/sysv/CHANGES b/fs/sysv/CHANGES
deleted file mode 100644
index 66ea6e92be6..00000000000
--- a/fs/sysv/CHANGES
+++ /dev/null
@@ -1,60 +0,0 @@
-Mon, 15 Dec 1997	  Krzysztof G. Baranowski <kgb@manjak.knm.org.pl>
-	*    namei.c: struct sysv_dir_inode_operations updated to use dentries.
-
-Fri, 23 Jan 1998   Krzysztof G. Baranowski <kgb@manjak.knm.org.pl>
-	*    inode.c: corrected 1 track offset setting (in sb->sv_block_base).
-		      Originally it was overridden (by setting to zero)
-		      in detected_[xenix,sysv4,sysv2,coherent]. Thanks
-		      to Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl>
-		      for identifying the problem.
-
-Tue, 27 Jan 1998   Krzysztof G. Baranowski <kgb@manjak.knm.org.pl>
-        *    inode.c: added 2048-byte block support to SystemV FS.
-		      Merged detected_bs[512,1024,2048]() into one function:
-		      void detected_bs (u_char type, struct super_block *sb).
-		      Thanks to Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl>
-		      for the patch.
-
-Wed, 4 Feb 1998   Krzysztof G. Baranowski <kgb@manjak.knm.org.pl>
-	*    namei.c: removed static subdir(); is_subdir() from dcache.c
-		      is used instead. Cosmetic changes.
-
-Thu, 3 Dec 1998   Al Viro (viro@parcelfarce.linux.theplanet.co.uk)
-	*    namei.c (sysv_rmdir):
-		      Bugectomy: old check for victim being busy
-		      (inode->i_count) wasn't replaced (with checking
-		      dentry->d_count) and escaped Linus in the last round
-		      of changes. Shot and buried.
-
-Wed, 9 Dec 1998   AV
-	*    namei.c (do_sysv_rename):
-		       Fixed incorrect check for other owners + race.
-		       Removed checks that went to VFS.
-	*    namei.c (sysv_unlink):
-		       Removed checks that went to VFS.
-
-Thu, 10 Dec 1998   AV
-	*    namei.c (do_mknod):
-			Removed dead code - mknod is never asked to
-			create a symlink or directory. Incidentially,
-			it wouldn't do it right if it would be called.
-
-Sat, 26 Dec 1998   KGB
-	*    inode.c (detect_sysv4):
-			Added detection of expanded s_type field (0x10,
-			0x20 and 0x30).  Forced read-only access in this case.
-
-Sun, 21 Mar 1999   AV
-	*    namei.c (sysv_link):
-			Fixed i_count usage that resulted in dcache corruption.
-	*    inode.c:
-			Filled ->delete_inode() method with sysv_delete_inode().
-			sysv_put_inode() is gone, as it tried to do ->delete_
-			_inode()'s job.
-	*    ialloc.c: (sysv_free_inode):
-			Fixed race.
-
-Sun, 30 Apr 1999   AV
-	*    namei.c (sysv_mknod):
-			Removed dead code (S_IFREG case is now passed to
-			->create() by VFS).
diff --git a/fs/sysv/ChangeLog b/fs/sysv/ChangeLog
deleted file mode 100644
index f403f8b91b8..00000000000
--- a/fs/sysv/ChangeLog
+++ /dev/null
@@ -1,106 +0,0 @@
-Thu Feb 14 2002  Andrew Morton  <akpm@zip.com.au>
-
-	* dir_commit_chunk(): call writeout_one_page() as well as
-	  waitfor_one_page() for IS_SYNC directories, so that we
-	  actually do sync the directory. (forward-port from 2.4).
-
-Thu Feb  7 2002  Alexander Viro  <viro@parcelfarce.linux.theplanet.co.uk>
-
-	* super.c: switched to ->get_sb()
-	* ChangeLog: fixed dates ;-)
-
-2002-01-24  David S. Miller  <davem@redhat.com>
-
-	* inode.c: Include linux/init.h
-
-Mon Jan 21 2002  Alexander Viro  <viro@parcelfarce.linux.theplanet.co.uk>
-	* ialloc.c (sysv_new_inode): zero SYSV_I(inode)->i_data out.
-	* i_vnode renamed to vfs_inode.  Sorry, but let's keep that
-	  consistent.
-
-Sat Jan 19 2002  Christoph Hellwig  <hch@infradead.org>
-
-	* include/linux/sysv_fs.h (SYSV_I): Get fs-private inode data using
-		list_entry() instead of inode->u.
-	* include/linux/sysv_fs_i.h: Add 'struct inode  i_vnode' field to
-		sysv_inode_info structure.
-	* inode.c: Include <linux/slab.h>, implement alloc_inode/destroy_inode
-		sop methods, add infrastructure for per-fs inode slab cache.
-	* super.c (init_sysv_fs): Initialize inode cache, recover properly
-		in the case of failed register_filesystem for V7.
-	(exit_sysv_fs): Destroy inode cache.
-
-Sat Jan 19 2002  Christoph Hellwig  <hch@infradead.org>
-
-	* include/linux/sysv_fs.h: Include <linux/sysv_fs_i.h>, declare SYSV_I().
-	* dir.c (sysv_find_entry): Use SYSV_I() instead of ->u.sysv_i to
-		access fs-private inode data.
-	* ialloc.c (sysv_new_inode): Likewise.
-	* inode.c (sysv_read_inode): Likewise.
-	(sysv_update_inode): Likewise.
-	* itree.c (get_branch): Likewise.
-	(sysv_truncate): Likewise.
-	* symlink.c (sysv_readlink): Likewise.
-	(sysv_follow_link): Likewise.
-
-Fri Jan  4 2002  Alexander Viro  <viro@parcelfarce.linux.theplanet.co.uk>
-
-	* ialloc.c (sysv_free_inode): Use sb->s_id instead of bdevname().
-	* inode.c (sysv_read_inode): Likewise.
-	  (sysv_update_inode): Likewise.
-	  (sysv_sync_inode): Likewise.
-	* super.c (detect_sysv): Likewise.
-	  (complete_read_super): Likewise.
-	  (sysv_read_super): Likewise.
-	  (v7_read_super): Likewise.
-
-Sun Dec 30 2001  Manfred Spraul  <manfred@colorfullife.com>
-
-	* dir.c (dir_commit_chunk): Do not set dir->i_version.
-	(sysv_readdir): Likewise.
-
-Thu Dec 27 2001  Alexander Viro  <viro@parcelfarce.linux.theplanet.co.uk>
-
-	* itree.c (get_block): Use map_bh() to fill out bh_result.
-
-Tue Dec 25 2001  Alexander Viro  <viro@parcelfarce.linux.theplanet.co.uk>
-
-	* super.c (sysv_read_super): Use sb_set_blocksize() to set blocksize.
-	  (v7_read_super): Likewise.
-
-Tue Nov 27 2001  Alexander Viro  <viro@parcelfarce.linux.theplanet.co.uk>
-
-	* itree.c (get_block): Change type for iblock argument to sector_t.
-	* super.c (sysv_read_super): Set s_blocksize early.
-	  (v7_read_super): Likewise.
-	* balloc.c (sysv_new_block): Use sb_bread(). instead of bread().
-	  (sysv_count_free_blocks): Likewise.
-	* ialloc.c (sysv_raw_inode): Likewise.
-	* itree.c (get_branch): Likewise.
-	  (free_branches): Likewise.
-	* super.c (sysv_read_super): Likewise.
-	  (v7_read_super): Likewise.
-
-Sat Dec 15 2001  Christoph Hellwig  <hch@infradead.org>
-
-	* inode.c (sysv_read_inode): Mark inode as bad in case of failure.
-	* super.c (complete_read_super): Check for bad root inode.
-
-Wed Nov 21 2001  Andrew Morton  <andrewm@uow.edu.au>
-
-	* file.c (sysv_sync_file): Call fsync_inode_data_buffers.
-
-Fri Oct 26 2001  Christoph Hellwig  <hch@infradead.org>
-
-	* dir.c, ialloc.c, namei.c, include/linux/sysv_fs_i.h:
-	Implement per-Inode lookup offset cache.
-	Modelled after Ted's ext2 patch.
-
-Fri Oct 26 2001  Christoph Hellwig  <hch@infradead.org>
-
-	* inode.c, super.c, include/linux/sysv_fs.h,
-	  include/linux/sysv_fs_sb.h:
-	Remove symlink faking.	Noone really wants to use these as
-	linux filesystems and native OSes don't support it anyway.
-
-
diff --git a/fs/sysv/INTRO b/fs/sysv/INTRO
deleted file mode 100644
index de4e4d17cac..00000000000
--- a/fs/sysv/INTRO
+++ /dev/null
@@ -1,182 +0,0 @@
-This is the implementation of the SystemV/Coherent filesystem for Linux.
-It grew out of separate filesystem implementations
-
-    Xenix FS      Doug Evans <dje@cygnus.com>  June 1992
-    SystemV FS    Paul B. Monday <pmonday@eecs.wsu.edu> March-June 1993
-    Coherent FS   B. Haible <haible@ma2s2.mathematik.uni-karlsruhe.de> June 1993
-
-and was merged together in July 1993.
-
-These filesystems are rather similar. Here is a comparison with Minix FS:
-
-* Linux fdisk reports on partitions
-  - Minix FS     0x81 Linux/Minix
-  - Xenix FS     ??
-  - SystemV FS   ??
-  - Coherent FS  0x08 AIX bootable
-
-* Size of a block or zone (data allocation unit on disk)
-  - Minix FS     1024
-  - Xenix FS     1024 (also 512 ??)
-  - SystemV FS   1024 (also 512 and 2048)
-  - Coherent FS   512
-
-* General layout: all have one boot block, one super block and
-  separate areas for inodes and for directories/data.
-  On SystemV Release 2 FS (e.g. Microport) the first track is reserved and
-  all the block numbers (including the super block) are offset by one track.
-
-* Byte ordering of "short" (16 bit entities) on disk:
-  - Minix FS     little endian  0 1
-  - Xenix FS     little endian  0 1
-  - SystemV FS   little endian  0 1
-  - Coherent FS  little endian  0 1
-  Of course, this affects only the file system, not the data of files on it!
-
-* Byte ordering of "long" (32 bit entities) on disk:
-  - Minix FS     little endian  0 1 2 3
-  - Xenix FS     little endian  0 1 2 3
-  - SystemV FS   little endian  0 1 2 3
-  - Coherent FS  PDP-11         2 3 0 1
-  Of course, this affects only the file system, not the data of files on it!
-
-* Inode on disk: "short", 0 means non-existent, the root dir ino is:
-  - Minix FS                            1
-  - Xenix FS, SystemV FS, Coherent FS   2
-
-* Maximum number of hard links to a file:
-  - Minix FS     250
-  - Xenix FS     ??
-  - SystemV FS   ??
-  - Coherent FS  >=10000
-
-* Free inode management:
-  - Minix FS                             a bitmap
-  - Xenix FS, SystemV FS, Coherent FS
-      There is a cache of a certain number of free inodes in the super-block.
-      When it is exhausted, new free inodes are found using a linear search.
-
-* Free block management:
-  - Minix FS                             a bitmap
-  - Xenix FS, SystemV FS, Coherent FS
-      Free blocks are organized in a "free list". Maybe a misleading term,
-      since it is not true that every free block contains a pointer to
-      the next free block. Rather, the free blocks are organized in chunks
-      of limited size, and every now and then a free block contains pointers
-      to the free blocks pertaining to the next chunk; the first of these
-      contains pointers and so on. The list terminates with a "block number"
-      0 on Xenix FS and SystemV FS, with a block zeroed out on Coherent FS.
-
-* Super-block location:
-  - Minix FS     block 1 = bytes 1024..2047
-  - Xenix FS     block 1 = bytes 1024..2047
-  - SystemV FS   bytes 512..1023
-  - Coherent FS  block 1 = bytes 512..1023
-
-* Super-block layout:
-  - Minix FS
-                    unsigned short s_ninodes;
-                    unsigned short s_nzones;
-                    unsigned short s_imap_blocks;
-                    unsigned short s_zmap_blocks;
-                    unsigned short s_firstdatazone;
-                    unsigned short s_log_zone_size;
-                    unsigned long s_max_size;
-                    unsigned short s_magic;
-  - Xenix FS, SystemV FS, Coherent FS
-                    unsigned short s_firstdatazone;
-                    unsigned long  s_nzones;
-                    unsigned short s_fzone_count;
-                    unsigned long  s_fzones[NICFREE];
-                    unsigned short s_finode_count;
-                    unsigned short s_finodes[NICINOD];
-                    char           s_flock;
-                    char           s_ilock;
-                    char           s_modified;
-                    char           s_rdonly;
-                    unsigned long  s_time;
-                    short          s_dinfo[4]; -- SystemV FS only
-                    unsigned long  s_free_zones;
-                    unsigned short s_free_inodes;
-                    short          s_dinfo[4]; -- Xenix FS only
-                    unsigned short s_interleave_m,s_interleave_n; -- Coherent FS only
-                    char           s_fname[6];
-                    char           s_fpack[6];
-    then they differ considerably:
-        Xenix FS
-                    char           s_clean;
-                    char           s_fill[371];
-                    long           s_magic;
-                    long           s_type;
-        SystemV FS
-                    long           s_fill[12 or 14];
-                    long           s_state;
-                    long           s_magic;
-                    long           s_type;
-        Coherent FS
-                    unsigned long  s_unique;
-    Note that Coherent FS has no magic.
-
-* Inode layout:
-  - Minix FS
-                    unsigned short i_mode;
-                    unsigned short i_uid;
-                    unsigned long  i_size;
-                    unsigned long  i_time;
-                    unsigned char  i_gid;
-                    unsigned char  i_nlinks;
-                    unsigned short i_zone[7+1+1];
-  - Xenix FS, SystemV FS, Coherent FS
-                    unsigned short i_mode;
-                    unsigned short i_nlink;
-                    unsigned short i_uid;
-                    unsigned short i_gid;
-                    unsigned long  i_size;
-                    unsigned char  i_zone[3*(10+1+1+1)];
-                    unsigned long  i_atime;
-                    unsigned long  i_mtime;
-                    unsigned long  i_ctime;
-
-* Regular file data blocks are organized as
-  - Minix FS
-               7 direct blocks
-               1 indirect block (pointers to blocks)
-               1 double-indirect block (pointer to pointers to blocks)
-  - Xenix FS, SystemV FS, Coherent FS
-              10 direct blocks
-               1 indirect block (pointers to blocks)
-               1 double-indirect block (pointer to pointers to blocks)
-               1 triple-indirect block (pointer to pointers to pointers to blocks)
-
-* Inode size, inodes per block
-  - Minix FS        32   32
-  - Xenix FS        64   16
-  - SystemV FS      64   16
-  - Coherent FS     64    8
-
-* Directory entry on disk
-  - Minix FS
-                    unsigned short inode;
-                    char name[14/30];
-  - Xenix FS, SystemV FS, Coherent FS
-                    unsigned short inode;
-                    char name[14];
-
-* Dir entry size, dir entries per block
-  - Minix FS     16/32    64/32
-  - Xenix FS     16       64
-  - SystemV FS   16       64
-  - Coherent FS  16       32
-
-* How to implement symbolic links such that the host fsck doesn't scream:
-  - Minix FS     normal
-  - Xenix FS     kludge: as regular files with  chmod 1000
-  - SystemV FS   ??
-  - Coherent FS  kludge: as regular files with  chmod 1000
-
-
-Notation: We often speak of a "block" but mean a zone (the allocation unit)
-and not the disk driver's notion of "block".
-
-
-Bruno Haible  <haible@ma2s2.mathematik.uni-karlsruhe.de>
-- 
cgit v1.2.3


From 0da1480ec33d4bac8c32051c1d33202be6dc439f Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:40:03 -0800
Subject: [PATCH] proper prototype for remove_inode_dquot_ref()

Add a proper prototype for remove_inode_dquot_ref() in
include/linux/quotaops.h

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inode.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 699aa4f7aa7..9ecccab7326 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1242,9 +1242,6 @@ EXPORT_SYMBOL(inode_needs_sync);
  */
 #ifdef CONFIG_QUOTA
 
-/* Function back in dquot.c */
-int remove_inode_dquot_ref(struct inode *, int, struct list_head *);
-
 void remove_dquot_ref(struct super_block *sb, int type,
 			struct list_head *tofree_head)
 {
-- 
cgit v1.2.3


From 71a3d1b4f7633835048f96a4ba79c8c87f03df4b Mon Sep 17 00:00:00 2001
From: Mariusz Kozlowski <m.kozlowski@tuxland.pl>
Date: Wed, 6 Dec 2006 20:40:09 -0800
Subject: [PATCH] fs: ufs add missing bracket

Signed-off-by: Mariusz Kozlowski <m.kozlowski@tuxland.pl>
Cc: Evgeniy Dushistov <dushistov@mail.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ufs/util.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 28fce6c239b..7dd12bb1d62 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -299,7 +299,7 @@ static inline void *get_usb_offset(struct ufs_sb_private_info *uspi,
 
 #define ubh_get_addr16(ubh,begin) \
 	(((__fs16*)((ubh)->bh[(begin) >> (uspi->s_fshift-1)]->b_data)) + \
-	((begin) & (uspi->fsize>>1) - 1)))
+	((begin) & ((uspi->fsize>>1) - 1)))
 
 #define ubh_get_addr32(ubh,begin) \
 	(((__fs32*)((ubh)->bh[(begin) >> (uspi->s_fshift-2)]->b_data)) + \
-- 
cgit v1.2.3


From a8f48a95619cbce8f85423480e7d0a1bf971a62b Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Wed, 6 Dec 2006 20:40:13 -0800
Subject: [PATCH] ext3/4: don't do orphan processing on readonly devices

If you do something like:

  # touch foo
  # tail -f foo &
  # rm foo
  # <take snapshot>
  # <mount snapshot>

you'll panic, because ext3/4 tries to do orphan list processing on the
readonly snapshot device, and:

  kernel: journal commit I/O error
  kernel: Assertion failure in journal_flush_Rsmp_e2f189ce() at journal.c:1356: "!journal->j_checkpoint_transactions"
  kernel: Kernel panic: Fatal exception

for a truly readonly underlying device, it's reasonable and necessary
to just skip orphan list processing.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/super.c | 6 ++++++
 fs/ext4/super.c | 6 ++++++
 2 files changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8ab19813335..580b8a6ca97 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1264,6 +1264,12 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 		return;
 	}
 
+	if (bdev_read_only(sb->s_bdev)) {
+		printk(KERN_ERR "EXT3-fs: write access "
+			"unavailable, skipping orphan cleanup.\n");
+		return;
+	}
+
 	if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
 		if (es->s_last_orphan)
 			jbd_debug(1, "Errors on filesystem, "
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2ede7e2c701..486a641ca71 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1321,6 +1321,12 @@ static void ext4_orphan_cleanup (struct super_block * sb,
 		return;
 	}
 
+	if (bdev_read_only(sb->s_bdev)) {
+		printk(KERN_ERR "EXT4-fs: write access "
+			"unavailable, skipping orphan cleanup.\n");
+		return;
+	}
+
 	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
 		if (es->s_last_orphan)
 			jbd_debug(1, "Errors on filesystem, "
-- 
cgit v1.2.3


From 8de61e69c2feb10e5391cca67a3faf1d2bf77ce0 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@cs.washington.edu>
Date: Wed, 6 Dec 2006 20:40:16 -0800
Subject: [PATCH] fs: remove unused variable

Removed unused 'have_pt_gnu_stack' variable.

Reported by David Binderman <dcb314@hotmail.com>

Signed-off-by: David Rientjes <rientjes@cs.washington.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 14ea630a857..be5869d3499 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -544,7 +544,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	unsigned long reloc_func_desc = 0;
 	char passed_fileno[6];
 	struct files_struct *files;
-	int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT;
+	int executable_stack = EXSTACK_DEFAULT;
 	unsigned long def_flags = 0;
 	struct {
 		struct elfhdr elf_ex;
@@ -707,7 +707,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 				executable_stack = EXSTACK_DISABLE_X;
 			break;
 		}
-	have_pt_gnu_stack = (i < loc->elf_ex.e_phnum);
 
 	/* Some simple consistency checks for the interpreter */
 	if (elf_interpreter) {
-- 
cgit v1.2.3


From 3ee6f61ca0720c71086a9eaf3f5bd0f7c51fe139 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:40:23 -0800
Subject: [PATCH] remove NFSD_OPTIMIZE_SPACE

This patch removes the unused NFSD_OPTIMIZE_SPACE.

Additionally, it does differently what NFSD_OPTIMIZE_SPACE was supposed to do:

Nowadays, gcc knows best when to inline code, and CONFIG_CC_OPTIMIZE_FOR_SIZE
even tells gcc globally whether to optimize for size or for speed.  Therefore,
this patch also removes all inline's from these files.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs3xdr.c | 24 ++++++++++--------------
 fs/nfsd/nfsxdr.c  | 13 ++++---------
 2 files changed, 14 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index b4baca3053c..277df40f098 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -24,10 +24,6 @@
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
 
-#ifdef NFSD_OPTIMIZE_SPACE
-# define inline
-#endif
-
 
 /*
  * Mapping of S_IF* types to NFS file types
@@ -42,14 +38,14 @@ static u32	nfs3_ftypes[] = {
 /*
  * XDR functions for basic NFS types
  */
-static inline __be32 *
+static __be32 *
 encode_time3(__be32 *p, struct timespec *time)
 {
 	*p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec);
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 decode_time3(__be32 *p, struct timespec *time)
 {
 	time->tv_sec = ntohl(*p++);
@@ -57,7 +53,7 @@ decode_time3(__be32 *p, struct timespec *time)
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 decode_fh(__be32 *p, struct svc_fh *fhp)
 {
 	unsigned int size;
@@ -77,7 +73,7 @@ __be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp)
 	return decode_fh(p, fhp);
 }
 
-static inline __be32 *
+static __be32 *
 encode_fh(__be32 *p, struct svc_fh *fhp)
 {
 	unsigned int size = fhp->fh_handle.fh_size;
@@ -91,7 +87,7 @@ encode_fh(__be32 *p, struct svc_fh *fhp)
  * Decode a file name and make sure that the path contains
  * no slashes or null bytes.
  */
-static inline __be32 *
+static __be32 *
 decode_filename(__be32 *p, char **namp, int *lenp)
 {
 	char		*name;
@@ -107,7 +103,7 @@ decode_filename(__be32 *p, char **namp, int *lenp)
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 decode_sattr3(__be32 *p, struct iattr *iap)
 {
 	u32	tmp;
@@ -153,7 +149,7 @@ decode_sattr3(__be32 *p, struct iattr *iap)
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
 	      struct kstat *stat)
 {
@@ -186,7 +182,7 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 {
 	struct inode	*inode = fhp->fh_dentry->d_inode;
@@ -776,7 +772,7 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
 		return xdr_ressize_check(rqstp, p);
 }
 
-static inline __be32 *
+static __be32 *
 encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
 	     int namlen, ino_t ino)
 {
@@ -790,7 +786,7 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p,
 		struct svc_fh *fhp)
 {
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 56ebb1443e0..f5243f94399 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -18,11 +18,6 @@
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
 
-
-#ifdef NFSD_OPTIMIZE_SPACE
-# define inline
-#endif
-
 /*
  * Mapping of S_IF* types to NFS file types
  */
@@ -55,7 +50,7 @@ __be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp)
 	return decode_fh(p, fhp);
 }
 
-static inline __be32 *
+static __be32 *
 encode_fh(__be32 *p, struct svc_fh *fhp)
 {
 	memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE);
@@ -66,7 +61,7 @@ encode_fh(__be32 *p, struct svc_fh *fhp)
  * Decode a file name and make sure that the path contains
  * no slashes or null bytes.
  */
-static inline __be32 *
+static __be32 *
 decode_filename(__be32 *p, char **namp, int *lenp)
 {
 	char		*name;
@@ -82,7 +77,7 @@ decode_filename(__be32 *p, char **namp, int *lenp)
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 decode_pathname(__be32 *p, char **namp, int *lenp)
 {
 	char		*name;
@@ -98,7 +93,7 @@ decode_pathname(__be32 *p, char **namp, int *lenp)
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 decode_sattr(__be32 *p, struct iattr *iap)
 {
 	u32	tmp, tmp1;
-- 
cgit v1.2.3


From b593e48d2bf09c40c0f5165f2f2a10cc3983ea51 Mon Sep 17 00:00:00 2001
From: Yan Burman <burman.yan@gmail.com>
Date: Wed, 6 Dec 2006 20:40:32 -0800
Subject: [PATCH] affs: replace kmalloc+memset with kzalloc

Replace kmalloc+memset with kzalloc

Signed-off-by: Yan Burman <burman.yan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/affs/bitmap.c | 3 +--
 fs/afs/server.c  | 3 +--
 fs/afs/super.c   | 4 +---
 3 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index b0b953683c1..b330009fe42 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -289,12 +289,11 @@ int affs_init_bitmap(struct super_block *sb, int *flags)
 	sbi->s_bmap_count = (sbi->s_partition_size - sbi->s_reserved +
 				 sbi->s_bmap_bits - 1) / sbi->s_bmap_bits;
 	size = sbi->s_bmap_count * sizeof(*bm);
-	bm = sbi->s_bitmap = kmalloc(size, GFP_KERNEL);
+	bm = sbi->s_bitmap = kzalloc(size, GFP_KERNEL);
 	if (!sbi->s_bitmap) {
 		printk(KERN_ERR "AFFS: Bitmap allocation failed\n");
 		return -ENOMEM;
 	}
-	memset(sbi->s_bitmap, 0, size);
 
 	bmap_blk = (__be32 *)sbi->s_root_bh->b_data;
 	blk = sb->s_blocksize / 4 - 49;
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 22afaae1a4c..44aff81dc6a 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -55,13 +55,12 @@ int afs_server_lookup(struct afs_cell *cell, const struct in_addr *addr,
 	_enter("%p,%08x,", cell, ntohl(addr->s_addr));
 
 	/* allocate and initialise a server record */
-	server = kmalloc(sizeof(struct afs_server), GFP_KERNEL);
+	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
 	if (!server) {
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
 	}
 
-	memset(server, 0, sizeof(struct afs_server));
 	atomic_set(&server->usage, 1);
 
 	INIT_LIST_HEAD(&server->link);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 9a351c4c756..18d9b77ba40 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -242,14 +242,12 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent)
 	kenter("");
 
 	/* allocate a superblock info record */
-	as = kmalloc(sizeof(struct afs_super_info), GFP_KERNEL);
+	as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
 	if (!as) {
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
 	}
 
-	memset(as, 0, sizeof(struct afs_super_info));
-
 	afs_get_volume(params->volume);
 	as->volume = params->volume;
 
-- 
cgit v1.2.3


From 4a08a9f68168e547c2baf100020e9b96cae5fbd1 Mon Sep 17 00:00:00 2001
From: Yan Burman <burman.yan@gmail.com>
Date: Wed, 6 Dec 2006 20:40:34 -0800
Subject: [PATCH] jffs: replace kmalloc+memset with kzalloc

Replace kmalloc+memset with kzalloc

Signed-off-by: Yan Burman <burman.yan@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jffs/intrep.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 478a74e2e9d..d0e783f199e 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -592,7 +592,7 @@ jffs_add_virtual_root(struct jffs_control *c)
 	D2(printk("jffs_add_virtual_root(): "
 		  "Creating a virtual root directory.\n"));
 
-	if (!(root = kmalloc(sizeof(struct jffs_file), GFP_KERNEL))) {
+	if (!(root = kzalloc(sizeof(struct jffs_file), GFP_KERNEL))) {
 		return -ENOMEM;
 	}
 	no_jffs_file++;
@@ -604,7 +604,6 @@ jffs_add_virtual_root(struct jffs_control *c)
 	DJM(no_jffs_node++);
 	memset(node, 0, sizeof(struct jffs_node));
 	node->ino = JFFS_MIN_INO;
-	memset(root, 0, sizeof(struct jffs_file));
 	root->ino = JFFS_MIN_INO;
 	root->mode = S_IFDIR | S_IRWXU | S_IRGRP
 		     | S_IXGRP | S_IROTH | S_IXOTH;
-- 
cgit v1.2.3


From 15ad7cdcfd76450d4beebc789ec646664238184d Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 6 Dec 2006 20:40:36 -0800
Subject: [PATCH] struct seq_operations and struct file_operations
 constification

 - move some file_operations structs into the .rodata section

 - move static strings from policy_types[] array into the .rodata section

 - fix generic seq_operations usages, so that those structs may be defined
   as "const" as well

[akpm@osdl.org: couple of fixes]
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/seq_file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 555b9ac04c2..10690aa401c 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -26,7 +26,7 @@
  *	ERR_PTR(error).  In the end of sequence they return %NULL. ->show()
  *	returns 0 in case of success and negative number in case of error.
  */
-int seq_open(struct file *file, struct seq_operations *op)
+int seq_open(struct file *file, const struct seq_operations *op)
 {
 	struct seq_file *p = file->private_data;
 
@@ -408,7 +408,7 @@ EXPORT_SYMBOL(single_open);
 
 int single_release(struct inode *inode, struct file *file)
 {
-	struct seq_operations *op = ((struct seq_file *)file->private_data)->op;
+	const struct seq_operations *op = ((struct seq_file *)file->private_data)->op;
 	int res = seq_release(inode, file);
 	kfree(op);
 	return res;
-- 
cgit v1.2.3


From f46ba2235feab5e686b1234c328a0577cde86e21 Mon Sep 17 00:00:00 2001
From: Jun Chen <jimcgnu@yahoo.com>
Date: Wed, 6 Dec 2006 20:40:37 -0800
Subject: [PATCH] fs: make nls_cp936.c handle some U00XY characters and U20AC
 correctly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Twenty characters in cp936 are not correctly handled.  They're all in the
U00 plane.  nls_cp936 converts all U00XY to XY but this is not correct for
some characters.(e.g.  U00B7 -> A1A4, U00A8 -> A1A7).

This problem is fixed by generating u2c_00 based on all c2u_xx and changing
uni2char() to give U00 plane a special handling.  The "â¬"(U20AC,80 in
cp936) is also be handled properly.

Acked-by: Gang Chen <cgdlut@gmail.com>
Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nls/nls_cp936.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 103 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nls/nls_cp936.c b/fs/nls/nls_cp936.c
index 046fde8170e..65e640c61c8 100644
--- a/fs/nls/nls_cp936.c
+++ b/fs/nls/nls_cp936.c
@@ -4421,6 +4421,73 @@ static wchar_t *page_charset2uni[256] = {
 	c2u_F8, c2u_F9, c2u_FA, c2u_FB, c2u_FC, c2u_FD, c2u_FE, NULL,   
 };
 
+static unsigned char u2c_00[512] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x63 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x8C-0x8F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x93 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9C-0x9F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA3 */
+	0xA1, 0xE8, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xEC, /* 0xA4-0xA7 */
+	0xA1, 0xA7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xAC-0xAF */
+	0xA1, 0xE3, 0xA1, 0xC0, 0x00, 0x00, 0x00, 0x00, /* 0xB0-0xB3 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xA4, /* 0xB4-0xB7 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xBC-0xBF */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0-0xC3 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC4-0xC7 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC8-0xCB */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xCC-0xCF */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0-0xD3 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC1, /* 0xD4-0xD7 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDB */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xDC-0xDF */
+	0xA8, 0xA4, 0xA8, 0xA2, 0x00, 0x00, 0x00, 0x00, /* 0xE0-0xE3 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE4-0xE7 */
+	0xA8, 0xA8, 0xA8, 0xA6, 0xA8, 0xBA, 0x00, 0x00, /* 0xE8-0xEB */
+	0xA8, 0xAC, 0xA8, 0xAA, 0x00, 0x00, 0x00, 0x00, /* 0xEC-0xEF */
+	0x00, 0x00, 0x00, 0x00, 0xA8, 0xB0, 0xA8, 0xAE, /* 0xF0-0xF3 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC2, /* 0xF4-0xF7 */
+	0x00, 0x00, 0xA8, 0xB4, 0xA8, 0xB2, 0x00, 0x00, /* 0xF8-0xFB */
+	0xA8, 0xB9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xFC-0xFF */
+};
+
 static unsigned char u2c_01[512] = {
 	0xA8, 0xA1, 0xA8, 0xA1, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */
@@ -10825,7 +10892,7 @@ static unsigned char u2c_FF[512] = {
 };
 
 static unsigned char *page_uni2charset[256] = {
-	NULL,   u2c_01, u2c_02, u2c_03, u2c_04, NULL,   NULL,   NULL,   
+	u2c_00, u2c_01, u2c_02, u2c_03, u2c_04, NULL,   NULL,   NULL,
 	NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   
 	NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   
 	NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   
@@ -10936,11 +11003,34 @@ static int uni2char(const wchar_t uni,
 	unsigned char *uni2charset;
 	unsigned char cl = uni&0xFF;
 	unsigned char ch = (uni>>8)&0xFF;
-	int n;
+	unsigned char out0,out1;
 
 	if (boundlen <= 0)
 		return -ENAMETOOLONG;
 
+	if (uni == 0x20ac) {/* Euro symbol.The only exception with a non-ascii unicode */
+		out[0] = 0x80;
+		return 1;
+	}
+
+	if (ch == 0) { /* handle the U00 plane*/
+		/* if (cl == 0) return -EINVAL;*/ /*U0000 is legal in cp936*/
+		out0 = u2c_00[cl*2];
+		out1 = u2c_00[cl*2+1];
+		if (out0 == 0x00 && out1 == 0x00) {
+			if (cl<0x80) {
+				out[0] = cl;
+				return 1;
+			}
+			return -EINVAL;
+		} else {
+			if (boundlen <= 1)
+				return -ENAMETOOLONG;
+			out[0] = out0;
+			out[1] = out1;
+			return 2;
+		}
+	}
 
 	uni2charset = page_uni2charset[ch];
 	if (uni2charset) {
@@ -10950,15 +11040,10 @@ static int uni2char(const wchar_t uni,
 		out[1] = uni2charset[cl*2+1];
 		if (out[0] == 0x00 && out[1] == 0x00)
 			return -EINVAL;
-		n = 2;
-	} else if (ch==0 && cl) {
-		out[0] = cl;
-		n = 1;
+		return 2;
 	}
 	else
 		return -EINVAL;
-
-	return n;
 }
 
 static int char2uni(const unsigned char *rawstring, int boundlen,
@@ -10972,7 +11057,11 @@ static int char2uni(const unsigned char *rawstring, int boundlen,
 		return -ENAMETOOLONG;
 
 	if (boundlen == 1) {
-		*uni = rawstring[0];
+		if (rawstring[0]==0x80) { /* Euro symbol.The only exception with a non-ascii unicode */
+			*uni = 0x20ac;
+		} else {
+			*uni = rawstring[0];
+		}
 		return 1;
 	}
 
@@ -10986,7 +11075,11 @@ static int char2uni(const unsigned char *rawstring, int boundlen,
 			return -EINVAL;
 		n = 2;
 	} else{
-		*uni = ch;
+		if (ch==0x80) {/* Euro symbol.The only exception with a non-ascii unicode */
+			*uni = 0x20ac;
+		} else {
+			*uni = ch;
+		}
 		n = 1;
 	}
 	return n;
-- 
cgit v1.2.3


From 6d4df677f8a60ea6bc0ef1a596c1a3a79b1d4882 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 6 Dec 2006 20:40:39 -0800
Subject: [PATCH] do_coredump() and not stopping rewrite attacks?

On Sat, Dec 02, 2006 at 11:47:44PM +0300, Alexey Dobriyan wrote:
> David Binderman compiled 2.6.19 with icc and grepped for "was set but never
> used". Many warnings are on
> 	http://coderock.org/kj/unused-2.6.19-fs

Heh, the very first line:
fs/exec.c(1465): remark #593: variable "flag" was set but never used

fs/exec.c:
  1477		/*
  1478		 *	We cannot trust fsuid as being the "true" uid of the
  1479		 *	process nor do we know its entire history. We only know it
  1480		 *	was tainted so we dump it as root in mode 2.
  1481		 */
  1482		if (mm->dumpable == 2) {	/* Setuid core dump mode */
  1483			flag = O_EXCL;		/* Stop rewrite attacks */
  1484			current->fsuid = 0;	/* Dump root private */
  1485		}

And then filp_open follows with "flag" totally ignored.

(akpm: this restores the code to Alan's original version.  Andi's "Support
piping into commands in /proc/sys/kernel/core_pattern" (cset d025c9db) broke
it).

Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: <stable@kerenl.org>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 2092bd20746..add0e03c3ea 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1515,7 +1515,8 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 		ispipe = 1;
  	} else
  		file = filp_open(corename,
-				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE, 0600);
+				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
+				 0600);
 	if (IS_ERR(file))
 		goto fail_unlock;
 	inode = file->f_dentry->d_inode;
-- 
cgit v1.2.3


From b62e8ec2ac580b47c11eb76e8852ac1ec7d617cd Mon Sep 17 00:00:00 2001
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Date: Wed, 6 Dec 2006 20:40:43 -0800
Subject: [PATCH] aio: kill pointless ki_nbytes assignment in
 aio_setup_single_vector

io_submit_one assigns ki_left = ki_nbytes = iocb->aio_nbytes, then calls
down to aio_setup_iocb, then to aio_setup_single_vector.  In there,
ki_nbytes is reassigned to the same value it got two call stack above it.
There is no need to do so.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Zach Brown <zach.brown@oracle.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 13aa9298abf..f02ad2677fc 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1413,7 +1413,6 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
 	kiocb->ki_iovec->iov_len = kiocb->ki_left;
 	kiocb->ki_nr_segs = 1;
 	kiocb->ki_cur_seg = 0;
-	kiocb->ki_nbytes = kiocb->ki_left;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 97d2a80584b30b5cd32da411deca1986ef61877a Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@kvack.org>
Date: Wed, 6 Dec 2006 20:40:45 -0800
Subject: [PATCH] aio: remove ki_retried debugging member

Remove the ki_retried member from struct kiocb.  I think the idea was
bounced around a while back, but Arnaldo pointed out another reason that we
should dig it up when he pointed out that the last cacheline of struct
kiocb only contains 4 bytes.  By removing the debugging member, we save
more than the 8 byte on 64 bit machines.

Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
Acked-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index f02ad2677fc..d3a6ec2c962 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -666,17 +666,6 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
 	ssize_t (*retry)(struct kiocb *);
 	ssize_t ret;
 
-	if (iocb->ki_retried++ > 1024*1024) {
-		printk("Maximal retry count.  Bytes done %Zd\n",
-			iocb->ki_nbytes - iocb->ki_left);
-		return -EAGAIN;
-	}
-
-	if (!(iocb->ki_retried & 0xff)) {
-		pr_debug("%ld retry: %zd of %zd\n", iocb->ki_retried,
-			iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
-	}
-
 	if (!(retry = iocb->ki_retry)) {
 		printk("aio_run_iocb: iocb->ki_retry = NULL\n");
 		return 0;
@@ -1005,9 +994,6 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2)
 	kunmap_atomic(ring, KM_IRQ1);
 
 	pr_debug("added to ring %p at [%lu]\n", iocb, tail);
-
-	pr_debug("%ld retries: %zd of %zd\n", iocb->ki_retried,
-		iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
 put_rq:
 	/* everything turned out well, dispose of the aiocb. */
 	ret = __aio_put_req(ctx, iocb);
@@ -1590,7 +1576,6 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	req->ki_opcode = iocb->aio_lio_opcode;
 	init_waitqueue_func_entry(&req->ki_wait, aio_wake_function);
 	INIT_LIST_HEAD(&req->ki_wait.task_list);
-	req->ki_retried = 0;
 
 	ret = aio_setup_iocb(req);
 
-- 
cgit v1.2.3


From feb189274638ea357750a9e809f5a6e2223a082e Mon Sep 17 00:00:00 2001
From: Johann Lombardi <johann.lombardi@bull.net>
Date: Wed, 6 Dec 2006 20:40:46 -0800
Subject: [PATCH] ext4: fix credit calculation in
 ext4_ext_calc_credits_for_insert

Fix a nit in ext4_ext_calc_credits_for_insert().  Besides, credits for the
new root are already added in the index split accounting.

Signed-off-by: Johann Lombardi <johann.lombardi@bull.net>
Signed-off-by: Alex Tomas <alex@clusterfs.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/extents.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 1442ccbaea7..994a6e450e0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1531,16 +1531,17 @@ int inline ext4_ext_calc_credits_for_insert(struct inode *inode,
 
 	/*
 	 * tree can be full, so it would need to grow in depth:
-	 * allocation + old root + new root
+	 * we need one credit to modify old root, credits for
+	 * new root will be added in split accounting
 	 */
-	needed += 2 + 1 + 1;
+	needed += 1;
 
 	/*
 	 * Index split can happen, we would need:
 	 *    allocate intermediate indexes (bitmap + group)
 	 *  + change two blocks at each level, but root (already included)
 	 */
-	needed = (depth * 2) + (depth * 2);
+	needed += (depth * 2) + (depth * 2);
 
 	/* any allocation modifies superblock */
 	needed += 1;
-- 
cgit v1.2.3


From ef5036782e619ab394daaec5c00876fa6b17d7a1 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:41:23 -0800
Subject: [PATCH] ext3 balloc: reset windowsz when full

ext3_new_blocks should reset the reservation window size to 0 when squeezing
the last blocks out of an almost full filesystem, so the retry doesn't skip
any groups with less than half that free, reporting ENOSPC too soon.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index f96c40a90e2..db8ca34d102 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1552,6 +1552,7 @@ retry_alloc:
 	 */
 	if (my_rsv) {
 		my_rsv = NULL;
+		windowsz = 0;
 		group_no = goal_group;
 		goto retry_alloc;
 	}
-- 
cgit v1.2.3


From 1650242324a0c19d629a8be0d5a8ecdc174d31f8 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:41:25 -0800
Subject: [PATCH] ext3 balloc: fix off-by-one against grp_goal

grp_goal 0 is a genuine goal (unlike -1), so ext3_try_to_allocate_with_rsv
should treat it as such.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index db8ca34d102..0706a45d3c0 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1271,7 +1271,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 	}
 	/*
 	 * grp_goal is a group relative block number (if there is a goal)
-	 * 0 < grp_goal < EXT3_BLOCKS_PER_GROUP(sb)
+	 * 0 <= grp_goal < EXT3_BLOCKS_PER_GROUP(sb)
 	 * first block is a filesystem wide block number
 	 * first block is the block number of the first block in this group
 	 */
@@ -1307,7 +1307,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 			if (!goal_in_my_reservation(&my_rsv->rsv_window,
 							grp_goal, group, sb))
 				grp_goal = -1;
-		} else if (grp_goal > 0) {
+		} else if (grp_goal >= 0) {
 			int curr = my_rsv->rsv_end -
 					(grp_goal + group_first_block) + 1;
 
-- 
cgit v1.2.3


From ff50dc562bf6de00fec264b11531e2745209ba12 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:41:25 -0800
Subject: [PATCH] ext3 balloc: fix off-by-one against rsv_end

rsv_end is the last block within the reservation, so alloc_new_reservation
should accept start_block == rsv_end as success.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 0706a45d3c0..57cf47a9420 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1148,7 +1148,7 @@ retry:
 	 * check if the first free block is within the
 	 * free space we just reserved
 	 */
-	if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end)
+	if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
 		return 0;		/* success */
 	/*
 	 * if the first free bit we found is out of the reservable space
-- 
cgit v1.2.3


From c56d2561f7189762c4fce854630d4f7f6d64ccee Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:41:27 -0800
Subject: [PATCH] ext3 balloc: say rb_entry not list_entry

The reservations tree is an rb_tree not a list, so it's less confusing to use
rb_entry() than list_entry() - though they're both just container_of().

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 57cf47a9420..03d39b6fd30 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -144,7 +144,7 @@ restart:
 
 	printk("Block Allocation Reservation Windows Map (%s):\n", fn);
 	while (n) {
-		rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node);
+		rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
 		if (verbose)
 			printk("reservation window 0x%p "
 			       "start:  %lu, end:  %lu\n",
@@ -949,7 +949,7 @@ static int find_next_reservable_window(
 
 		prev = rsv;
 		next = rb_next(&rsv->rsv_node);
-		rsv = list_entry(next,struct ext3_reserve_window_node,rsv_node);
+		rsv = rb_entry(next,struct ext3_reserve_window_node,rsv_node);
 
 		/*
 		 * Reached the last reservation, we can just append to the
@@ -1193,7 +1193,7 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
 	if (!next)
 		my_rsv->rsv_end += size;
 	else {
-		next_rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node);
+		next_rsv = rb_entry(next, struct ext3_reserve_window_node, rsv_node);
 
 		if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
 			my_rsv->rsv_end += size;
-- 
cgit v1.2.3


From 2823b5535efad71e950ef50c2ce5f9e4dbaedc17 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:41:28 -0800
Subject: [PATCH] ext3 balloc: use io_error label

ext3_new_blocks has a nice io_error label for setting -EIO, so goto that in
the one place that doesn't already use it.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 03d39b6fd30..5a2c198762f 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1515,10 +1515,8 @@ retry_alloc:
 		if (group_no >= ngroups)
 			group_no = 0;
 		gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
-		if (!gdp) {
-			*errp = -EIO;
-			goto out;
-		}
+		if (!gdp)
+			goto io_error;
 		free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
 		/*
 		 * skip this group if the number of
-- 
cgit v1.2.3


From 7d1c520bb57e4b5e94ec937c13553dccf473341b Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:41:30 -0800
Subject: [PATCH] ext3 balloc: fix _with_rsv freeze

Port fix to the off-by-one in find_next_usable_block's memscan from ext2 to
ext3; but it didn't cause a serious problem for ext3 because the additional
ext3_test_allocatable check rescued it from the error.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 5a2c198762f..22161740ba2 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -730,7 +730,7 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
 		here = 0;
 
 	p = ((char *)bh->b_data) + (here >> 3);
-	r = memscan(p, 0, (maxblocks - here + 7) >> 3);
+	r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
 	next = (r - ((char *)bh->b_data)) << 3;
 
 	if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh))
-- 
cgit v1.2.3


From 7e0289766a0750a56260565bd3b74eb544483d45 Mon Sep 17 00:00:00 2001
From: Avantika Mathur <mathur@us.ibm.com>
Date: Wed, 6 Dec 2006 20:41:33 -0800
Subject: [PATCH] ext4: if expression format

changes instances of
	if ((lhs = expression)) {

to the preferred coding style

	lhs=expression;
	if (lhs) {

Signed-off-by: Avantika Mathur <mathur@us.ibm.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/extents.c | 63 ++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 42 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 994a6e450e0..06ce8e87731 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -186,7 +186,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 		depth = path->p_depth;
 
 		/* try to predict block placement */
-		if ((ex = path[depth].p_ext))
+		ex = path[depth].p_ext;
+		if (ex)
 			return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block));
 
 		/* it looks like index is empty;
@@ -543,7 +544,8 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 	struct ext4_extent_idx *ix;
 	int len, err;
 
-	if ((err = ext4_ext_get_access(handle, inode, curp)))
+	err = ext4_ext_get_access(handle, inode, curp);
+	if (err)
 		return err;
 
 	BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block));
@@ -665,7 +667,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	}
 	lock_buffer(bh);
 
-	if ((err = ext4_journal_get_create_access(handle, bh)))
+	err = ext4_journal_get_create_access(handle, bh);
+	if (err)
 		goto cleanup;
 
 	neh = ext_block_hdr(bh);
@@ -702,18 +705,21 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	set_buffer_uptodate(bh);
 	unlock_buffer(bh);
 
-	if ((err = ext4_journal_dirty_metadata(handle, bh)))
+	err = ext4_journal_dirty_metadata(handle, bh);
+	if (err)
 		goto cleanup;
 	brelse(bh);
 	bh = NULL;
 
 	/* correct old leaf */
 	if (m) {
-		if ((err = ext4_ext_get_access(handle, inode, path + depth)))
+		err = ext4_ext_get_access(handle, inode, path + depth);
+		if (err)
 			goto cleanup;
 		path[depth].p_hdr->eh_entries =
 		     cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
-		if ((err = ext4_ext_dirty(handle, inode, path + depth)))
+		err = ext4_ext_dirty(handle, inode, path + depth);
+		if (err)
 			goto cleanup;
 
 	}
@@ -736,7 +742,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 		}
 		lock_buffer(bh);
 
-		if ((err = ext4_journal_get_create_access(handle, bh)))
+		err = ext4_journal_get_create_access(handle, bh);
+		if (err)
 			goto cleanup;
 
 		neh = ext_block_hdr(bh);
@@ -780,7 +787,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 		set_buffer_uptodate(bh);
 		unlock_buffer(bh);
 
-		if ((err = ext4_journal_dirty_metadata(handle, bh)))
+		err = ext4_journal_dirty_metadata(handle, bh);
+		if (err)
 			goto cleanup;
 		brelse(bh);
 		bh = NULL;
@@ -854,7 +862,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 	}
 	lock_buffer(bh);
 
-	if ((err = ext4_journal_get_create_access(handle, bh))) {
+	err = ext4_journal_get_create_access(handle, bh);
+	if (err) {
 		unlock_buffer(bh);
 		goto out;
 	}
@@ -874,11 +883,13 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 	set_buffer_uptodate(bh);
 	unlock_buffer(bh);
 
-	if ((err = ext4_journal_dirty_metadata(handle, bh)))
+	err = ext4_journal_dirty_metadata(handle, bh);
+	if (err)
 		goto out;
 
 	/* create index in new top-level index: num,max,pointer */
-	if ((err = ext4_ext_get_access(handle, inode, curp)))
+	err = ext4_ext_get_access(handle, inode, curp);
+	if (err)
 		goto out;
 
 	curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
@@ -1070,20 +1081,24 @@ int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
 	 */
 	k = depth - 1;
 	border = path[depth].p_ext->ee_block;
-	if ((err = ext4_ext_get_access(handle, inode, path + k)))
+	err = ext4_ext_get_access(handle, inode, path + k);
+	if (err)
 		return err;
 	path[k].p_idx->ei_block = border;
-	if ((err = ext4_ext_dirty(handle, inode, path + k)))
+	err = ext4_ext_dirty(handle, inode, path + k);
+	if (err)
 		return err;
 
 	while (k--) {
 		/* change all left-side indexes */
 		if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
 			break;
-		if ((err = ext4_ext_get_access(handle, inode, path + k)))
+		err = ext4_ext_get_access(handle, inode, path + k);
+		if (err)
 			break;
 		path[k].p_idx->ei_block = border;
-		if ((err = ext4_ext_dirty(handle, inode, path + k)))
+		err = ext4_ext_dirty(handle, inode, path + k);
+		if (err)
 			break;
 	}
 
@@ -1142,7 +1157,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 				le16_to_cpu(newext->ee_len),
 				le32_to_cpu(ex->ee_block),
 				le16_to_cpu(ex->ee_len), ext_pblock(ex));
-		if ((err = ext4_ext_get_access(handle, inode, path + depth)))
+		err = ext4_ext_get_access(handle, inode, path + depth);
+		if (err)
 			return err;
 		ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len)
 					 + le16_to_cpu(newext->ee_len));
@@ -1192,7 +1208,8 @@ repeat:
 has_space:
 	nearex = path[depth].p_ext;
 
-	if ((err = ext4_ext_get_access(handle, inode, path + depth)))
+	err = ext4_ext_get_access(handle, inode, path + depth);
+	if (err)
 		goto cleanup;
 
 	if (!nearex) {
@@ -1486,10 +1503,12 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 	path--;
 	leaf = idx_pblock(path->p_idx);
 	BUG_ON(path->p_hdr->eh_entries == 0);
-	if ((err = ext4_ext_get_access(handle, inode, path)))
+	err = ext4_ext_get_access(handle, inode, path);
+	if (err)
 		return err;
 	path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1);
-	if ((err = ext4_ext_dirty(handle, inode, path)))
+	err = ext4_ext_dirty(handle, inode, path);
+	if (err)
 		return err;
 	ext_debug("index is empty, remove it, free block %llu\n", leaf);
 	bh = sb_find_get_block(inode->i_sb, leaf);
@@ -1930,7 +1949,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	mutex_lock(&EXT4_I(inode)->truncate_mutex);
 
 	/* check in cache */
-	if ((goal = ext4_ext_in_cache(inode, iblock, &newex))) {
+	goal = ext4_ext_in_cache(inode, iblock, &newex);
+	if (goal) {
 		if (goal == EXT4_EXT_CACHE_GAP) {
 			if (!create) {
 				/* block isn't allocated yet and
@@ -1969,7 +1989,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	 */
 	BUG_ON(path[depth].p_ext == NULL && depth != 0);
 
-	if ((ex = path[depth].p_ext)) {
+	ex = path[depth].p_ext;
+	if (ex) {
 	        unsigned long ee_block = le32_to_cpu(ex->ee_block);
 		ext4_fsblk_t ee_start = ext_pblock(ex);
 		unsigned short ee_len  = le16_to_cpu(ex->ee_len);
-- 
cgit v1.2.3


From 5d4958f923f431e148d9ba8ff894209a134b943e Mon Sep 17 00:00:00 2001
From: Avantika Mathur <mathur@us.ibm.com>
Date: Wed, 6 Dec 2006 20:41:35 -0800
Subject: [PATCH] ext4: kmalloc to kzalloc

Performs kmalloc to kzalloc conversion

Signed-off-by: Avantika Mathur <mathur@us.ibm.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/extents.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 06ce8e87731..e41be1866d7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -477,13 +477,12 @@ ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path)
 
 	/* account possible depth increase */
 	if (!path) {
-		path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 2),
+		path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
 				GFP_NOFS);
 		if (!path)
 			return ERR_PTR(-ENOMEM);
 		alloc = 1;
 	}
-	memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1));
 	path[0].p_hdr = eh;
 
 	/* walk through the tree */
@@ -643,10 +642,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	 * We need this to handle errors and free blocks
 	 * upon them.
 	 */
-	ablocks = kmalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
+	ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
 	if (!ablocks)
 		return -ENOMEM;
-	memset(ablocks, 0, sizeof(ext4_fsblk_t) * depth);
 
 	/* allocate all needed blocks */
 	ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
@@ -1773,12 +1771,11 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start)
 	 * We start scanning from right side, freeing all the blocks
 	 * after i_size and walking into the tree depth-wise.
 	 */
-	path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
+	path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
 	if (path == NULL) {
 		ext4_journal_stop(handle);
 		return -ENOMEM;
 	}
-	memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1));
 	path[0].p_hdr = ext_inode_hdr(inode);
 	if (ext4_ext_check_header(__FUNCTION__, inode, path[0].p_hdr)) {
 		err = -EIO;
-- 
cgit v1.2.3


From 09b882520bbe01f2e5044642109c1c1d19fe3559 Mon Sep 17 00:00:00 2001
From: Avantika Mathur <mathur@us.ibm.com>
Date: Wed, 6 Dec 2006 20:41:36 -0800
Subject: [PATCH] ext4: Eliminate inline functions

Removes all inline keywords, since the compiler will make static functions
inline when it is appropriate.

Signed-off-by: Avantika Mathur <mathur@us.ibm.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/extents.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e41be1866d7..dc2724fa762 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -48,7 +48,7 @@
  * ext_pblock:
  * combine low and high parts of physical block number into ext4_fsblk_t
  */
-static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
+static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
 {
 	ext4_fsblk_t block;
 
@@ -61,7 +61,7 @@ static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
  * idx_pblock:
  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
  */
-static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
+static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
 {
 	ext4_fsblk_t block;
 
@@ -75,7 +75,7 @@ static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
  * stores a large physical block number into an extent struct,
  * breaking it into parts
  */
-static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
+static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
 {
 	ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff));
 	ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
@@ -86,7 +86,7 @@ static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb
  * stores a large physical block number into an index struct,
  * breaking it into parts
  */
-static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
+static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
 {
 	ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff));
 	ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
@@ -216,7 +216,7 @@ ext4_ext_new_block(handle_t *handle, struct inode *inode,
 	return newblock;
 }
 
-static inline int ext4_ext_space_block(struct inode *inode)
+static int ext4_ext_space_block(struct inode *inode)
 {
 	int size;
 
@@ -229,7 +229,7 @@ static inline int ext4_ext_space_block(struct inode *inode)
 	return size;
 }
 
-static inline int ext4_ext_space_block_idx(struct inode *inode)
+static int ext4_ext_space_block_idx(struct inode *inode)
 {
 	int size;
 
@@ -242,7 +242,7 @@ static inline int ext4_ext_space_block_idx(struct inode *inode)
 	return size;
 }
 
-static inline int ext4_ext_space_root(struct inode *inode)
+static int ext4_ext_space_root(struct inode *inode)
 {
 	int size;
 
@@ -256,7 +256,7 @@ static inline int ext4_ext_space_root(struct inode *inode)
 	return size;
 }
 
-static inline int ext4_ext_space_root_idx(struct inode *inode)
+static int ext4_ext_space_root_idx(struct inode *inode)
 {
 	int size;
 
@@ -1103,7 +1103,7 @@ int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
 	return err;
 }
 
-static int inline
+static int
 ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 				struct ext4_extent *ex2)
 {
@@ -1395,7 +1395,7 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block,
 	return err;
 }
 
-static inline void
+static void
 ext4_ext_put_in_cache(struct inode *inode, __u32 block,
 			__u32 len, __u32 start, int type)
 {
@@ -1413,7 +1413,7 @@ ext4_ext_put_in_cache(struct inode *inode, __u32 block,
  * calculate boundaries of the gap that the requested block fits into
  * and cache this gap
  */
-static inline void
+static void
 ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 				unsigned long block)
 {
@@ -1454,7 +1454,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 	ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP);
 }
 
-static inline int
+static int
 ext4_ext_in_cache(struct inode *inode, unsigned long block,
 			struct ext4_extent *ex)
 {
@@ -1523,7 +1523,7 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
  * the caller should calculate credits under truncate_mutex and
  * pass the actual path.
  */
-int inline ext4_ext_calc_credits_for_insert(struct inode *inode,
+int ext4_ext_calc_credits_for_insert(struct inode *inode,
 						struct ext4_ext_path *path)
 {
 	int depth, needed;
@@ -1733,7 +1733,7 @@ out:
  * ext4_ext_more_to_rm:
  * returns 1 if current index has to be freed (even partial)
  */
-static int inline
+static int
 ext4_ext_more_to_rm(struct ext4_ext_path *path)
 {
 	BUG_ON(path->p_idx == NULL);
-- 
cgit v1.2.3


From c271c5c22b0a7ca45fda15f1f4d258bca36a5b94 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <Sunil.Mushran@oracle.com>
Date: Tue, 5 Dec 2006 17:56:35 -0800
Subject: ocfs2: local mounts

This allows users to format an ocfs2 file system with a special flag,
OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT. When the file system sees this flag, it
will not use any cluster services, nor will it require a cluster
configuration, thus acting like a 'local' file system.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c   | 79 +++++++++++++++++++++++++++++++++------------
 fs/ocfs2/heartbeat.c |  9 ++++++
 fs/ocfs2/inode.c     |  3 +-
 fs/ocfs2/journal.c   | 46 +++++++++++++++++++--------
 fs/ocfs2/journal.h   |  5 ++-
 fs/ocfs2/mmap.c      |  6 ++--
 fs/ocfs2/namei.c     |  8 +++--
 fs/ocfs2/ocfs2.h     |  5 +++
 fs/ocfs2/ocfs2_fs.h  |  5 ++-
 fs/ocfs2/super.c     | 90 +++++++++++++++++++++++++++++++++++++---------------
 fs/ocfs2/vote.c      |  3 ++
 11 files changed, 193 insertions(+), 66 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 69fba16efbd..e6220137bf6 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -770,7 +770,7 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
 			     int dlm_flags)
 {
 	int ret = 0;
-	enum dlm_status status;
+	enum dlm_status status = DLM_NORMAL;
 	unsigned long flags;
 
 	mlog_entry_void();
@@ -1138,6 +1138,7 @@ int ocfs2_rw_lock(struct inode *inode, int write)
 {
 	int status, level;
 	struct ocfs2_lock_res *lockres;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
 	BUG_ON(!inode);
 
@@ -1147,6 +1148,9 @@ int ocfs2_rw_lock(struct inode *inode, int write)
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     write ? "EXMODE" : "PRMODE");
 
+	if (ocfs2_mount_local(osb))
+		return 0;
+
 	lockres = &OCFS2_I(inode)->ip_rw_lockres;
 
 	level = write ? LKM_EXMODE : LKM_PRMODE;
@@ -1164,6 +1168,7 @@ void ocfs2_rw_unlock(struct inode *inode, int write)
 {
 	int level = write ? LKM_EXMODE : LKM_PRMODE;
 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
 	mlog_entry_void();
 
@@ -1171,7 +1176,8 @@ void ocfs2_rw_unlock(struct inode *inode, int write)
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     write ? "EXMODE" : "PRMODE");
 
-	ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
+	if (!ocfs2_mount_local(osb))
+		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
 
 	mlog_exit_void();
 }
@@ -1182,6 +1188,7 @@ int ocfs2_data_lock_full(struct inode *inode,
 {
 	int status = 0, level;
 	struct ocfs2_lock_res *lockres;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
 	BUG_ON(!inode);
 
@@ -1201,6 +1208,9 @@ int ocfs2_data_lock_full(struct inode *inode,
 		goto out;
 	}
 
+	if (ocfs2_mount_local(osb))
+		goto out;
+
 	lockres = &OCFS2_I(inode)->ip_data_lockres;
 
 	level = write ? LKM_EXMODE : LKM_PRMODE;
@@ -1269,6 +1279,7 @@ void ocfs2_data_unlock(struct inode *inode,
 {
 	int level = write ? LKM_EXMODE : LKM_PRMODE;
 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
 	mlog_entry_void();
 
@@ -1276,7 +1287,8 @@ void ocfs2_data_unlock(struct inode *inode,
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     write ? "EXMODE" : "PRMODE");
 
-	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)))
+	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
+	    !ocfs2_mount_local(osb))
 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
 
 	mlog_exit_void();
@@ -1467,8 +1479,9 @@ static int ocfs2_meta_lock_update(struct inode *inode,
 {
 	int status = 0;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
-	struct ocfs2_lock_res *lockres;
+	struct ocfs2_lock_res *lockres = NULL;
 	struct ocfs2_dinode *fe;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
 	mlog_entry_void();
 
@@ -1483,10 +1496,12 @@ static int ocfs2_meta_lock_update(struct inode *inode,
 	}
 	spin_unlock(&oi->ip_lock);
 
-	lockres = &oi->ip_meta_lockres;
+	if (!ocfs2_mount_local(osb)) {
+		lockres = &oi->ip_meta_lockres;
 
-	if (!ocfs2_should_refresh_lock_res(lockres))
-		goto bail;
+		if (!ocfs2_should_refresh_lock_res(lockres))
+			goto bail;
+	}
 
 	/* This will discard any caching information we might have had
 	 * for the inode metadata. */
@@ -1496,7 +1511,7 @@ static int ocfs2_meta_lock_update(struct inode *inode,
 	 * map (directories, bitmap files, etc) */
 	ocfs2_extent_map_trunc(inode, 0);
 
-	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
+	if (lockres && ocfs2_meta_lvb_is_trustable(inode, lockres)) {
 		mlog(0, "Trusting LVB on inode %llu\n",
 		     (unsigned long long)oi->ip_blkno);
 		ocfs2_refresh_inode_from_lvb(inode);
@@ -1543,7 +1558,8 @@ static int ocfs2_meta_lock_update(struct inode *inode,
 
 	status = 0;
 bail_refresh:
-	ocfs2_complete_lock_res_refresh(lockres, status);
+	if (lockres)
+		ocfs2_complete_lock_res_refresh(lockres, status);
 bail:
 	mlog_exit(status);
 	return status;
@@ -1585,7 +1601,7 @@ int ocfs2_meta_lock_full(struct inode *inode,
 			 int arg_flags)
 {
 	int status, level, dlm_flags, acquired;
-	struct ocfs2_lock_res *lockres;
+	struct ocfs2_lock_res *lockres = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct buffer_head *local_bh = NULL;
 
@@ -1607,6 +1623,9 @@ int ocfs2_meta_lock_full(struct inode *inode,
 		goto bail;
 	}
 
+	if (ocfs2_mount_local(osb))
+		goto local;
+
 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
 		wait_event(osb->recovery_event,
 			   ocfs2_node_map_is_empty(osb, &osb->recovery_map));
@@ -1636,6 +1655,7 @@ int ocfs2_meta_lock_full(struct inode *inode,
 		wait_event(osb->recovery_event,
 			   ocfs2_node_map_is_empty(osb, &osb->recovery_map));
 
+local:
 	/*
 	 * We only see this flag if we're being called from
 	 * ocfs2_read_locked_inode(). It means we're locking an inode
@@ -1644,7 +1664,8 @@ int ocfs2_meta_lock_full(struct inode *inode,
 	 */
 	if (inode->i_state & I_NEW) {
 		status = 0;
-		ocfs2_complete_lock_res_refresh(lockres, 0);
+		if (lockres)
+			ocfs2_complete_lock_res_refresh(lockres, 0);
 		goto bail;
 	}
 
@@ -1767,6 +1788,7 @@ void ocfs2_meta_unlock(struct inode *inode,
 {
 	int level = ex ? LKM_EXMODE : LKM_PRMODE;
 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
 	mlog_entry_void();
 
@@ -1774,7 +1796,8 @@ void ocfs2_meta_unlock(struct inode *inode,
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     ex ? "EXMODE" : "PRMODE");
 
-	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)))
+	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
+	    !ocfs2_mount_local(osb))
 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
 
 	mlog_exit_void();
@@ -1783,7 +1806,7 @@ void ocfs2_meta_unlock(struct inode *inode,
 int ocfs2_super_lock(struct ocfs2_super *osb,
 		     int ex)
 {
-	int status;
+	int status = 0;
 	int level = ex ? LKM_EXMODE : LKM_PRMODE;
 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
 	struct buffer_head *bh;
@@ -1794,6 +1817,9 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
 	if (ocfs2_is_hard_readonly(osb))
 		return -EROFS;
 
+	if (ocfs2_mount_local(osb))
+		goto bail;
+
 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1832,7 +1858,8 @@ void ocfs2_super_unlock(struct ocfs2_super *osb,
 	int level = ex ? LKM_EXMODE : LKM_PRMODE;
 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
 
-	ocfs2_cluster_unlock(osb, lockres, level);
+	if (!ocfs2_mount_local(osb))
+		ocfs2_cluster_unlock(osb, lockres, level);
 }
 
 int ocfs2_rename_lock(struct ocfs2_super *osb)
@@ -1843,6 +1870,9 @@ int ocfs2_rename_lock(struct ocfs2_super *osb)
 	if (ocfs2_is_hard_readonly(osb))
 		return -EROFS;
 
+	if (ocfs2_mount_local(osb))
+		return 0;
+
 	status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, 0, 0);
 	if (status < 0)
 		mlog_errno(status);
@@ -1854,7 +1884,8 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb)
 {
 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
 
-	ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE);
+	if (!ocfs2_mount_local(osb))
+		ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE);
 }
 
 int ocfs2_dentry_lock(struct dentry *dentry, int ex)
@@ -1869,6 +1900,9 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex)
 	if (ocfs2_is_hard_readonly(osb))
 		return -EROFS;
 
+	if (ocfs2_mount_local(osb))
+		return 0;
+
 	ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
 	if (ret < 0)
 		mlog_errno(ret);
@@ -1882,7 +1916,8 @@ void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 
-	ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
+	if (!ocfs2_mount_local(osb))
+		ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
 }
 
 /* Reference counting of the dlm debug structure. We want this because
@@ -2145,12 +2180,15 @@ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
 
 int ocfs2_dlm_init(struct ocfs2_super *osb)
 {
-	int status;
+	int status = 0;
 	u32 dlm_key;
-	struct dlm_ctxt *dlm;
+	struct dlm_ctxt *dlm = NULL;
 
 	mlog_entry_void();
 
+	if (ocfs2_mount_local(osb))
+		goto local;
+
 	status = ocfs2_dlm_init_debug(osb);
 	if (status < 0) {
 		mlog_errno(status);
@@ -2178,11 +2216,12 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
 		goto bail;
 	}
 
+	dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb);
+
+local:
 	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
 	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
 
-	dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb);
-
 	osb->dlm = dlm;
 
 	status = 0;
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c
index cbfd45a97a6..8fc52d6d0ce 100644
--- a/fs/ocfs2/heartbeat.c
+++ b/fs/ocfs2/heartbeat.c
@@ -154,6 +154,9 @@ int ocfs2_register_hb_callbacks(struct ocfs2_super *osb)
 {
 	int status;
 
+	if (ocfs2_mount_local(osb))
+		return 0;
+
 	status = o2hb_register_callback(&osb->osb_hb_down);
 	if (status < 0) {
 		mlog_errno(status);
@@ -172,6 +175,9 @@ void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb)
 {
 	int status;
 
+	if (ocfs2_mount_local(osb))
+		return;
+
 	status = o2hb_unregister_callback(&osb->osb_hb_down);
 	if (status < 0)
 		mlog_errno(status);
@@ -186,6 +192,9 @@ void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
 	int ret;
 	char *argv[5], *envp[3];
 
+	if (ocfs2_mount_local(osb))
+		return;
+
 	if (!osb->uuid_str) {
 		/* This can happen if we don't get far enough in mount... */
 		mlog(0, "No UUID with which to stop heartbeat!\n\n");
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 42e361f3054..e4d91493d7d 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -423,7 +423,8 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 	 * cluster lock before trusting anything anyway.
 	 */
 	can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
-		&& !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK);
+		&& !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK)
+		&& !ocfs2_mount_local(osb);
 
 	/*
 	 * To maintain backwards compatibility with older versions of
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 1d7f4ab1e5e..825cb0ae1b4 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -144,8 +144,10 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
 			ocfs2_abort(osb->sb, "Detected aborted journal");
 			handle = ERR_PTR(-EROFS);
 		}
-	} else
-		atomic_inc(&(osb->journal->j_num_trans));
+	} else {
+		if (!ocfs2_mount_local(osb))
+			atomic_inc(&(osb->journal->j_num_trans));
+	}
 
 	return handle;
 }
@@ -507,9 +509,23 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
 
 	BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
 
-	status = ocfs2_journal_toggle_dirty(osb, 0);
-	if (status < 0)
-		mlog_errno(status);
+	if (ocfs2_mount_local(osb)) {
+		journal_lock_updates(journal->j_journal);
+		status = journal_flush(journal->j_journal);
+		journal_unlock_updates(journal->j_journal);
+		if (status < 0)
+			mlog_errno(status);
+	}
+
+	if (status == 0) {
+		/*
+		 * Do not toggle if flush was unsuccessful otherwise
+		 * will leave dirty metadata in a "clean" journal
+		 */
+		status = ocfs2_journal_toggle_dirty(osb, 0);
+		if (status < 0)
+			mlog_errno(status);
+	}
 
 	/* Shutdown the kernel journal system */
 	journal_destroy(journal->j_journal);
@@ -549,7 +565,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
 	}
 }
 
-int ocfs2_journal_load(struct ocfs2_journal *journal)
+int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
 {
 	int status = 0;
 	struct ocfs2_super *osb;
@@ -576,14 +592,18 @@ int ocfs2_journal_load(struct ocfs2_journal *journal)
 	}
 
 	/* Launch the commit thread */
-	osb->commit_task = kthread_run(ocfs2_commit_thread, osb, "ocfs2cmt");
-	if (IS_ERR(osb->commit_task)) {
-		status = PTR_ERR(osb->commit_task);
+	if (!local) {
+		osb->commit_task = kthread_run(ocfs2_commit_thread, osb,
+					       "ocfs2cmt");
+		if (IS_ERR(osb->commit_task)) {
+			status = PTR_ERR(osb->commit_task);
+			osb->commit_task = NULL;
+			mlog(ML_ERROR, "unable to launch ocfs2commit thread, "
+			     "error=%d", status);
+			goto done;
+		}
+	} else
 		osb->commit_task = NULL;
-		mlog(ML_ERROR, "unable to launch ocfs2commit thread, error=%d",
-		     status);
-		goto done;
-	}
 
 done:
 	mlog_exit(status);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 899112ad813..e1216364d19 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -157,7 +157,7 @@ int    ocfs2_journal_init(struct ocfs2_journal *journal,
 void   ocfs2_journal_shutdown(struct ocfs2_super *osb);
 int    ocfs2_journal_wipe(struct ocfs2_journal *journal,
 			  int full);
-int    ocfs2_journal_load(struct ocfs2_journal *journal);
+int    ocfs2_journal_load(struct ocfs2_journal *journal, int local);
 int    ocfs2_check_journals_nolocks(struct ocfs2_super *osb);
 void   ocfs2_recovery_thread(struct ocfs2_super *osb,
 			     int node_num);
@@ -174,6 +174,9 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
 {
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
+	if (ocfs2_mount_local(osb))
+		return;
+
 	if (!ocfs2_inode_fully_checkpointed(inode)) {
 		/* WARNING: This only kicks off a single
 		 * checkpoint. If someone races you and adds more
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 69f85ae392d..51b02044768 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -83,10 +83,12 @@ static struct vm_operations_struct ocfs2_file_vm_ops = {
 int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	int ret = 0, lock_level = 0;
+	struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb);
 
 	/* We don't want to support shared writable mappings yet. */
-	if (((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE))
-	    && ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
+	if (!ocfs2_mount_local(osb) &&
+	    ((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) &&
+	    ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
 		mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags);
 		/* This is -EINVAL because generic_file_readonly_mmap
 		 * returns it in a similar situation. */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 21db45ddf14..9637039c263 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -587,9 +587,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	}
 
 	ocfs2_inode_set_new(osb, inode);
-	status = ocfs2_create_new_inode_locks(inode);
-	if (status < 0)
-		mlog_errno(status);
+	if (!ocfs2_mount_local(osb)) {
+		status = ocfs2_create_new_inode_locks(inode);
+		if (status < 0)
+			mlog_errno(status);
+	}
 
 	status = 0; /* error in ocfs2_create_new_inode_locks is not
 		     * critical */
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index b767fd7da6e..db8e77cd35d 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -349,6 +349,11 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
 	return ret;
 }
 
+static inline int ocfs2_mount_local(struct ocfs2_super *osb)
+{
+	return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
+}
+
 #define OCFS2_IS_VALID_DINODE(ptr)					\
 	(!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))
 
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 3330a5dc6be..2e90e89f160 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -86,7 +86,7 @@
 	OCFS2_SB(sb)->s_feature_incompat &= ~(mask)
 
 #define OCFS2_FEATURE_COMPAT_SUPP	0
-#define OCFS2_FEATURE_INCOMPAT_SUPP	0
+#define OCFS2_FEATURE_INCOMPAT_SUPP	OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT
 #define OCFS2_FEATURE_RO_COMPAT_SUPP	0
 
 /*
@@ -97,6 +97,9 @@
 #define OCFS2_FEATURE_INCOMPAT_HEARTBEAT_DEV	0x0002
 
 
+/* Used to denote a non-clustered volume */
+#define OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT	0x0008
+
 /*
  * Flags on ocfs2_dinode.i_flags
  */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 4bf39540e65..a6d2f8cc165 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -508,6 +508,27 @@ bail:
 	return status;
 }
 
+static int ocfs2_verify_heartbeat(struct ocfs2_super *osb)
+{
+	if (ocfs2_mount_local(osb)) {
+		if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) {
+			mlog(ML_ERROR, "Cannot heartbeat on a locally "
+			     "mounted device.\n");
+			return -EINVAL;
+		}
+	}
+
+	if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) {
+		if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb)) {
+			mlog(ML_ERROR, "Heartbeat has to be started to mount "
+			     "a read-write clustered device.\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct dentry *root;
@@ -516,16 +537,24 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 	struct inode *inode = NULL;
 	struct ocfs2_super *osb = NULL;
 	struct buffer_head *bh = NULL;
+	char nodestr[8];
 
 	mlog_entry("%p, %p, %i", sb, data, silent);
 
-	/* for now we only have one cluster/node, make sure we see it
-	 * in the heartbeat universe */
-	if (!o2hb_check_local_node_heartbeating()) {
+	if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) {
 		status = -EINVAL;
 		goto read_super_error;
 	}
 
+	/* for now we only have one cluster/node, make sure we see it
+	 * in the heartbeat universe */
+	if (parsed_opt & OCFS2_MOUNT_HB_LOCAL) {
+		if (!o2hb_check_local_node_heartbeating()) {
+			status = -EINVAL;
+			goto read_super_error;
+		}
+	}
+
 	/* probe for superblock */
 	status = ocfs2_sb_probe(sb, &bh, &sector_size);
 	if (status < 0) {
@@ -541,11 +570,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 	}
 	brelse(bh);
 	bh = NULL;
-
-	if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) {
-		status = -EINVAL;
-		goto read_super_error;
-	}
 	osb->s_mount_opt = parsed_opt;
 
 	sb->s_magic = OCFS2_SUPER_MAGIC;
@@ -588,21 +612,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	if (!ocfs2_is_hard_readonly(osb)) {
-		/* If this isn't a hard readonly mount, then we need
-		 * to make sure that heartbeat is in a valid state,
-		 * and that we mark ourselves soft readonly is -oro
-		 * was specified. */
-		if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) {
-			mlog(ML_ERROR, "No heartbeat for device (%s)\n",
-			     sb->s_id);
-			status = -EINVAL;
-			goto read_super_error;
-		}
-
 		if (sb->s_flags & MS_RDONLY)
 			ocfs2_set_ro_flag(osb, 0);
 	}
 
+	status = ocfs2_verify_heartbeat(osb);
+	if (status < 0) {
+		mlog_errno(status);
+		goto read_super_error;
+	}
+
 	osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
 						 ocfs2_debugfs_root);
 	if (!osb->osb_debug_root) {
@@ -635,9 +654,14 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 
 	ocfs2_complete_mount_recovery(osb);
 
-	printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %d, slot %d) "
+	if (ocfs2_mount_local(osb))
+		snprintf(nodestr, sizeof(nodestr), "local");
+	else
+		snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num);
+
+	printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) "
 	       "with %s data mode.\n",
-	       osb->dev_str, osb->node_num, osb->slot_num,
+	       osb->dev_str, nodestr, osb->slot_num,
 	       osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
 	       "ordered");
 
@@ -999,7 +1023,11 @@ static int ocfs2_fill_local_node_info(struct ocfs2_super *osb)
 
 	/* XXX hold a ref on the node while mounte?  easy enough, if
 	 * desirable. */
-	osb->node_num = o2nm_this_node();
+	if (ocfs2_mount_local(osb))
+		osb->node_num = 0;
+	else
+		osb->node_num = o2nm_this_node();
+
 	if (osb->node_num == O2NM_MAX_NODES) {
 		mlog(ML_ERROR, "could not find this host's node number\n");
 		status = -ENOENT;
@@ -1084,6 +1112,9 @@ static int ocfs2_mount_volume(struct super_block *sb)
 		goto leave;
 	}
 
+	if (ocfs2_mount_local(osb))
+		goto leave;
+
 	/* This should be sent *after* we recovered our journal as it
 	 * will cause other nodes to unmark us as needing
 	 * recovery. However, we need to send it *before* dropping the
@@ -1114,6 +1145,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 {
 	int tmp;
 	struct ocfs2_super *osb = NULL;
+	char nodestr[8];
 
 	mlog_entry("(0x%p)\n", sb);
 
@@ -1177,8 +1209,13 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
 	atomic_set(&osb->vol_state, VOLUME_DISMOUNTED);
 
-	printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %d)\n",
-	       osb->dev_str, osb->node_num);
+	if (ocfs2_mount_local(osb))
+		snprintf(nodestr, sizeof(nodestr), "local");
+	else
+		snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num);
+
+	printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n",
+	       osb->dev_str, nodestr);
 
 	ocfs2_delete_osb(osb);
 	kfree(osb);
@@ -1536,6 +1573,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
 {
 	int status = 0;
 	int dirty;
+	int local;
 	struct ocfs2_dinode *local_alloc = NULL; /* only used if we
 						  * recover
 						  * ourselves. */
@@ -1563,8 +1601,10 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
 		     "recovering volume.\n");
 	}
 
+	local = ocfs2_mount_local(osb);
+
 	/* will play back anything left in the journal. */
-	ocfs2_journal_load(osb->journal);
+	ocfs2_journal_load(osb->journal, local);
 
 	if (dirty) {
 		/* recover my local alloc if we didn't unmount cleanly. */
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 5b4dca79990..0315a8b61ed 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -1000,6 +1000,9 @@ int ocfs2_register_net_handlers(struct ocfs2_super *osb)
 {
 	int status = 0;
 
+	if (ocfs2_mount_local(osb))
+		return 0;
+
 	status = o2net_register_handler(OCFS2_MESSAGE_TYPE_RESPONSE,
 					osb->net_key,
 					sizeof(struct ocfs2_response_msg),
-- 
cgit v1.2.3


From 8903901dbf46bbdf1f70ffe7bc09cb6b97e6728a Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 7 Dec 2006 18:05:37 -0800
Subject: ocfs2: Synchronize feature incompat flags in ocfs2_fs.h

These got a little bit out of date with ocfs2-tools, make things consistent
again. We reserve a flag for sparse allocation code as that's pretty close
to testable at this point.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/ocfs2_fs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 2e90e89f160..b5c68567077 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -96,10 +96,19 @@
  */
 #define OCFS2_FEATURE_INCOMPAT_HEARTBEAT_DEV	0x0002
 
+/*
+ * tunefs sets this incompat flag before starting the resize and clears it
+ * at the end. This flag protects users from inadvertently mounting the fs
+ * after an aborted run without fsck-ing.
+ */
+#define OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG    0x0004
 
 /* Used to denote a non-clustered volume */
 #define OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT	0x0008
 
+/* Support for sparse allocation in b-trees */
+#define OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC	0x0010
+
 /*
  * Flags on ocfs2_dinode.i_flags
  */
-- 
cgit v1.2.3


From 296b75ed6a3b35f613961cefe4962ce1cf586d77 Mon Sep 17 00:00:00 2001
From: Andrew Beekhof <abeekhof@suse.de>
Date: Mon, 4 Dec 2006 14:04:53 +0100
Subject: [patch 1/3] OCFS2 - Expose struct o2nm_cluster

Subsequent patches (namely userspace heartbeat and configurable timeouts)
require access to the o2nm_cluster struct.  This patch does the necessary
shuffling.

Signed-off-by: Andrew Beekhof <abeekhof@suse.de>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/nodemanager.c | 13 +------------
 fs/ocfs2/cluster/nodemanager.h | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index d11753c50bc..dd4aefa11b3 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -35,7 +35,7 @@
 /* for now we operate under the assertion that there can be only one
  * cluster active at a time.  Changing this will require trickling
  * cluster references throughout where nodes are looked up */
-static struct o2nm_cluster *o2nm_single_cluster = NULL;
+struct o2nm_cluster *o2nm_single_cluster = NULL;
 
 #define OCFS2_MAX_HB_CTL_PATH 256
 static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
@@ -97,17 +97,6 @@ const char *o2nm_get_hb_ctl_path(void)
 }
 EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path);
 
-struct o2nm_cluster {
-	struct config_group	cl_group;
-	unsigned		cl_has_local:1;
-	u8			cl_local_node;
-	rwlock_t		cl_nodes_lock;
-	struct o2nm_node  	*cl_nodes[O2NM_MAX_NODES];
-	struct rb_root		cl_node_ip_tree;
-	/* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
-	unsigned long	cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
-};
-
 struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
 {
 	struct o2nm_node *node = NULL;
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index fce8033c310..b571cda9fbb 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -53,6 +53,20 @@ struct o2nm_node {
 	unsigned long		nd_set_attributes;
 };
 
+struct o2nm_cluster {
+	struct config_group	cl_group;
+	unsigned		cl_has_local:1;
+	u8			cl_local_node;
+	rwlock_t		cl_nodes_lock;
+	struct o2nm_node  	*cl_nodes[O2NM_MAX_NODES];
+	struct rb_root		cl_node_ip_tree;
+
+	/* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
+	unsigned long	cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
+};
+
+extern struct o2nm_cluster *o2nm_single_cluster;
+
 u8 o2nm_this_node(void);
 
 int o2nm_configured_node_map(unsigned long *map, unsigned bytes);
-- 
cgit v1.2.3


From b5dd80304da482d77b2320e1a01a189e656b9770 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.de>
Date: Mon, 4 Dec 2006 14:04:54 +0100
Subject: [patch 2/3] OCFS2 Configurable timeouts

Allow configuration of OCFS2 timeouts from userspace via configfs

Signed-off-by: Andrew Beekhof <abeekhof@suse.de>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/nodemanager.c  | 161 ++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/cluster/nodemanager.h  |   3 +
 fs/ocfs2/cluster/tcp.c          |  60 ++++++++++++---
 fs/ocfs2/cluster/tcp.h          |   7 ++
 fs/ocfs2/cluster/tcp_internal.h |   6 --
 5 files changed, 219 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index dd4aefa11b3..234f83f2897 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -532,6 +532,161 @@ static struct o2nm_node_group *to_o2nm_node_group(struct config_group *group)
 }
 #endif
 
+struct o2nm_cluster_attribute {
+	struct configfs_attribute attr;
+	ssize_t (*show)(struct o2nm_cluster *, char *);
+	ssize_t (*store)(struct o2nm_cluster *, const char *, size_t);
+};
+
+static ssize_t o2nm_cluster_attr_write(const char *page, ssize_t count,
+                                       unsigned int *val)
+{
+	unsigned long tmp;
+	char *p = (char *)page;
+
+	tmp = simple_strtoul(p, &p, 0);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
+
+	if (tmp == 0)
+		return -EINVAL;
+	if (tmp >= (u32)-1)
+		return -ERANGE;
+
+	*val = tmp;
+
+	return count;
+}
+
+static ssize_t o2nm_cluster_attr_idle_timeout_ms_read(
+	struct o2nm_cluster *cluster, char *page)
+{
+	return sprintf(page, "%u\n", cluster->cl_idle_timeout_ms);
+}
+
+static ssize_t o2nm_cluster_attr_idle_timeout_ms_write(
+	struct o2nm_cluster *cluster, const char *page, size_t count)
+{
+	ssize_t ret;
+	unsigned int val;
+
+	ret =  o2nm_cluster_attr_write(page, count, &val);
+
+	if (ret > 0) {
+		if (val <= cluster->cl_keepalive_delay_ms) {
+			mlog(ML_NOTICE, "o2net: idle timeout must be larger "
+			     "than keepalive delay\n");
+			return -EINVAL;
+		}
+		cluster->cl_idle_timeout_ms = val;
+	}
+
+	return ret;
+}
+
+static ssize_t o2nm_cluster_attr_keepalive_delay_ms_read(
+	struct o2nm_cluster *cluster, char *page)
+{
+	return sprintf(page, "%u\n", cluster->cl_keepalive_delay_ms);
+}
+
+static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write(
+	struct o2nm_cluster *cluster, const char *page, size_t count)
+{
+	ssize_t ret;
+	unsigned int val;
+
+	ret =  o2nm_cluster_attr_write(page, count, &val);
+
+	if (ret > 0) {
+		if (val >= cluster->cl_idle_timeout_ms) {
+			mlog(ML_NOTICE, "o2net: keepalive delay must be "
+			     "smaller than idle timeout\n");
+			return -EINVAL;
+		}
+		cluster->cl_keepalive_delay_ms = val;
+	}
+
+	return ret;
+}
+
+static ssize_t o2nm_cluster_attr_reconnect_delay_ms_read(
+	struct o2nm_cluster *cluster, char *page)
+{
+	return sprintf(page, "%u\n", cluster->cl_reconnect_delay_ms);
+}
+
+static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write(
+	struct o2nm_cluster *cluster, const char *page, size_t count)
+{
+	return o2nm_cluster_attr_write(page, count,
+	                               &cluster->cl_reconnect_delay_ms);
+}
+static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = {
+	.attr	= { .ca_owner = THIS_MODULE,
+		    .ca_name = "idle_timeout_ms",
+		    .ca_mode = S_IRUGO | S_IWUSR },
+	.show	= o2nm_cluster_attr_idle_timeout_ms_read,
+	.store	= o2nm_cluster_attr_idle_timeout_ms_write,
+};
+
+static struct o2nm_cluster_attribute o2nm_cluster_attr_keepalive_delay_ms = {
+	.attr	= { .ca_owner = THIS_MODULE,
+		    .ca_name = "keepalive_delay_ms",
+		    .ca_mode = S_IRUGO | S_IWUSR },
+	.show	= o2nm_cluster_attr_keepalive_delay_ms_read,
+	.store	= o2nm_cluster_attr_keepalive_delay_ms_write,
+};
+
+static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = {
+	.attr	= { .ca_owner = THIS_MODULE,
+		    .ca_name = "reconnect_delay_ms",
+		    .ca_mode = S_IRUGO | S_IWUSR },
+	.show	= o2nm_cluster_attr_reconnect_delay_ms_read,
+	.store	= o2nm_cluster_attr_reconnect_delay_ms_write,
+};
+
+static struct configfs_attribute *o2nm_cluster_attrs[] = {
+	&o2nm_cluster_attr_idle_timeout_ms.attr,
+	&o2nm_cluster_attr_keepalive_delay_ms.attr,
+	&o2nm_cluster_attr_reconnect_delay_ms.attr,
+	NULL,
+};
+static ssize_t o2nm_cluster_show(struct config_item *item,
+                                 struct configfs_attribute *attr,
+                                 char *page)
+{
+	struct o2nm_cluster *cluster = to_o2nm_cluster(item);
+	struct o2nm_cluster_attribute *o2nm_cluster_attr =
+		container_of(attr, struct o2nm_cluster_attribute, attr);
+	ssize_t ret = 0;
+
+	if (o2nm_cluster_attr->show)
+		ret = o2nm_cluster_attr->show(cluster, page);
+	return ret;
+}
+
+static ssize_t o2nm_cluster_store(struct config_item *item,
+                                  struct configfs_attribute *attr,
+                                  const char *page, size_t count)
+{
+	struct o2nm_cluster *cluster = to_o2nm_cluster(item);
+	struct o2nm_cluster_attribute *o2nm_cluster_attr =
+		container_of(attr, struct o2nm_cluster_attribute, attr);
+	ssize_t ret;
+
+	if (o2nm_cluster_attr->store == NULL) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = o2nm_cluster_attr->store(cluster, page, count);
+	if (ret < count)
+		goto out;
+out:
+	return ret;
+}
+
 static struct config_item *o2nm_node_group_make_item(struct config_group *group,
 						     const char *name)
 {
@@ -613,10 +768,13 @@ static void o2nm_cluster_release(struct config_item *item)
 
 static struct configfs_item_operations o2nm_cluster_item_ops = {
 	.release	= o2nm_cluster_release,
+	.show_attribute		= o2nm_cluster_show,
+	.store_attribute	= o2nm_cluster_store,
 };
 
 static struct config_item_type o2nm_cluster_type = {
 	.ct_item_ops	= &o2nm_cluster_item_ops,
+	.ct_attrs	= o2nm_cluster_attrs,
 	.ct_owner	= THIS_MODULE,
 };
 
@@ -667,6 +825,9 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
 	cluster->cl_group.default_groups[2] = NULL;
 	rwlock_init(&cluster->cl_nodes_lock);
 	cluster->cl_node_ip_tree = RB_ROOT;
+	cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
+	cluster->cl_idle_timeout_ms    = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
+	cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
 
 	ret = &cluster->cl_group;
 	o2nm_single_cluster = cluster;
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index b571cda9fbb..8fb23cacc2f 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -60,6 +60,9 @@ struct o2nm_cluster {
 	rwlock_t		cl_nodes_lock;
 	struct o2nm_node  	*cl_nodes[O2NM_MAX_NODES];
 	struct rb_root		cl_node_ip_tree;
+	unsigned int		cl_idle_timeout_ms;
+	unsigned int		cl_keepalive_delay_ms;
+	unsigned int		cl_reconnect_delay_ms;
 
 	/* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
 	unsigned long	cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 9b3209dc0b1..ebbaee664c6 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -147,6 +147,28 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes);
 static void o2net_sc_send_keep_req(struct work_struct *work);
 static void o2net_idle_timer(unsigned long data);
 static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
+static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
+
+/*
+ * FIXME: These should use to_o2nm_cluster_from_node(), but we end up
+ * losing our parent link to the cluster during shutdown. This can be
+ * solved by adding a pre-removal callback to configfs, or passing
+ * around the cluster with the node. -jeffm
+ */
+static inline int o2net_reconnect_delay(struct o2nm_node *node)
+{
+	return o2nm_single_cluster->cl_reconnect_delay_ms;
+}
+
+static inline int o2net_keepalive_delay(struct o2nm_node *node)
+{
+	return o2nm_single_cluster->cl_keepalive_delay_ms;
+}
+
+static inline int o2net_idle_timeout(struct o2nm_node *node)
+{
+	return o2nm_single_cluster->cl_idle_timeout_ms;
+}
 
 static inline int o2net_sys_err_to_errno(enum o2net_system_error err)
 {
@@ -271,6 +293,8 @@ static void sc_kref_release(struct kref *kref)
 {
 	struct o2net_sock_container *sc = container_of(kref,
 					struct o2net_sock_container, sc_kref);
+	BUG_ON(timer_pending(&sc->sc_idle_timeout));
+
 	sclog(sc, "releasing\n");
 
 	if (sc->sc_sock) {
@@ -424,9 +448,9 @@ static void o2net_set_nn_state(struct o2net_node *nn,
 		/* delay if we're withing a RECONNECT_DELAY of the
 		 * last attempt */
 		delay = (nn->nn_last_connect_attempt +
-			 msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS))
+			 msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
 			- jiffies;
-		if (delay > msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS))
+		if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
 			delay = 0;
 		mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay);
 		queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay);
@@ -1105,7 +1129,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
 	/* set valid and queue the idle timers only if it hasn't been
 	 * shut down already */
 	if (nn->nn_sc == sc) {
-		o2net_sc_postpone_idle(sc);
+		o2net_sc_reset_idle_timer(sc);
 		o2net_set_nn_state(nn, sc, 1, 0);
 	}
 	spin_unlock(&nn->nn_lock);
@@ -1287,8 +1311,10 @@ static void o2net_idle_timer(unsigned long data)
 
 	do_gettimeofday(&now);
 
-	printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 "
-	     "seconds, shutting it down.\n", SC_NODEF_ARGS(sc));
+	printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
+	     "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
+		     o2net_idle_timeout(sc->sc_node) / 1000,
+		     o2net_idle_timeout(sc->sc_node) % 1000);
 	mlog(ML_NOTICE, "here are some times that might help debug the "
 	     "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
 	     "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
@@ -1306,14 +1332,21 @@ static void o2net_idle_timer(unsigned long data)
 	o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
 }
 
-static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
+static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
 {
 	o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
 	o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
-				    O2NET_KEEPALIVE_DELAY_SECS * HZ);
+		      msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node)));
 	do_gettimeofday(&sc->sc_tv_timer);
 	mod_timer(&sc->sc_idle_timeout,
-		  jiffies + (O2NET_IDLE_TIMEOUT_SECS * HZ));
+	       jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node)));
+}
+
+static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
+{
+	/* Only push out an existing timer */
+	if (timer_pending(&sc->sc_idle_timeout))
+		o2net_sc_reset_idle_timer(sc);
 }
 
 /* this work func is kicked whenever a path sets the nn state which doesn't
@@ -1435,9 +1468,12 @@ static void o2net_connect_expired(struct work_struct *work)
 
 	spin_lock(&nn->nn_lock);
 	if (!nn->nn_sc_valid) {
+		struct o2nm_node *node = nn->nn_sc->sc_node;
 		mlog(ML_ERROR, "no connection established with node %u after "
-		     "%u seconds, giving up and returning errors.\n",
-		     o2net_num_from_nn(nn), O2NET_IDLE_TIMEOUT_SECS);
+		     "%u.%u seconds, giving up and returning errors.\n",
+		     o2net_num_from_nn(nn),
+		     o2net_idle_timeout(node) / 1000,
+		     o2net_idle_timeout(node) % 1000);
 
 		o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
 	}
@@ -1489,14 +1525,14 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
 
 	/* ensure an immediate connect attempt */
 	nn->nn_last_connect_attempt = jiffies -
-		(msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS) + 1);
+		(msecs_to_jiffies(o2net_reconnect_delay(node)) + 1);
 
 	if (node_num != o2nm_this_node()) {
 		/* heartbeat doesn't work unless a local node number is
 		 * configured and doing so brings up the o2net_wq, so we can
 		 * use it.. */
 		queue_delayed_work(o2net_wq, &nn->nn_connect_expired,
-				   O2NET_IDLE_TIMEOUT_SECS * HZ);
+		                   msecs_to_jiffies(o2net_idle_timeout(node)));
 
 		/* believe it or not, accept and node hearbeating testing
 		 * can succeed for this node before we got here.. so
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index 616ff2b8434..2e08976050f 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -54,6 +54,13 @@ typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data)
 
 #define O2NET_MAX_PAYLOAD_BYTES  (4096 - sizeof(struct o2net_msg))
 
+/* same as hb delay, we're waiting for another node to recognize our hb */
+#define O2NET_RECONNECT_DELAY_MS_DEFAULT	2000
+
+#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT	5000
+#define O2NET_IDLE_TIMEOUT_MS_DEFAULT		10000
+
+
 /* TODO: figure this out.... */
 static inline int o2net_link_down(int err, struct socket *sock)
 {
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index daebbd3a2c8..56f7ee1d254 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -27,17 +27,11 @@
 #define O2NET_MSG_KEEP_REQ_MAGIC  ((u16)0xfa57)
 #define O2NET_MSG_KEEP_RESP_MAGIC ((u16)0xfa58)
 
-/* same as hb delay, we're waiting for another node to recognize our hb */
-#define O2NET_RECONNECT_DELAY_MS	O2HB_REGION_TIMEOUT_MS
-
 /* we're delaying our quorum decision so that heartbeat will have timed
  * out truly dead nodes by the time we come around to making decisions
  * on their number */
 #define O2NET_QUORUM_DELAY_MS	((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS)
 
-#define O2NET_KEEPALIVE_DELAY_SECS	5
-#define O2NET_IDLE_TIMEOUT_SECS		10
-
 /* 
  * This version number represents quite a lot, unfortunately.  It not
  * only represents the raw network message protocol on the wire but also
-- 
cgit v1.2.3


From 33ec32fae0e2c4433bfd1e74cbde6cb16604a719 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Fri, 8 Dec 2006 04:14:28 +0000
Subject: [CIFS] Fix NTLMv2 mounts to Windows servers

Windows servers are pickier about NTLMv2 than Samba.
This enables more secure mounts to Windows (not just Samba)
ie when "sec=ntlmv2" is specified on the mount.

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES       | 3 ++-
 fs/cifs/cifsencrypt.c | 4 +++-
 fs/cifs/cifspdu.h     | 8 +++++++-
 3 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 0b3c37ef52e..3539d6ef961 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -5,7 +5,8 @@ Allow null user to be specified on mount ("username="). Do not return
 EINVAL on readdir when filldir fails due to overwritten blocksize
 (fixes FC problem).  Return error in rename 2nd attempt retry (ie report
 if rename by handle also fails, after rename by path fails, we were
-not reporting whether the retry worked or not).
+not reporting whether the retry worked or not). Fix NTLMv2 to
+work to Windows servers (mount with option "sec=ntlmv2").
 
 Version 1.45
 ------------
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 4bc250b2d9f..fdeda519eac 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -372,8 +372,10 @@ void setup_ntlmv2_rsp(struct cifsSesInfo * ses, char * resp_buf,
 	buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
 	get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
 	buf->reserved2 = 0;
-	buf->names[0].type = 0;
+	buf->names[0].type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE);
 	buf->names[0].length = 0;
+	buf->names[1].type = 0;
+	buf->names[1].length = 0;
 
 	/* calculate buf->ntlmv2_hash */
 	rc = calc_ntlmv2_hash(ses, nls_cp);
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 6df9dadba64..068ef51edbf 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -580,6 +580,12 @@ typedef union smb_com_session_setup_andx {
 
 /* format of NLTMv2 Response ie "case sensitive password" hash when NTLMv2 */
 
+#define NTLMSSP_SERVER_TYPE	1
+#define NTLMSSP_DOMAIN_TYPE	2
+#define NTLMSSP_FQ_DOMAIN_TYPE	3
+#define NTLMSSP_DNS_DOMAIN_TYPE	4
+#define NTLMSSP_DNS_PARENT_TYPE	5
+
 struct ntlmssp2_name {
 	__le16 type;
 	__le16 length;
@@ -593,7 +599,7 @@ struct ntlmv2_resp {
 	__le64  time;
 	__u64  client_chal; /* random */
 	__u32  reserved2;
-	struct ntlmssp2_name names[1];
+	struct ntlmssp2_name names[2];
 	/* array of name entries could follow ending in minimum 4 byte struct */
 } __attribute__((packed));
 
-- 
cgit v1.2.3


From 01a732eb22dab153732b94962467eaabb1fd3797 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Fri, 8 Dec 2006 02:35:52 -0800
Subject: [PATCH] ext4 calls journal_stop

journal_stop() is not defined for ext4; change to ext4_journal_stop().

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1d85d4ec959..a127cc03c9f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1232,7 +1232,7 @@ retry:
 				from, to, NULL, do_journal_get_write_access);
 		if (ret)
 			/* fatal error, just put the handle and return */
-			journal_stop(handle);
+			ext4_journal_stop(handle);
 	}
 	return ret;
 
-- 
cgit v1.2.3


From a2ee8649ba6d71416712e798276bf7c40b64e6e5 Mon Sep 17 00:00:00 2001
From: Herbert Poetzl <herbert@13thfloor.at>
Date: Fri, 8 Dec 2006 02:36:00 -0800
Subject: [PATCH] Fix linux banner utsname information

utsname information is shown in the linux banner, which also is used for
/proc/version (which can have different utsname values inside a uts
namespaces).  this patch makes the varying data arguments and changes the
string to a format string, using those arguments.

Signed-off-by: Herbert Poetzl <herbert@13thfloor.at>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/proc_misc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 51815cece6f..9397ff62553 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -252,8 +252,8 @@ static int version_read_proc(char *page, char **start, off_t off,
 {
 	int len;
 
-	strcpy(page, linux_banner);
-	len = strlen(page);
+	len = sprintf(page, linux_banner,
+		utsname()->release, utsname()->version);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
-- 
cgit v1.2.3


From 24ec839c431eb79bb8f6abc00c4e1eb3b8c4d517 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 Dec 2006 02:36:04 -0800
Subject: [PATCH] tty: ->signal->tty locking

Fix the locking of signal->tty.

Use ->sighand->siglock to protect ->signal->tty; this lock is already used
by most other members of ->signal/->sighand.  And unless we are 'current'
or the tasklist_lock is held we need ->siglock to access ->signal anyway.

(NOTE: sys_unshare() is broken wrt ->sighand locking rules)

Note that tty_mutex is held over tty destruction, so while holding
tty_mutex any tty pointer remains valid.  Otherwise the lifetime of ttys
are governed by their open file handles.  This leaves some holes for tty
access from signal->tty (or any other non file related tty access).

It solves the tty SLAB scribbles we were seeing.

(NOTE: the change from group_send_sig_info to __group_send_sig_info needs to
       be examined by someone familiar with the security framework, I think
       it is safe given the SEND_SIG_PRIV from other __group_send_sig_info
       invocations)

[schwidefsky@de.ibm.com: 3270 fix]
[akpm@osdl.org: various post-viro fixes]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Alan Cox <alan@redhat.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: James Morris <jmorris@namei.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Jan Kara <jack@ucw.cz>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dquot.c | 14 ++++++++------
 fs/open.c  |  1 +
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/dquot.c b/fs/dquot.c
index f9cd5e23ebd..89066b19124 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -828,6 +828,7 @@ static inline int need_print_warning(struct dquot *dquot)
 static void print_warning(struct dquot *dquot, const char warntype)
 {
 	char *msg = NULL;
+	struct tty_struct *tty;
 	int flag = (warntype == BHARDWARN || warntype == BSOFTLONGWARN) ? DQ_BLKS_B :
 	  ((warntype == IHARDWARN || warntype == ISOFTLONGWARN) ? DQ_INODES_B : 0);
 
@@ -835,14 +836,15 @@ static void print_warning(struct dquot *dquot, const char warntype)
 		return;
 
 	mutex_lock(&tty_mutex);
-	if (!current->signal->tty)
+	tty = get_current_tty();
+	if (!tty)
 		goto out_lock;
-	tty_write_message(current->signal->tty, dquot->dq_sb->s_id);
+	tty_write_message(tty, dquot->dq_sb->s_id);
 	if (warntype == ISOFTWARN || warntype == BSOFTWARN)
-		tty_write_message(current->signal->tty, ": warning, ");
+		tty_write_message(tty, ": warning, ");
 	else
-		tty_write_message(current->signal->tty, ": write failed, ");
-	tty_write_message(current->signal->tty, quotatypes[dquot->dq_type]);
+		tty_write_message(tty, ": write failed, ");
+	tty_write_message(tty, quotatypes[dquot->dq_type]);
 	switch (warntype) {
 		case IHARDWARN:
 			msg = " file limit reached.\r\n";
@@ -863,7 +865,7 @@ static void print_warning(struct dquot *dquot, const char warntype)
 			msg = " block quota exceeded.\r\n";
 			break;
 	}
-	tty_write_message(current->signal->tty, msg);
+	tty_write_message(tty, msg);
 out_lock:
 	mutex_unlock(&tty_mutex);
 }
diff --git a/fs/open.c b/fs/open.c
index 89e0c237a63..3b56192816c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1087,6 +1087,7 @@ EXPORT_SYMBOL(sys_close);
 asmlinkage long sys_vhangup(void)
 {
 	if (capable(CAP_SYS_TTY_CONFIG)) {
+		/* XXX: this needs locking */
 		tty_vhangup(current->signal->tty);
 		return 0;
 	}
-- 
cgit v1.2.3


From 915935041281c64589e2b7fe38437be22567fb6f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 8 Dec 2006 02:36:07 -0800
Subject: [PATCH] do_task_stat(): don't take tty_mutex

->signal->tty is protected by ->siglock, no need to take the global tty_mutex.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/array.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 25e917fb473..b0cd014a39b 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -346,20 +346,13 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 	sigemptyset(&sigcatch);
 	cutime = cstime = utime = stime = cputime_zero;
 
-	mutex_lock(&tty_mutex);
 	rcu_read_lock();
 	if (lock_task_sighand(task, &flags)) {
 		struct signal_struct *sig = task->signal;
-		struct tty_struct *tty = sig->tty;
-
-		if (tty) {
-			/*
-			 * sig->tty is not stable, but tty_mutex
-			 * protects us from release_dev(tty)
-			 */
-			barrier();
-			tty_pgrp = tty->pgrp;
-			tty_nr = new_encode_dev(tty_devnum(tty));
+
+		if (sig->tty) {
+			tty_pgrp = sig->tty->pgrp;
+			tty_nr = new_encode_dev(tty_devnum(sig->tty));
 		}
 
 		num_threads = atomic_read(&sig->count);
@@ -395,7 +388,6 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 		unlock_task_sighand(task, &flags);
 	}
 	rcu_read_unlock();
-	mutex_unlock(&tty_mutex);
 
 	if (!whole || num_threads<2)
 		wchan = get_wchan(task);
-- 
cgit v1.2.3


From 2e7b651df113c8a463853e4169951c52c39f9d19 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 Dec 2006 02:36:13 -0800
Subject: [PATCH] remove the old bd_mutex lockdep annotation

Remove the old complex and crufty bd_mutex annotation.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jason Baron <jbaron@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 180 ++++++++++++++++++---------------------------------------
 1 file changed, 55 insertions(+), 125 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 13816b4d76f..36c38f48a4e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -762,7 +762,7 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 	if (!bo)
 		return -ENOMEM;
 
-	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
+	mutex_lock(&bdev->bd_mutex);
 	res = bd_claim(bdev, holder);
 	if (res == 0) {
 		found = find_bd_holder(bdev, bo);
@@ -796,7 +796,7 @@ static void bd_release_from_kobject(struct block_device *bdev,
 	if (!kobj)
 		return;
 
-	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
+	mutex_lock(&bdev->bd_mutex);
 	bd_release(bdev);
 	if ((bo = del_bd_holder(bdev, kobj)))
 		free_bd_holder(bo);
@@ -854,22 +854,6 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode)
 
 EXPORT_SYMBOL(open_by_devnum);
 
-static int
-blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags);
-
-struct block_device *open_partition_by_devnum(dev_t dev, unsigned mode)
-{
-	struct block_device *bdev = bdget(dev);
-	int err = -ENOMEM;
-	int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY;
-	if (bdev)
-		err = blkdev_get_partition(bdev, mode, flags);
-	return err ? ERR_PTR(err) : bdev;
-}
-
-EXPORT_SYMBOL(open_partition_by_devnum);
-
-
 /*
  * This routine checks whether a removable media has been changed,
  * and invalidates all buffer-cache-entries in that case. This
@@ -916,66 +900,7 @@ void bd_set_size(struct block_device *bdev, loff_t size)
 }
 EXPORT_SYMBOL(bd_set_size);
 
-static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
-{
-	int ret = 0;
-	struct inode *bd_inode = bdev->bd_inode;
-	struct gendisk *disk = bdev->bd_disk;
-
-	mutex_lock_nested(&bdev->bd_mutex, subclass);
-	lock_kernel();
-	if (!--bdev->bd_openers) {
-		sync_blockdev(bdev);
-		kill_bdev(bdev);
-	}
-	if (bdev->bd_contains == bdev) {
-		if (disk->fops->release)
-			ret = disk->fops->release(bd_inode, NULL);
-	} else {
-		mutex_lock_nested(&bdev->bd_contains->bd_mutex,
-				  subclass + 1);
-		bdev->bd_contains->bd_part_count--;
-		mutex_unlock(&bdev->bd_contains->bd_mutex);
-	}
-	if (!bdev->bd_openers) {
-		struct module *owner = disk->fops->owner;
-
-		put_disk(disk);
-		module_put(owner);
-
-		if (bdev->bd_contains != bdev) {
-			kobject_put(&bdev->bd_part->kobj);
-			bdev->bd_part = NULL;
-		}
-		bdev->bd_disk = NULL;
-		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
-		if (bdev != bdev->bd_contains)
-			__blkdev_put(bdev->bd_contains, subclass + 1);
-		bdev->bd_contains = NULL;
-	}
-	unlock_kernel();
-	mutex_unlock(&bdev->bd_mutex);
-	bdput(bdev);
-	return ret;
-}
-
-int blkdev_put(struct block_device *bdev)
-{
-	return __blkdev_put(bdev, BD_MUTEX_NORMAL);
-}
-EXPORT_SYMBOL(blkdev_put);
-
-int blkdev_put_partition(struct block_device *bdev)
-{
-	return __blkdev_put(bdev, BD_MUTEX_PARTITION);
-}
-EXPORT_SYMBOL(blkdev_put_partition);
-
-static int
-blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags);
-
-static int
-do_open(struct block_device *bdev, struct file *file, unsigned int subclass)
+static int do_open(struct block_device *bdev, struct file *file)
 {
 	struct module *owner = NULL;
 	struct gendisk *disk;
@@ -992,8 +917,7 @@ do_open(struct block_device *bdev, struct file *file, unsigned int subclass)
 	}
 	owner = disk->fops->owner;
 
-	mutex_lock_nested(&bdev->bd_mutex, subclass);
-
+	mutex_lock(&bdev->bd_mutex);
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
 		bdev->bd_contains = bdev;
@@ -1020,11 +944,11 @@ do_open(struct block_device *bdev, struct file *file, unsigned int subclass)
 			ret = -ENOMEM;
 			if (!whole)
 				goto out_first;
-			ret = blkdev_get_whole(whole, file->f_mode, file->f_flags);
+			ret = blkdev_get(whole, file->f_mode, file->f_flags);
 			if (ret)
 				goto out_first;
 			bdev->bd_contains = whole;
-			mutex_lock_nested(&whole->bd_mutex, BD_MUTEX_WHOLE);
+			mutex_lock(&whole->bd_mutex);
 			whole->bd_part_count++;
 			p = disk->part[part - 1];
 			bdev->bd_inode->i_data.backing_dev_info =
@@ -1052,8 +976,7 @@ do_open(struct block_device *bdev, struct file *file, unsigned int subclass)
 			if (bdev->bd_invalidated)
 				rescan_partitions(bdev->bd_disk, bdev);
 		} else {
-			mutex_lock_nested(&bdev->bd_contains->bd_mutex,
-					  BD_MUTEX_WHOLE);
+			mutex_lock(&bdev->bd_contains->bd_mutex);
 			bdev->bd_contains->bd_part_count++;
 			mutex_unlock(&bdev->bd_contains->bd_mutex);
 		}
@@ -1067,7 +990,7 @@ out_first:
 	bdev->bd_disk = NULL;
 	bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 	if (bdev != bdev->bd_contains)
-		__blkdev_put(bdev->bd_contains, BD_MUTEX_WHOLE);
+		blkdev_put(bdev->bd_contains);
 	bdev->bd_contains = NULL;
 	put_disk(disk);
 	module_put(owner);
@@ -1094,49 +1017,11 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)
 	fake_file.f_dentry = &fake_dentry;
 	fake_dentry.d_inode = bdev->bd_inode;
 
-	return do_open(bdev, &fake_file, BD_MUTEX_NORMAL);
+	return do_open(bdev, &fake_file);
 }
 
 EXPORT_SYMBOL(blkdev_get);
 
-static int
-blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags)
-{
-	/*
-	 * This crockload is due to bad choice of ->open() type.
-	 * It will go away.
-	 * For now, block device ->open() routine must _not_
-	 * examine anything in 'inode' argument except ->i_rdev.
-	 */
-	struct file fake_file = {};
-	struct dentry fake_dentry = {};
-	fake_file.f_mode = mode;
-	fake_file.f_flags = flags;
-	fake_file.f_dentry = &fake_dentry;
-	fake_dentry.d_inode = bdev->bd_inode;
-
-	return do_open(bdev, &fake_file, BD_MUTEX_WHOLE);
-}
-
-static int
-blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags)
-{
-	/*
-	 * This crockload is due to bad choice of ->open() type.
-	 * It will go away.
-	 * For now, block device ->open() routine must _not_
-	 * examine anything in 'inode' argument except ->i_rdev.
-	 */
-	struct file fake_file = {};
-	struct dentry fake_dentry = {};
-	fake_file.f_mode = mode;
-	fake_file.f_flags = flags;
-	fake_file.f_dentry = &fake_dentry;
-	fake_dentry.d_inode = bdev->bd_inode;
-
-	return do_open(bdev, &fake_file, BD_MUTEX_PARTITION);
-}
-
 static int blkdev_open(struct inode * inode, struct file * filp)
 {
 	struct block_device *bdev;
@@ -1154,7 +1039,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	if (bdev == NULL)
 		return -ENOMEM;
 
-	res = do_open(bdev, filp, BD_MUTEX_NORMAL);
+	res = do_open(bdev, filp);
 	if (res)
 		return res;
 
@@ -1168,6 +1053,51 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	return res;
 }
 
+int blkdev_put(struct block_device *bdev)
+{
+	int ret = 0;
+	struct inode *bd_inode = bdev->bd_inode;
+	struct gendisk *disk = bdev->bd_disk;
+
+	mutex_lock(&bdev->bd_mutex);
+	lock_kernel();
+	if (!--bdev->bd_openers) {
+		sync_blockdev(bdev);
+		kill_bdev(bdev);
+	}
+	if (bdev->bd_contains == bdev) {
+		if (disk->fops->release)
+			ret = disk->fops->release(bd_inode, NULL);
+	} else {
+		mutex_lock(&bdev->bd_contains->bd_mutex);
+		bdev->bd_contains->bd_part_count--;
+		mutex_unlock(&bdev->bd_contains->bd_mutex);
+	}
+	if (!bdev->bd_openers) {
+		struct module *owner = disk->fops->owner;
+
+		put_disk(disk);
+		module_put(owner);
+
+		if (bdev->bd_contains != bdev) {
+			kobject_put(&bdev->bd_part->kobj);
+			bdev->bd_part = NULL;
+		}
+		bdev->bd_disk = NULL;
+		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
+		if (bdev != bdev->bd_contains) {
+			blkdev_put(bdev->bd_contains);
+		}
+		bdev->bd_contains = NULL;
+	}
+	unlock_kernel();
+	mutex_unlock(&bdev->bd_mutex);
+	bdput(bdev);
+	return ret;
+}
+
+EXPORT_SYMBOL(blkdev_put);
+
 static int blkdev_close(struct inode * inode, struct file * filp)
 {
 	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
-- 
cgit v1.2.3


From 35a6027f1ab6594068cb8bca7705e4567753946b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 Dec 2006 02:36:14 -0800
Subject: [PATCH] new bd_mutex lockdep annotation

Use the gendisk partition number to set a lock class.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Cc: Ingo Molnar <mingo@elte.hu>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jason Baron <jbaron@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 36c38f48a4e..528cf053ce9 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -355,10 +355,14 @@ static int bdev_set(struct inode *inode, void *data)
 
 static LIST_HEAD(all_bdevs);
 
+static struct lock_class_key bdev_part_lock_key;
+
 struct block_device *bdget(dev_t dev)
 {
 	struct block_device *bdev;
 	struct inode *inode;
+	struct gendisk *disk;
+	int part = 0;
 
 	inode = iget5_locked(bd_mnt->mnt_sb, hash(dev),
 			bdev_test, bdev_set, &dev);
@@ -384,6 +388,11 @@ struct block_device *bdget(dev_t dev)
 		list_add(&bdev->bd_list, &all_bdevs);
 		spin_unlock(&bdev_lock);
 		unlock_new_inode(inode);
+		mutex_init(&bdev->bd_mutex);
+		disk = get_gendisk(dev, &part);
+		if (part)
+			lockdep_set_class(&bdev->bd_mutex, &bdev_part_lock_key);
+		put_disk(disk);
 	}
 	return bdev;
 }
-- 
cgit v1.2.3


From fd27c7a1bfa9801544ca961cdb4ca0b4998580b8 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 8 Dec 2006 02:36:14 -0800
Subject: [PATCH] lockdep: remove lock_key approach to managing nested bd_mutex
 locks

The extra call to get_gendisk is not good.  It causes a ->probe and possible
module load before it is really appropriate to do this.

Cc: Ingo Molnar <mingo@elte.hu>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 528cf053ce9..36c38f48a4e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -355,14 +355,10 @@ static int bdev_set(struct inode *inode, void *data)
 
 static LIST_HEAD(all_bdevs);
 
-static struct lock_class_key bdev_part_lock_key;
-
 struct block_device *bdget(dev_t dev)
 {
 	struct block_device *bdev;
 	struct inode *inode;
-	struct gendisk *disk;
-	int part = 0;
 
 	inode = iget5_locked(bd_mnt->mnt_sb, hash(dev),
 			bdev_test, bdev_set, &dev);
@@ -388,11 +384,6 @@ struct block_device *bdget(dev_t dev)
 		list_add(&bdev->bd_list, &all_bdevs);
 		spin_unlock(&bdev_lock);
 		unlock_new_inode(inode);
-		mutex_init(&bdev->bd_mutex);
-		disk = get_gendisk(dev, &part);
-		if (part)
-			lockdep_set_class(&bdev->bd_mutex, &bdev_part_lock_key);
-		put_disk(disk);
 	}
 	return bdev;
 }
-- 
cgit v1.2.3


From 37be41241f43109c55d92cdbf303399eea642f14 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 8 Dec 2006 02:36:16 -0800
Subject: [PATCH] lockdep: simplify some aspects of bd_mutex nesting

When we open (actually blkdev_get) a partition we need to also open (get) the
whole device that holds the partition.  The involves some limited recursion.
This patch tries to simplify some aspects of this.

As well as opening the whole device, we need to increment ->bd_part_count when
a partition is opened (this is used by rescan_partitions to avoid a rescan if
any partition is active, as that would be confusing).

The main change this patch makes is to move the inc/dec of bd_part_count into
blkdev_{get,put} for the whole rather than doing it in blkdev_{get,put} for
the partition.

More specifically, we introduce __blkdev_get and __blkdev_put which do exactly
what blkdev_{get,put} did, only with an extra "for_part" argument
(blkget_{get,put} then call the __ version with a '0' for the extra argument).

If for_part is 1, then the blkdev is being get(put) because a partition is
being opened(closed) for the first(last) time, and so bd_part_count should be
updated (on success).  The particular advantage of pushing this function down
is that the bd_mutex lock (which is needed to update bd_part_count) is already
held at the lower level.

Note that this slightly changes the semantics of bd_part_count.  Instead of
updating it whenever a partition is opened or released, it is now only updated
on the first open or last release.  This is an adequate semantic as it is only
ever tested for "== 0".

Having introduced these functions we remove the current bd_part_count updates
from do_open (which is really the body of blkdev_get) and call
__blkdev_get(...  1).  Similarly in blkget_put we remove the old bd_part_count
updates and call __blkget_put(..., 1).  This call is moved to the end of
__blkdev_put to avoid nested locks of bd_mutex.

Finally the mutex_lock on whole->bd_mutex in do_open can be removed.  It was
only really needed to protect bd_part_count, and that is now managed (and
protected) within the recursive call.

The observation that bd_part_count is central to the locking issues, and the
modifications to create __blkdev_put are from Peter Zijlstra.

Cc: Ingo Molnar <mingo@elte.hu>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 51 +++++++++++++++++++++++++++++----------------------
 1 file changed, 29 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 36c38f48a4e..19f5f153ddb 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -900,7 +900,10 @@ void bd_set_size(struct block_device *bdev, loff_t size)
 }
 EXPORT_SYMBOL(bd_set_size);
 
-static int do_open(struct block_device *bdev, struct file *file)
+static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags,
+			int for_part);
+
+static int do_open(struct block_device *bdev, struct file *file, int for_part)
 {
 	struct module *owner = NULL;
 	struct gendisk *disk;
@@ -944,25 +947,21 @@ static int do_open(struct block_device *bdev, struct file *file)
 			ret = -ENOMEM;
 			if (!whole)
 				goto out_first;
-			ret = blkdev_get(whole, file->f_mode, file->f_flags);
+			BUG_ON(for_part);
+			ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1);
 			if (ret)
 				goto out_first;
 			bdev->bd_contains = whole;
-			mutex_lock(&whole->bd_mutex);
-			whole->bd_part_count++;
 			p = disk->part[part - 1];
 			bdev->bd_inode->i_data.backing_dev_info =
 			   whole->bd_inode->i_data.backing_dev_info;
 			if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) {
-				whole->bd_part_count--;
-				mutex_unlock(&whole->bd_mutex);
 				ret = -ENXIO;
 				goto out_first;
 			}
 			kobject_get(&p->kobj);
 			bdev->bd_part = p;
 			bd_set_size(bdev, (loff_t) p->nr_sects << 9);
-			mutex_unlock(&whole->bd_mutex);
 		}
 	} else {
 		put_disk(disk);
@@ -975,13 +974,11 @@ static int do_open(struct block_device *bdev, struct file *file)
 			}
 			if (bdev->bd_invalidated)
 				rescan_partitions(bdev->bd_disk, bdev);
-		} else {
-			mutex_lock(&bdev->bd_contains->bd_mutex);
-			bdev->bd_contains->bd_part_count++;
-			mutex_unlock(&bdev->bd_contains->bd_mutex);
 		}
 	}
 	bdev->bd_openers++;
+	if (for_part)
+		bdev->bd_part_count++;
 	mutex_unlock(&bdev->bd_mutex);
 	unlock_kernel();
 	return 0;
@@ -1002,7 +999,8 @@ out:
 	return ret;
 }
 
-int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)
+static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags,
+			int for_part)
 {
 	/*
 	 * This crockload is due to bad choice of ->open() type.
@@ -1017,9 +1015,13 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)
 	fake_file.f_dentry = &fake_dentry;
 	fake_dentry.d_inode = bdev->bd_inode;
 
-	return do_open(bdev, &fake_file);
+	return do_open(bdev, &fake_file, for_part);
 }
 
+int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)
+{
+	return __blkdev_get(bdev, mode, flags, 0);
+}
 EXPORT_SYMBOL(blkdev_get);
 
 static int blkdev_open(struct inode * inode, struct file * filp)
@@ -1039,7 +1041,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	if (bdev == NULL)
 		return -ENOMEM;
 
-	res = do_open(bdev, filp);
+	res = do_open(bdev, filp, 0);
 	if (res)
 		return res;
 
@@ -1053,14 +1055,18 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	return res;
 }
 
-int blkdev_put(struct block_device *bdev)
+static int __blkdev_put(struct block_device *bdev, int for_part)
 {
 	int ret = 0;
 	struct inode *bd_inode = bdev->bd_inode;
 	struct gendisk *disk = bdev->bd_disk;
+	struct block_device *victim = NULL;
 
 	mutex_lock(&bdev->bd_mutex);
 	lock_kernel();
+	if (for_part)
+		bdev->bd_part_count--;
+
 	if (!--bdev->bd_openers) {
 		sync_blockdev(bdev);
 		kill_bdev(bdev);
@@ -1068,10 +1074,6 @@ int blkdev_put(struct block_device *bdev)
 	if (bdev->bd_contains == bdev) {
 		if (disk->fops->release)
 			ret = disk->fops->release(bd_inode, NULL);
-	} else {
-		mutex_lock(&bdev->bd_contains->bd_mutex);
-		bdev->bd_contains->bd_part_count--;
-		mutex_unlock(&bdev->bd_contains->bd_mutex);
 	}
 	if (!bdev->bd_openers) {
 		struct module *owner = disk->fops->owner;
@@ -1085,17 +1087,22 @@ int blkdev_put(struct block_device *bdev)
 		}
 		bdev->bd_disk = NULL;
 		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
-		if (bdev != bdev->bd_contains) {
-			blkdev_put(bdev->bd_contains);
-		}
+		if (bdev != bdev->bd_contains)
+			victim = bdev->bd_contains;
 		bdev->bd_contains = NULL;
 	}
 	unlock_kernel();
 	mutex_unlock(&bdev->bd_mutex);
 	bdput(bdev);
+	if (victim)
+		__blkdev_put(victim, 1);
 	return ret;
 }
 
+int blkdev_put(struct block_device *bdev)
+{
+	return __blkdev_put(bdev, 0);
+}
 EXPORT_SYMBOL(blkdev_put);
 
 static int blkdev_close(struct inode * inode, struct file * filp)
-- 
cgit v1.2.3


From 6796bf54a64df36f96a42ae222423fffe36c58a5 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 8 Dec 2006 02:36:16 -0800
Subject: [PATCH] lockdep: use mutex_lock_nested for bd_mutex to avoid lockdep
 warning

Now that the nesting in blkdev_{get,put} is simpler, adding mutex_lock_nested
is trivial.

Cc: Ingo Molnar <mingo@elte.hu>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 19f5f153ddb..846f32d51be 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -920,7 +920,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
 	}
 	owner = disk->fops->owner;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
 		bdev->bd_contains = bdev;
@@ -1062,7 +1062,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part)
 	struct gendisk *disk = bdev->bd_disk;
 	struct block_device *victim = NULL;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	lock_kernel();
 	if (for_part)
 		bdev->bd_part_count--;
-- 
cgit v1.2.3


From c48f70c3d046f021b1c22438604ef2a583380eca Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 Dec 2006 02:36:18 -0800
Subject: [PATCH] bdev: fix ->bd_part_count leak

Don't leak a ->bd_part_count when the partition open fails with -ENXIO.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 846f32d51be..f3c3a44dd8b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -902,6 +902,7 @@ EXPORT_SYMBOL(bd_set_size);
 
 static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags,
 			int for_part);
+static int __blkdev_put(struct block_device *bdev, int for_part);
 
 static int do_open(struct block_device *bdev, struct file *file, int for_part)
 {
@@ -987,7 +988,7 @@ out_first:
 	bdev->bd_disk = NULL;
 	bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 	if (bdev != bdev->bd_contains)
-		blkdev_put(bdev->bd_contains);
+		__blkdev_put(bdev->bd_contains, 1);
 	bdev->bd_contains = NULL;
 	put_disk(disk);
 	module_put(owner);
-- 
cgit v1.2.3


From 42cf11939becc717bd125d121a1a23415106a099 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:31 -0800
Subject: [PATCH] fsstack: Introduce fsstack_copy_{attr,inode}_*

Introduce several fsstack_copy_* functions which allow stackable filesystems
(such as eCryptfs and Unionfs) to easily copy over (currently only) inode
attributes.  This prevents code duplication and allows for code reuse.

[akpm@osdl.org: Remove unneeded wrapper]
[bunk@stusta.de: fs/stack.c should #include <linux/fs_stack.h>]
Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Cc: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Makefile |  3 ++-
 fs/stack.c  | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 fs/stack.c

(limited to 'fs')

diff --git a/fs/Makefile b/fs/Makefile
index 9a5ce9323bf..b9ffa63f77f 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,7 +10,8 @@ obj-y :=	open.o read_write.o file_table.o super.o \
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
-		pnode.o drop_caches.o splice.o sync.o utimes.o
+		pnode.o drop_caches.o splice.o sync.o utimes.o \
+		stack.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=	buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/stack.c b/fs/stack.c
new file mode 100644
index 00000000000..5ddbc34535f
--- /dev/null
+++ b/fs/stack.c
@@ -0,0 +1,40 @@
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/fs_stack.h>
+
+/* does _NOT_ require i_mutex to be held.
+ *
+ * This function cannot be inlined since i_size_{read,write} is rather
+ * heavy-weight on 32-bit systems
+ */
+void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
+{
+	i_size_write(dst, i_size_read((struct inode *)src));
+	dst->i_blocks = src->i_blocks;
+}
+EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
+
+/* copy all attributes; get_nlinks is optional way to override the i_nlink
+ * copying
+ */
+void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
+				int (*get_nlinks)(struct inode *))
+{
+	if (!get_nlinks)
+		dest->i_nlink = src->i_nlink;
+	else
+		dest->i_nlink = (*get_nlinks)(dest);
+
+	dest->i_mode = src->i_mode;
+	dest->i_uid = src->i_uid;
+	dest->i_gid = src->i_gid;
+	dest->i_rdev = src->i_rdev;
+	dest->i_atime = src->i_atime;
+	dest->i_mtime = src->i_mtime;
+	dest->i_ctime = src->i_ctime;
+	dest->i_blkbits = src->i_blkbits;
+	dest->i_flags = src->i_flags;
+
+	fsstack_copy_inode_size(dest, src);
+}
+EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
-- 
cgit v1.2.3


From 0cc72dc7f050188d8d7344b1dd688cbc68d3cd30 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:31 -0800
Subject: [PATCH] eCryptfs: Use fsstack's generic copy inode attr functions

Replace eCryptfs specific code & calls with the more generic fsstack
equivalents and remove the eCryptfs specific functions.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Cc: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/dentry.c          |  3 +-
 fs/ecryptfs/ecryptfs_kernel.h |  4 +--
 fs/ecryptfs/file.c            |  3 +-
 fs/ecryptfs/inode.c           | 75 +++++++++++--------------------------------
 fs/ecryptfs/main.c            |  5 +--
 5 files changed, 27 insertions(+), 63 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 52d1e36dc74..329efcd3d8c 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -25,6 +25,7 @@
 #include <linux/dcache.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
+#include <linux/fs_stack.h>
 #include "ecryptfs_kernel.h"
 
 /**
@@ -61,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 		struct inode *lower_inode =
 			ecryptfs_inode_to_lower(dentry->d_inode);
 
-		ecryptfs_copy_attr_all(dentry->d_inode, lower_inode);
+		fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL);
 	}
 out:
 	return rc;
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index f992533d169..424137fb589 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -28,6 +28,7 @@
 
 #include <keys/user-type.h>
 #include <linux/fs.h>
+#include <linux/fs_stack.h>
 #include <linux/scatterlist.h>
 
 /* Version verification for shared data structures w/ userspace */
@@ -413,9 +414,6 @@ int ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat,
 			     const char *name, int length,
 			     char **encoded_name);
 struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry);
-void ecryptfs_copy_attr_atime(struct inode *dest, const struct inode *src);
-void ecryptfs_copy_attr_all(struct inode *dest, const struct inode *src);
-void ecryptfs_copy_inode_size(struct inode *dst, const struct inode *src);
 void ecryptfs_dump_hex(char *data, int bytes);
 int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
 			int sg_size);
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 42099e779a5..c48e4590b11 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -30,6 +30,7 @@
 #include <linux/security.h>
 #include <linux/smp_lock.h>
 #include <linux/compat.h>
+#include <linux/fs_stack.h>
 #include "ecryptfs_kernel.h"
 
 /**
@@ -192,7 +193,7 @@ retry:
 		goto retry;
 	file->f_pos = lower_file->f_pos;
 	if (rc >= 0)
-		ecryptfs_copy_attr_atime(inode, lower_file->f_dentry->d_inode);
+		fsstack_copy_attr_atime(inode, lower_file->f_dentry->d_inode);
 	return rc;
 }
 
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 8a1945a84c3..c07a937b21a 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -30,6 +30,7 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/crypto.h>
+#include <linux/fs_stack.h>
 #include "ecryptfs_kernel.h"
 
 static struct dentry *lock_parent(struct dentry *dentry)
@@ -53,48 +54,6 @@ static void unlock_dir(struct dentry *dir)
 	dput(dir);
 }
 
-void ecryptfs_copy_inode_size(struct inode *dst, const struct inode *src)
-{
-	i_size_write(dst, i_size_read((struct inode *)src));
-	dst->i_blocks = src->i_blocks;
-}
-
-void ecryptfs_copy_attr_atime(struct inode *dest, const struct inode *src)
-{
-	dest->i_atime = src->i_atime;
-}
-
-static void ecryptfs_copy_attr_times(struct inode *dest,
-				     const struct inode *src)
-{
-	dest->i_atime = src->i_atime;
-	dest->i_mtime = src->i_mtime;
-	dest->i_ctime = src->i_ctime;
-}
-
-static void ecryptfs_copy_attr_timesizes(struct inode *dest,
-					 const struct inode *src)
-{
-	dest->i_atime = src->i_atime;
-	dest->i_mtime = src->i_mtime;
-	dest->i_ctime = src->i_ctime;
-	ecryptfs_copy_inode_size(dest, src);
-}
-
-void ecryptfs_copy_attr_all(struct inode *dest, const struct inode *src)
-{
-	dest->i_mode = src->i_mode;
-	dest->i_nlink = src->i_nlink;
-	dest->i_uid = src->i_uid;
-	dest->i_gid = src->i_gid;
-	dest->i_rdev = src->i_rdev;
-	dest->i_atime = src->i_atime;
-	dest->i_mtime = src->i_mtime;
-	dest->i_ctime = src->i_ctime;
-	dest->i_blkbits = src->i_blkbits;
-	dest->i_flags = src->i_flags;
-}
-
 /**
  * ecryptfs_create_underlying_file
  * @lower_dir_inode: inode of the parent in the lower fs of the new file
@@ -171,8 +130,8 @@ ecryptfs_do_create(struct inode *directory_inode,
 		ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n");
 		goto out_lock;
 	}
-	ecryptfs_copy_attr_timesizes(directory_inode,
-				     lower_dir_dentry->d_inode);
+	fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
+	fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode);
 out_lock:
 	unlock_dir(lower_dir_dentry);
 out:
@@ -365,7 +324,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
        		"d_name.name = [%s]\n", lower_dentry,
 		lower_dentry->d_name.name);
 	lower_inode = lower_dentry->d_inode;
-	ecryptfs_copy_attr_atime(dir, lower_dir_dentry->d_inode);
+	fsstack_copy_attr_atime(dir, lower_dir_dentry->d_inode);
 	BUG_ON(!atomic_read(&lower_dentry->d_count));
 	ecryptfs_set_dentry_private(dentry,
 				    kmem_cache_alloc(ecryptfs_dentry_info_cache,
@@ -462,7 +421,8 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
 	rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0);
 	if (rc)
 		goto out_lock;
-	ecryptfs_copy_attr_timesizes(dir, lower_new_dentry->d_inode);
+	fsstack_copy_attr_times(dir, lower_new_dentry->d_inode);
+	fsstack_copy_inode_size(dir, lower_new_dentry->d_inode);
 	old_dentry->d_inode->i_nlink =
 		ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink;
 	i_size_write(new_dentry->d_inode, file_size_save);
@@ -488,7 +448,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
 		printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
 		goto out_unlock;
 	}
-	ecryptfs_copy_attr_times(dir, lower_dir_inode);
+	fsstack_copy_attr_times(dir, lower_dir_inode);
 	dentry->d_inode->i_nlink =
 		ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink;
 	dentry->d_inode->i_ctime = dir->i_ctime;
@@ -527,7 +487,8 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
 	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
 	if (rc)
 		goto out_lock;
-	ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode);
+	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
+	fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
 out_lock:
 	unlock_dir(lower_dir_dentry);
 	dput(lower_dentry);
@@ -550,7 +511,8 @@ static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
 	if (rc)
 		goto out;
-	ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode);
+	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
+	fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
 	dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
 out:
 	unlock_dir(lower_dir_dentry);
@@ -573,7 +535,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
 	dput(lower_dentry);
 	if (!rc)
 		d_delete(lower_dentry);
-	ecryptfs_copy_attr_times(dir, lower_dir_dentry->d_inode);
+	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
 	dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
 	unlock_dir(lower_dir_dentry);
 	if (!rc)
@@ -597,7 +559,8 @@ ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
 	if (rc)
 		goto out;
-	ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode);
+	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
+	fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
 out:
 	unlock_dir(lower_dir_dentry);
 	if (!dentry->d_inode)
@@ -626,9 +589,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			lower_new_dir_dentry->d_inode, lower_new_dentry);
 	if (rc)
 		goto out_lock;
-	ecryptfs_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
+	fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL);
 	if (new_dir != old_dir)
-		ecryptfs_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
+		fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL);
 out_lock:
 	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
 	dput(lower_new_dentry->d_parent);
@@ -684,8 +647,8 @@ ecryptfs_readlink(struct dentry *dentry, char __user * buf, int bufsiz)
 				rc = -EFAULT;
 		}
 		kfree(decoded_name);
-		ecryptfs_copy_attr_atime(dentry->d_inode,
-					 lower_dentry->d_inode);
+		fsstack_copy_attr_atime(dentry->d_inode,
+					lower_dentry->d_inode);
 	}
 out_free_lower_buf:
 	kfree(lower_buf);
@@ -915,7 +878,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 	}
 	rc = notify_change(lower_dentry, ia);
 out:
-	ecryptfs_copy_attr_all(inode, lower_inode);
+	fsstack_copy_attr_all(inode, lower_inode, NULL);
 	return rc;
 }
 
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 3ede12b2593..d0541ae8fab 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -35,6 +35,7 @@
 #include <linux/pagemap.h>
 #include <linux/key.h>
 #include <linux/parser.h>
+#include <linux/fs_stack.h>
 #include "ecryptfs_kernel.h"
 
 /**
@@ -112,10 +113,10 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
 		d_add(dentry, inode);
 	else
 		d_instantiate(dentry, inode);
-	ecryptfs_copy_attr_all(inode, lower_inode);
+	fsstack_copy_attr_all(inode, lower_inode, NULL);
 	/* This size will be overwritten for real files w/ headers and
 	 * other metadata */
-	ecryptfs_copy_inode_size(inode, lower_inode);
+	fsstack_copy_inode_size(inode, lower_inode);
 out:
 	return rc;
 }
-- 
cgit v1.2.3


From fec6d055da71fb02a76f9c2c12427fa79974018b Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:32 -0800
Subject: [PATCH] struct path: rename Reiserfs's struct path

Rename Reiserfs's struct path to struct treepath to prevent name collision
between it and struct path from fs/namei.c.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/bitmap.c          |  2 +-
 fs/reiserfs/fix_node.c        |  6 +++---
 fs/reiserfs/inode.c           | 12 ++++++------
 fs/reiserfs/namei.c           |  6 +++---
 fs/reiserfs/stree.c           | 42 +++++++++++++++++++++---------------------
 fs/reiserfs/tail_conversion.c |  4 ++--
 6 files changed, 36 insertions(+), 36 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index e3d466a228d..b286ccb0858 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -708,7 +708,7 @@ static void oid_groups(reiserfs_blocknr_hint_t * hint)
  */
 static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
 {
-	struct path *path;
+	struct treepath *path;
 	struct buffer_head *bh;
 	struct item_head *ih;
 	int pos_in_item;
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 6d0e554daa9..0ee35c6c9b7 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -957,7 +957,7 @@ static int get_far_parent(struct tree_balance *p_s_tb,
 {
 	struct buffer_head *p_s_parent;
 	INITIALIZE_PATH(s_path_to_neighbor_father);
-	struct path *p_s_path = p_s_tb->tb_path;
+	struct treepath *p_s_path = p_s_tb->tb_path;
 	struct cpu_key s_lr_father_key;
 	int n_counter,
 	    n_position = INT_MAX,
@@ -1074,7 +1074,7 @@ static int get_far_parent(struct tree_balance *p_s_tb,
  */
 static int get_parents(struct tree_balance *p_s_tb, int n_h)
 {
-	struct path *p_s_path = p_s_tb->tb_path;
+	struct treepath *p_s_path = p_s_tb->tb_path;
 	int n_position,
 	    n_ret_value,
 	    n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
@@ -1885,7 +1885,7 @@ static int check_balance(int mode,
 static int get_direct_parent(struct tree_balance *p_s_tb, int n_h)
 {
 	struct buffer_head *p_s_bh;
-	struct path *p_s_path = p_s_tb->tb_path;
+	struct treepath *p_s_path = p_s_tb->tb_path;
 	int n_position,
 	    n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
 
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 254239e6f9e..f3d1c4a7797 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -207,7 +207,7 @@ static int file_capable(struct inode *inode, long block)
 }
 
 /*static*/ int restart_transaction(struct reiserfs_transaction_handle *th,
-				   struct inode *inode, struct path *path)
+				   struct inode *inode, struct treepath *path)
 {
 	struct super_block *s = th->t_super;
 	int len = th->t_blocks_allocated;
@@ -570,7 +570,7 @@ static inline int _allocate_block(struct reiserfs_transaction_handle *th,
 				  long block,
 				  struct inode *inode,
 				  b_blocknr_t * allocated_block_nr,
-				  struct path *path, int flags)
+				  struct treepath *path, int flags)
 {
 	BUG_ON(!th->t_trans_id);
 
@@ -1107,7 +1107,7 @@ static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
 //
 
 // called by read_locked_inode
-static void init_inode(struct inode *inode, struct path *path)
+static void init_inode(struct inode *inode, struct treepath *path)
 {
 	struct buffer_head *bh;
 	struct item_head *ih;
@@ -1284,7 +1284,7 @@ static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
 /* NOTE, you must prepare the buffer head before sending it here,
 ** and then log it after the call
 */
-static void update_stat_data(struct path *path, struct inode *inode,
+static void update_stat_data(struct treepath *path, struct inode *inode,
 			     loff_t size)
 {
 	struct buffer_head *bh;
@@ -1653,7 +1653,7 @@ int reiserfs_write_inode(struct inode *inode, int do_sync)
    containing "." and ".." entries */
 static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
 				  struct inode *inode,
-				  struct item_head *ih, struct path *path,
+				  struct item_head *ih, struct treepath *path,
 				  struct inode *dir)
 {
 	struct super_block *sb = th->t_super;
@@ -1712,7 +1712,7 @@ static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
    containing the body of symlink */
 static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode,	/* Inode of symlink */
 				struct item_head *ih,
-				struct path *path, const char *symname,
+				struct treepath *path, const char *symname,
 				int item_len)
 {
 	struct super_block *sb = th->t_super;
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index abde1edc223..23f5cd5bbf5 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -54,7 +54,7 @@ static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off)
 
 // comment?  maybe something like set de to point to what the path points to?
 static inline void set_de_item_location(struct reiserfs_dir_entry *de,
-					struct path *path)
+					struct treepath *path)
 {
 	de->de_bh = get_last_bh(path);
 	de->de_ih = get_ih(path);
@@ -113,7 +113,7 @@ entry position in the item
 
 /* The function is NOT SCHEDULE-SAFE! */
 int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
-			struct path *path, struct reiserfs_dir_entry *de)
+			struct treepath *path, struct reiserfs_dir_entry *de)
 {
 	int retval;
 
@@ -282,7 +282,7 @@ static int linear_search_in_dir_item(struct cpu_key *key,
 // may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND
 // FIXME: should add something like IOERROR
 static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
-			       struct path *path_to_entry,
+			       struct treepath *path_to_entry,
 			       struct reiserfs_dir_entry *de)
 {
 	struct cpu_key key_to_search;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 5240abe1a70..47e7027ea39 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -244,7 +244,7 @@ static const struct reiserfs_key MAX_KEY = {
    of the path, and going upwards.  We must check the path's validity at each step.  If the key is not in
    the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
    case we return a special key, either MIN_KEY or MAX_KEY. */
-static inline const struct reiserfs_key *get_lkey(const struct path
+static inline const struct reiserfs_key *get_lkey(const struct treepath
 						  *p_s_chk_path,
 						  const struct super_block
 						  *p_s_sb)
@@ -290,7 +290,7 @@ static inline const struct reiserfs_key *get_lkey(const struct path
 }
 
 /* Get delimiting key of the buffer at the path and its right neighbor. */
-inline const struct reiserfs_key *get_rkey(const struct path *p_s_chk_path,
+inline const struct reiserfs_key *get_rkey(const struct treepath *p_s_chk_path,
 					   const struct super_block *p_s_sb)
 {
 	int n_position, n_path_offset = p_s_chk_path->path_length;
@@ -337,7 +337,7 @@ inline const struct reiserfs_key *get_rkey(const struct path *p_s_chk_path,
    the path.  These delimiting keys are stored at least one level above that buffer in the tree. If the
    buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
    this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
-static inline int key_in_buffer(struct path *p_s_chk_path,	/* Path which should be checked.  */
+static inline int key_in_buffer(struct treepath *p_s_chk_path,	/* Path which should be checked.  */
 				const struct cpu_key *p_s_key,	/* Key which should be checked.   */
 				struct super_block *p_s_sb	/* Super block pointer.           */
     )
@@ -374,7 +374,7 @@ inline void decrement_bcount(struct buffer_head *p_s_bh)
 }
 
 /* Decrement b_count field of the all buffers in the path. */
-void decrement_counters_in_path(struct path *p_s_search_path)
+void decrement_counters_in_path(struct treepath *p_s_search_path)
 {
 	int n_path_offset = p_s_search_path->path_length;
 
@@ -391,7 +391,7 @@ void decrement_counters_in_path(struct path *p_s_search_path)
 	p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
 }
 
-int reiserfs_check_path(struct path *p)
+int reiserfs_check_path(struct treepath *p)
 {
 	RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
 	       "path not properly relsed");
@@ -403,7 +403,7 @@ int reiserfs_check_path(struct path *p)
 **
 ** only called from fix_nodes()
 */
-void pathrelse_and_restore(struct super_block *s, struct path *p_s_search_path)
+void pathrelse_and_restore(struct super_block *s, struct treepath *p_s_search_path)
 {
 	int n_path_offset = p_s_search_path->path_length;
 
@@ -421,7 +421,7 @@ void pathrelse_and_restore(struct super_block *s, struct path *p_s_search_path)
 }
 
 /* Release all buffers in the path. */
-void pathrelse(struct path *p_s_search_path)
+void pathrelse(struct treepath *p_s_search_path)
 {
 	int n_path_offset = p_s_search_path->path_length;
 
@@ -602,7 +602,7 @@ static void search_by_key_reada(struct super_block *s,
    correctness of the bottom of the path */
 /* The function is NOT SCHEDULE-SAFE! */
 int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key,	/* Key to search. */
-		  struct path *p_s_search_path,	/* This structure was
+		  struct treepath *p_s_search_path,/* This structure was
 						   allocated and initialized
 						   by the calling
 						   function. It is filled up
@@ -813,7 +813,7 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key,	/*
 /* The function is NOT SCHEDULE-SAFE! */
 int search_for_position_by_key(struct super_block *p_s_sb,	/* Pointer to the super block.          */
 			       const struct cpu_key *p_cpu_key,	/* Key to search (cpu variable)         */
-			       struct path *p_s_search_path	/* Filled up by this function.          */
+			       struct treepath *p_s_search_path	/* Filled up by this function.          */
     )
 {
 	struct item_head *p_le_ih;	/* pointer to on-disk structure */
@@ -884,7 +884,7 @@ int search_for_position_by_key(struct super_block *p_s_sb,	/* Pointer to the sup
 }
 
 /* Compare given item and item pointed to by the path. */
-int comp_items(const struct item_head *stored_ih, const struct path *p_s_path)
+int comp_items(const struct item_head *stored_ih, const struct treepath *p_s_path)
 {
 	struct buffer_head *p_s_bh;
 	struct item_head *ih;
@@ -911,7 +911,7 @@ int comp_items(const struct item_head *stored_ih, const struct path *p_s_path)
 #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
 
 // prepare for delete or cut of direct item
-static inline int prepare_for_direct_item(struct path *path,
+static inline int prepare_for_direct_item(struct treepath *path,
 					  struct item_head *le_ih,
 					  struct inode *inode,
 					  loff_t new_file_length, int *cut_size)
@@ -952,7 +952,7 @@ static inline int prepare_for_direct_item(struct path *path,
 	return M_CUT;		/* Cut from this item. */
 }
 
-static inline int prepare_for_direntry_item(struct path *path,
+static inline int prepare_for_direntry_item(struct treepath *path,
 					    struct item_head *le_ih,
 					    struct inode *inode,
 					    loff_t new_file_length,
@@ -987,7 +987,7 @@ static inline int prepare_for_direntry_item(struct path *path,
     In case of file truncate calculate whether this item must be deleted/truncated or last
     unformatted node of this item will be converted to a direct item.
     This function returns a determination of what balance mode the calling function should employ. */
-static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct path *p_s_path, const struct cpu_key *p_s_item_key, int *p_n_removed,	/* Number of unformatted nodes which were removed
+static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *p_s_path, const struct cpu_key *p_s_item_key, int *p_n_removed,	/* Number of unformatted nodes which were removed
 																						   from end of the file. */
 				      int *p_n_cut_size, unsigned long long n_new_file_length	/* MAX_KEY_OFFSET in case of delete. */
     )
@@ -1125,7 +1125,7 @@ static int calc_deleted_bytes_number(struct tree_balance *p_s_tb, char c_mode)
 static void init_tb_struct(struct reiserfs_transaction_handle *th,
 			   struct tree_balance *p_s_tb,
 			   struct super_block *p_s_sb,
-			   struct path *p_s_path, int n_size)
+			   struct treepath *p_s_path, int n_size)
 {
 
 	BUG_ON(!th->t_trans_id);
@@ -1176,7 +1176,7 @@ char head2type(struct item_head *ih)
 #endif
 
 /* Delete object item. */
-int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct path *p_s_path,	/* Path to the deleted item. */
+int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_path,	/* Path to the deleted item. */
 			 const struct cpu_key *p_s_item_key,	/* Key to search for the deleted item.  */
 			 struct inode *p_s_inode,	/* inode is here just to update i_blocks and quotas */
 			 struct buffer_head *p_s_un_bh)
@@ -1468,7 +1468,7 @@ static void unmap_buffers(struct page *page, loff_t pos)
 static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
 				    struct inode *p_s_inode,
 				    struct page *page,
-				    struct path *p_s_path,
+				    struct treepath *p_s_path,
 				    const struct cpu_key *p_s_item_key,
 				    loff_t n_new_file_size, char *p_c_mode)
 {
@@ -1503,7 +1503,7 @@ static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
    pointer being converted. Therefore we have to delete inserted
    direct item(s) */
 static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
-					 struct inode *inode, struct path *path)
+					 struct inode *inode, struct treepath *path)
 {
 	struct cpu_key tail_key;
 	int tail_len;
@@ -1545,7 +1545,7 @@ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
 
 /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
 int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
-			   struct path *p_s_path,
+			   struct treepath *p_s_path,
 			   struct cpu_key *p_s_item_key,
 			   struct inode *p_s_inode,
 			   struct page *page, loff_t n_new_file_size)
@@ -1920,7 +1920,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p
 
 #ifdef CONFIG_REISERFS_CHECK
 // this makes sure, that we __append__, not overwrite or add holes
-static void check_research_for_paste(struct path *path,
+static void check_research_for_paste(struct treepath *path,
 				     const struct cpu_key *p_s_key)
 {
 	struct item_head *found_ih = get_ih(path);
@@ -1954,7 +1954,7 @@ static void check_research_for_paste(struct path *path,
 #endif				/* config reiserfs check */
 
 /* Paste bytes to the existing item. Returns bytes number pasted into the item. */
-int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct path *p_s_search_path,	/* Path to the pasted item.          */
+int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_search_path,	/* Path to the pasted item.          */
 			     const struct cpu_key *p_s_key,	/* Key to search for the needed item. */
 			     struct inode *inode,	/* Inode item belongs to */
 			     const char *p_c_body,	/* Pointer to the bytes to paste.    */
@@ -2036,7 +2036,7 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct path
 }
 
 /* Insert new item into the buffer at the path. */
-int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct path *p_s_path,	/* Path to the inserteded item.         */
+int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_path,	/* Path to the inserteded item.         */
 			 const struct cpu_key *key, struct item_head *p_s_ih,	/* Pointer to the item header to insert. */
 			 struct inode *inode, const char *p_c_body)
 {				/* Pointer to the bytes to insert.      */
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index 36f108fc1cf..f8121a1147e 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -15,7 +15,7 @@
 /* path points to first direct item of the file regarless of how many of
    them are there */
 int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
-		    struct path *path, struct buffer_head *unbh,
+		    struct treepath *path, struct buffer_head *unbh,
 		    loff_t tail_offset)
 {
 	struct super_block *sb = inode->i_sb;
@@ -171,7 +171,7 @@ void reiserfs_unmap_buffer(struct buffer_head *bh)
    what we expect from it (number of cut bytes). But when tail remains
    in the unformatted node, we set mode to SKIP_BALANCING and unlock
    inode */
-int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_inode, struct page *page, struct path *p_s_path,	/* path to the indirect item. */
+int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_inode, struct page *page, struct treepath *p_s_path,	/* path to the indirect item. */
 		    const struct cpu_key *p_s_item_key,	/* Key to look for unformatted node pointer to be cut. */
 		    loff_t n_new_file_size,	/* New file size. */
 		    char *p_c_mode)
-- 
cgit v1.2.3


From 346f20ff6020ffa11d40b789069079c56a444ae6 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:34 -0800
Subject: [PATCH] struct path: move struct path from fs/namei.c into
 include/linux

Moved struct path from fs/namei.c to include/linux/namei.h.  This allows many
places in the VFS, as well as any stackable filesystem to easily keep track of
dentry-vfsmount pairs.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namei.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index db1bca26d88..8c2db88bb20 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -572,11 +572,6 @@ fail:
 	return PTR_ERR(link);
 }
 
-struct path {
-	struct vfsmount *mnt;
-	struct dentry *dentry;
-};
-
 static inline void dput_path(struct path *path, struct nameidata *nd)
 {
 	dput(path->dentry);
-- 
cgit v1.2.3


From b65d34fd465f19fbe2f32f2205a9a06ca7c2bdeb Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:34 -0800
Subject: [PATCH] struct path: make eCryptfs a user of struct path

Convert eCryptfs dentry-vfsmount pairs in dentry private data to struct
path.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Cc: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/ecryptfs_kernel.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 424137fb589..afb64bdbe6a 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -29,6 +29,7 @@
 #include <keys/user-type.h>
 #include <linux/fs.h>
 #include <linux/fs_stack.h>
+#include <linux/namei.h>
 #include <linux/scatterlist.h>
 
 /* Version verification for shared data structures w/ userspace */
@@ -228,8 +229,7 @@ struct ecryptfs_inode_info {
 /* dentry private data. Each dentry must keep track of a lower
  * vfsmount too. */
 struct ecryptfs_dentry_info {
-	struct dentry *wdi_dentry;
-	struct vfsmount *lower_mnt;
+	struct path lower_path;
 	struct ecryptfs_crypt_stat *crypt_stat;
 };
 
@@ -356,26 +356,26 @@ ecryptfs_set_dentry_private(struct dentry *dentry,
 static inline struct dentry *
 ecryptfs_dentry_to_lower(struct dentry *dentry)
 {
-	return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->wdi_dentry;
+	return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry;
 }
 
 static inline void
 ecryptfs_set_dentry_lower(struct dentry *dentry, struct dentry *lower_dentry)
 {
-	((struct ecryptfs_dentry_info *)dentry->d_fsdata)->wdi_dentry =
+	((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry =
 		lower_dentry;
 }
 
 static inline struct vfsmount *
 ecryptfs_dentry_to_lower_mnt(struct dentry *dentry)
 {
-	return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_mnt;
+	return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.mnt;
 }
 
 static inline void
 ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
 {
-	((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_mnt =
+	((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.mnt =
 		lower_mnt;
 }
 
-- 
cgit v1.2.3


From 0f7fc9e4d03987fe29f6dd4aa67e4c56eb7ecb05 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:35 -0800
Subject: [PATCH] VFS: change struct file to use struct path

This patch changes struct file to use struct path instead of having
independent pointers to struct dentry and struct vfsmount, and converts all
users of f_{dentry,vfsmnt} in fs/ to use f_path.{dentry,mnt}.

Additionally, it adds two #define's to make the transition easier for users of
the f_dentry and f_vfsmnt.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_aout.c      |  8 ++++----
 fs/binfmt_elf.c       |  2 +-
 fs/binfmt_elf_fdpic.c |  4 ++--
 fs/binfmt_flat.c      |  2 +-
 fs/binfmt_misc.c      | 10 +++++-----
 fs/block_dev.c        |  4 ++--
 fs/compat.c           | 12 ++++++------
 fs/compat_ioctl.c     |  2 +-
 fs/dnotify.c          |  4 ++--
 fs/dquot.c            |  4 ++--
 fs/eventpoll.c        |  4 ++--
 fs/exec.c             | 10 +++++-----
 fs/fcntl.c            |  2 +-
 fs/file_table.c       | 10 +++++-----
 fs/inode.c            |  2 +-
 fs/inotify_user.c     |  6 +++---
 fs/ioctl.c            | 14 +++++++-------
 fs/libfs.c            | 14 +++++++-------
 fs/locks.c            | 32 ++++++++++++++++----------------
 fs/namei.c            | 10 +++++-----
 fs/open.c             | 26 +++++++++++++-------------
 fs/pipe.c             | 28 ++++++++++++++--------------
 fs/read_write.c       | 20 ++++++++++----------
 fs/readdir.c          |  2 +-
 fs/seq_file.c         |  2 +-
 fs/splice.c           | 18 +++++++++---------
 fs/stat.c             |  2 +-
 fs/super.c            |  2 +-
 fs/sync.c             |  4 ++--
 fs/xattr.c            |  8 ++++----
 30 files changed, 134 insertions(+), 134 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 517e111bb7e..813a887cd2b 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -274,7 +274,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
 	     N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
 	    N_TRSIZE(ex) || N_DRSIZE(ex) ||
-	    i_size_read(bprm->file->f_dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
+	    i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
 		return -ENOEXEC;
 	}
 
@@ -389,7 +389,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 		{
 			printk(KERN_WARNING 
 			       "fd_offset is not page aligned. Please convert program: %s\n",
-			       bprm->file->f_dentry->d_name.name);
+			       bprm->file->f_path.dentry->d_name.name);
 			error_time = jiffies;
 		}
 
@@ -469,7 +469,7 @@ static int load_aout_library(struct file *file)
 	int retval;
 	struct exec ex;
 
-	inode = file->f_dentry->d_inode;
+	inode = file->f_path.dentry->d_inode;
 
 	retval = -ENOEXEC;
 	error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
@@ -506,7 +506,7 @@ static int load_aout_library(struct file *file)
 		{
 			printk(KERN_WARNING 
 			       "N_TXTOFF is not page aligned. Please convert library: %s\n",
-			       file->f_dentry->d_name.name);
+			       file->f_path.dentry->d_name.name);
 			error_time = jiffies;
 		}
 		down_write(&current->mm->mmap_sem);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index be5869d3499..c6dbb4a7ec7 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1190,7 +1190,7 @@ static int maydump(struct vm_area_struct *vma)
 
 	/* Dump shared memory only if mapped from an anonymous file. */
 	if (vma->vm_flags & VM_SHARED)
-		return vma->vm_file->f_dentry->d_inode->i_nlink == 0;
+		return vma->vm_file->f_path.dentry->d_inode->i_nlink == 0;
 
 	/* If it hasn't been written to, don't write it out */
 	if (!vma->anon_vma)
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index ed9a61c6beb..9f0b7efc3df 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -855,7 +855,7 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
 
 dynamic_error:
 	printk("ELF FDPIC %s with invalid DYNAMIC section (inode=%lu)\n",
-	       what, file->f_dentry->d_inode->i_ino);
+	       what, file->f_path.dentry->d_inode->i_ino);
 	return -ELIBBAD;
 }
 
@@ -1186,7 +1186,7 @@ static int maydump(struct vm_area_struct *vma)
 
 	/* Dump shared memory only if mapped from an anonymous file. */
 	if (vma->vm_flags & VM_SHARED) {
-		if (vma->vm_file->f_dentry->d_inode->i_nlink == 0) {
+		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0) {
 			kdcore("%08lx: %08lx: no (share)", vma->vm_start, vma->vm_flags);
 			return 1;
 		}
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a62fd4018a2..ae8595d4985 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -429,7 +429,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 	int ret;
 
 	hdr = ((struct flat_hdr *) bprm->buf);		/* exec-header */
-	inode = bprm->file->f_dentry->d_inode;
+	inode = bprm->file->f_path.dentry->d_inode;
 
 	text_len  = ntohl(hdr->data_start);
 	data_len  = ntohl(hdr->data_end) - ntohl(hdr->data_start);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 1713c48fef5..00687ea6273 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -542,7 +542,7 @@ static void kill_node(Node *e)
 static ssize_t
 bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
 {
-	Node *e = file->f_dentry->d_inode->i_private;
+	Node *e = file->f_path.dentry->d_inode->i_private;
 	loff_t pos = *ppos;
 	ssize_t res;
 	char *page;
@@ -576,7 +576,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
 				size_t count, loff_t *ppos)
 {
 	struct dentry *root;
-	Node *e = file->f_dentry->d_inode->i_private;
+	Node *e = file->f_path.dentry->d_inode->i_private;
 	int res = parse_command(buffer, count);
 
 	switch (res) {
@@ -584,7 +584,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
 			break;
 		case 2: set_bit(Enabled, &e->flags);
 			break;
-		case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root);
+		case 3: root = dget(file->f_path.mnt->mnt_sb->s_root);
 			mutex_lock(&root->d_inode->i_mutex);
 
 			kill_node(e);
@@ -610,7 +610,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
 	Node *e;
 	struct inode *inode;
 	struct dentry *root, *dentry;
-	struct super_block *sb = file->f_vfsmnt->mnt_sb;
+	struct super_block *sb = file->f_path.mnt->mnt_sb;
 	int err = 0;
 
 	e = create_entry(buffer, count);
@@ -699,7 +699,7 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer,
 	switch (res) {
 		case 1: enabled = 0; break;
 		case 2: enabled = 1; break;
-		case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root);
+		case 3: root = dget(file->f_path.mnt->mnt_sb->s_root);
 			mutex_lock(&root->d_inode->i_mutex);
 
 			while (!list_empty(&entries))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f3c3a44dd8b..197f9392184 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -190,7 +190,7 @@ static int blkdev_commit_write(struct file *file, struct page *page, unsigned fr
 
 /*
  * private llseek:
- * for a block special file file->f_dentry->d_inode->i_size is zero
+ * for a block special file file->f_path.dentry->d_inode->i_size is zero
  * so we compute the size by hand (just as in block_read/write above)
  */
 static loff_t block_llseek(struct file *file, loff_t offset, int origin)
@@ -1013,7 +1013,7 @@ static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags,
 	struct dentry fake_dentry = {};
 	fake_file.f_mode = mode;
 	fake_file.f_flags = flags;
-	fake_file.f_dentry = &fake_dentry;
+	fake_file.f_path.dentry = &fake_dentry;
 	fake_dentry.d_inode = bdev->bd_inode;
 
 	return do_open(bdev, &fake_file, for_part);
diff --git a/fs/compat.c b/fs/compat.c
index a7e3f162fb1..b766964a625 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -232,7 +232,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs(file->f_dentry, &tmp);
+	error = vfs_statfs(file->f_path.dentry, &tmp);
 	if (!error)
 		error = put_compat_statfs(buf, &tmp);
 	fput(file);
@@ -303,7 +303,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs(file->f_dentry, &tmp);
+	error = vfs_statfs(file->f_path.dentry, &tmp);
 	if (!error)
 		error = put_compat_statfs64(buf, &tmp);
 	fput(file);
@@ -365,7 +365,7 @@ static void compat_ioctl_error(struct file *filp, unsigned int fd,
 	/* find the name of the device. */
 	path = (char *)__get_free_page(GFP_KERNEL);
 	if (path) {
-		fn = d_path(filp->f_dentry, filp->f_vfsmnt, path, PAGE_SIZE);
+		fn = d_path(filp->f_path.dentry, filp->f_path.mnt, path, PAGE_SIZE);
 		if (IS_ERR(fn))
 			fn = "?";
 	}
@@ -416,7 +416,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 	case FIBMAP:
 	case FIGETBSZ:
 	case FIONREAD:
-		if (S_ISREG(filp->f_dentry->d_inode->i_mode))
+		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			break;
 		/*FALL THROUGH*/
 
@@ -438,7 +438,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 			goto found_handler;
 	}
 
-	if (S_ISSOCK(filp->f_dentry->d_inode->i_mode) &&
+	if (S_ISSOCK(filp->f_path.dentry->d_inode->i_mode) &&
 	    cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
 		error = siocdevprivate_ioctl(fd, cmd, arg);
 	} else {
@@ -1259,7 +1259,7 @@ out:
 	if (iov != iovstack)
 		kfree(iov);
 	if ((ret + (type == READ)) > 0) {
-		struct dentry *dentry = file->f_dentry;
+		struct dentry *dentry = file->f_path.dentry;
 		if (type == READ)
 			fsnotify_access(dentry);
 		else
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index bcc3caf5d82..c81c958b3e1 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1177,7 +1177,7 @@ static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar
 static int vt_check(struct file *file)
 {
 	struct tty_struct *tty;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	
 	if (file->f_op->ioctl != tty_ioctl)
 		return -EINVAL;
diff --git a/fs/dnotify.c b/fs/dnotify.c
index 1f26a2b9eee..936409fcd93 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -42,7 +42,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 	struct dnotify_struct **prev;
 	struct inode *inode;
 
-	inode = filp->f_dentry->d_inode;
+	inode = filp->f_path.dentry->d_inode;
 	if (!S_ISDIR(inode->i_mode))
 		return;
 	spin_lock(&inode->i_lock);
@@ -74,7 +74,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	}
 	if (!dir_notify_enable)
 		return -EINVAL;
-	inode = filp->f_dentry->d_inode;
+	inode = filp->f_path.dentry->d_inode;
 	if (!S_ISDIR(inode->i_mode))
 		return -ENOTDIR;
 	dn = kmem_cache_alloc(dn_cache, GFP_KERNEL);
diff --git a/fs/dquot.c b/fs/dquot.c
index 89066b19124..0952cc474d9 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -694,9 +694,9 @@ restart:
 	file_list_lock();
 	list_for_each(p, &sb->s_files) {
 		struct file *filp = list_entry(p, struct file, f_u.fu_list);
-		struct inode *inode = filp->f_dentry->d_inode;
+		struct inode *inode = filp->f_path.dentry->d_inode;
 		if (filp->f_mode & FMODE_WRITE && dqinit_needed(inode, type)) {
-			struct dentry *dentry = dget(filp->f_dentry);
+			struct dentry *dentry = dget(filp->f_path.dentry);
 			file_list_unlock();
 			sb->dq_op->initialize(inode, type);
 			dput(dentry);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 88a6f8d0b88..3ae644e7e86 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -795,8 +795,8 @@ static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
 		goto eexit_4;
 	dentry->d_op = &eventpollfs_dentry_operations;
 	d_add(dentry, inode);
-	file->f_vfsmnt = mntget(eventpoll_mnt);
-	file->f_dentry = dentry;
+	file->f_path.mnt = mntget(eventpoll_mnt);
+	file->f_path.dentry = dentry;
 	file->f_mapping = inode->i_mapping;
 
 	file->f_pos = 0;
diff --git a/fs/exec.c b/fs/exec.c
index add0e03c3ea..60433e2254a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -912,7 +912,7 @@ EXPORT_SYMBOL(flush_old_exec);
 int prepare_binprm(struct linux_binprm *bprm)
 {
 	int mode;
-	struct inode * inode = bprm->file->f_dentry->d_inode;
+	struct inode * inode = bprm->file->f_path.dentry->d_inode;
 	int retval;
 
 	mode = inode->i_mode;
@@ -922,7 +922,7 @@ int prepare_binprm(struct linux_binprm *bprm)
 	bprm->e_uid = current->euid;
 	bprm->e_gid = current->egid;
 
-	if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
+	if(!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
 		/* Set-uid? */
 		if (mode & S_ISUID) {
 			current->personality &= ~PER_CLEAR_ON_SETID;
@@ -1519,10 +1519,10 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 				 0600);
 	if (IS_ERR(file))
 		goto fail_unlock;
-	inode = file->f_dentry->d_inode;
+	inode = file->f_path.dentry->d_inode;
 	if (inode->i_nlink > 1)
 		goto close_fail;	/* multiple links - don't dump */
-	if (!ispipe && d_unhashed(file->f_dentry))
+	if (!ispipe && d_unhashed(file->f_path.dentry))
 		goto close_fail;
 
 	/* AK: actually i see no reason to not allow this for named pipes etc.,
@@ -1533,7 +1533,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 		goto close_fail;
 	if (!file->f_op->write)
 		goto close_fail;
-	if (!ispipe && do_truncate(file->f_dentry, 0, 0, file) != 0)
+	if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
 		goto close_fail;
 
 	retval = binfmt->core_dump(signr, regs, file);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 4740d35e52c..2bdaef35da5 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -204,7 +204,7 @@ asmlinkage long sys_dup(unsigned int fildes)
 
 static int setfl(int fd, struct file * filp, unsigned long arg)
 {
-	struct inode * inode = filp->f_dentry->d_inode;
+	struct inode * inode = filp->f_path.dentry->d_inode;
 	int error = 0;
 
 	/*
diff --git a/fs/file_table.c b/fs/file_table.c
index 24f25a057d9..4c17a18d8c1 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -152,8 +152,8 @@ EXPORT_SYMBOL(fput);
  */
 void fastcall __fput(struct file *file)
 {
-	struct dentry *dentry = file->f_dentry;
-	struct vfsmount *mnt = file->f_vfsmnt;
+	struct dentry *dentry = file->f_path.dentry;
+	struct vfsmount *mnt = file->f_path.mnt;
 	struct inode *inode = dentry->d_inode;
 
 	might_sleep();
@@ -176,8 +176,8 @@ void fastcall __fput(struct file *file)
 		put_write_access(inode);
 	put_pid(file->f_owner.pid);
 	file_kill(file);
-	file->f_dentry = NULL;
-	file->f_vfsmnt = NULL;
+	file->f_path.dentry = NULL;
+	file->f_path.mnt = NULL;
 	file_free(file);
 	dput(dentry);
 	mntput(mnt);
@@ -271,7 +271,7 @@ int fs_may_remount_ro(struct super_block *sb)
 	file_list_lock();
 	list_for_each(p, &sb->s_files) {
 		struct file *file = list_entry(p, struct file, f_u.fu_list);
-		struct inode *inode = file->f_dentry->d_inode;
+		struct inode *inode = file->f_path.dentry->d_inode;
 
 		/* File with pending delete? */
 		if (inode->i_nlink == 0)
diff --git a/fs/inode.c b/fs/inode.c
index 9ecccab7326..d00de182ecb 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1200,7 +1200,7 @@ EXPORT_SYMBOL(touch_atime);
 
 void file_update_time(struct file *file)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct timespec now;
 	int sync_it = 0;
 
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index e1956e6f116..55f6da55b7c 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -570,9 +570,9 @@ asmlinkage long sys_inotify_init(void)
 	dev->ih = ih;
 
 	filp->f_op = &inotify_fops;
-	filp->f_vfsmnt = mntget(inotify_mnt);
-	filp->f_dentry = dget(inotify_mnt->mnt_root);
-	filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
+	filp->f_path.mnt = mntget(inotify_mnt);
+	filp->f_path.dentry = dget(inotify_mnt->mnt_root);
+	filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
 	filp->f_mode = FMODE_READ;
 	filp->f_flags = O_RDONLY;
 	filp->private_data = dev;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 4b7660b09ac..ff61772ceed 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -31,7 +31,7 @@ static long do_ioctl(struct file *filp, unsigned int cmd,
 		goto out;
 	} else if (filp->f_op->ioctl) {
 		lock_kernel();
-		error = filp->f_op->ioctl(filp->f_dentry->d_inode,
+		error = filp->f_op->ioctl(filp->f_path.dentry->d_inode,
 					  filp, cmd, arg);
 		unlock_kernel();
 	}
@@ -45,7 +45,7 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
 {
 	int error;
 	int block;
-	struct inode * inode = filp->f_dentry->d_inode;
+	struct inode * inode = filp->f_path.dentry->d_inode;
 	int __user *p = (int __user *)arg;
 
 	switch (cmd) {
@@ -137,17 +137,17 @@ int vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned lon
 			break;
 
 		case FIOQSIZE:
-			if (S_ISDIR(filp->f_dentry->d_inode->i_mode) ||
-			    S_ISREG(filp->f_dentry->d_inode->i_mode) ||
-			    S_ISLNK(filp->f_dentry->d_inode->i_mode)) {
-				loff_t res = inode_get_bytes(filp->f_dentry->d_inode);
+			if (S_ISDIR(filp->f_path.dentry->d_inode->i_mode) ||
+			    S_ISREG(filp->f_path.dentry->d_inode->i_mode) ||
+			    S_ISLNK(filp->f_path.dentry->d_inode->i_mode)) {
+				loff_t res = inode_get_bytes(filp->f_path.dentry->d_inode);
 				error = copy_to_user((loff_t __user *)arg, &res, sizeof(res)) ? -EFAULT : 0;
 			}
 			else
 				error = -ENOTTY;
 			break;
 		default:
-			if (S_ISREG(filp->f_dentry->d_inode->i_mode))
+			if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 				error = file_ioctl(filp, cmd, arg);
 			else
 				error = do_ioctl(filp, cmd, arg);
diff --git a/fs/libfs.c b/fs/libfs.c
index bd08e0e64a8..503898d5c4a 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -63,7 +63,7 @@ int dcache_dir_open(struct inode *inode, struct file *file)
 {
 	static struct qstr cursor_name = {.len = 1, .name = "."};
 
-	file->private_data = d_alloc(file->f_dentry, &cursor_name);
+	file->private_data = d_alloc(file->f_path.dentry, &cursor_name);
 
 	return file->private_data ? 0 : -ENOMEM;
 }
@@ -76,7 +76,7 @@ int dcache_dir_close(struct inode *inode, struct file *file)
 
 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
 {
-	mutex_lock(&file->f_dentry->d_inode->i_mutex);
+	mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
 	switch (origin) {
 		case 1:
 			offset += file->f_pos;
@@ -84,7 +84,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
 			if (offset >= 0)
 				break;
 		default:
-			mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+			mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
 			return -EINVAL;
 	}
 	if (offset != file->f_pos) {
@@ -96,8 +96,8 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
 
 			spin_lock(&dcache_lock);
 			list_del(&cursor->d_u.d_child);
-			p = file->f_dentry->d_subdirs.next;
-			while (n && p != &file->f_dentry->d_subdirs) {
+			p = file->f_path.dentry->d_subdirs.next;
+			while (n && p != &file->f_path.dentry->d_subdirs) {
 				struct dentry *next;
 				next = list_entry(p, struct dentry, d_u.d_child);
 				if (!d_unhashed(next) && next->d_inode)
@@ -108,7 +108,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
 			spin_unlock(&dcache_lock);
 		}
 	}
-	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+	mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
 	return offset;
 }
 
@@ -126,7 +126,7 @@ static inline unsigned char dt_type(struct inode *inode)
 
 int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct dentry *cursor = filp->private_data;
 	struct list_head *p, *q = &cursor->d_u.d_child;
 	ino_t ino;
diff --git a/fs/locks.c b/fs/locks.c
index 1cb0c57fedb..52a81005dab 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -321,7 +321,7 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
 		start = filp->f_pos;
 		break;
 	case SEEK_END:
-		start = i_size_read(filp->f_dentry->d_inode);
+		start = i_size_read(filp->f_path.dentry->d_inode);
 		break;
 	default:
 		return -EINVAL;
@@ -371,7 +371,7 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
 		start = filp->f_pos;
 		break;
 	case SEEK_END:
-		start = i_size_read(filp->f_dentry->d_inode);
+		start = i_size_read(filp->f_path.dentry->d_inode);
 		break;
 	default:
 		return -EINVAL;
@@ -672,7 +672,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl,
 	struct file_lock *cfl;
 
 	lock_kernel();
-	for (cfl = filp->f_dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
+	for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
 		if (!IS_POSIX(cfl))
 			continue;
 		if (posix_locks_conflict(cfl, fl))
@@ -734,7 +734,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 {
 	struct file_lock *new_fl = NULL;
 	struct file_lock **before;
-	struct inode * inode = filp->f_dentry->d_inode;
+	struct inode * inode = filp->f_path.dentry->d_inode;
 	int error = 0;
 	int found = 0;
 
@@ -1018,7 +1018,7 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
  */
 int posix_lock_file(struct file *filp, struct file_lock *fl)
 {
-	return __posix_lock_file_conf(filp->f_dentry->d_inode, fl, NULL);
+	return __posix_lock_file_conf(filp->f_path.dentry->d_inode, fl, NULL);
 }
 EXPORT_SYMBOL(posix_lock_file);
 
@@ -1033,7 +1033,7 @@ EXPORT_SYMBOL(posix_lock_file);
 int posix_lock_file_conf(struct file *filp, struct file_lock *fl,
 			struct file_lock *conflock)
 {
-	return __posix_lock_file_conf(filp->f_dentry->d_inode, fl, conflock);
+	return __posix_lock_file_conf(filp->f_path.dentry->d_inode, fl, conflock);
 }
 EXPORT_SYMBOL(posix_lock_file_conf);
 
@@ -1333,8 +1333,8 @@ int fcntl_getlease(struct file *filp)
 	int type = F_UNLCK;
 
 	lock_kernel();
-	time_out_leases(filp->f_dentry->d_inode);
-	for (fl = filp->f_dentry->d_inode->i_flock; fl && IS_LEASE(fl);
+	time_out_leases(filp->f_path.dentry->d_inode);
+	for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl);
 			fl = fl->fl_next) {
 		if (fl->fl_file == filp) {
 			type = fl->fl_type & ~F_INPROGRESS;
@@ -1359,7 +1359,7 @@ int fcntl_getlease(struct file *filp)
 static int __setlease(struct file *filp, long arg, struct file_lock **flp)
 {
 	struct file_lock *fl, **before, **my_before = NULL, *lease;
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	int error, rdlease_count = 0, wrlease_count = 0;
 
@@ -1448,7 +1448,7 @@ out:
 
 int setlease(struct file *filp, long arg, struct file_lock **lease)
 {
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	int error;
 
@@ -1482,7 +1482,7 @@ EXPORT_SYMBOL(setlease);
 int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
 {
 	struct file_lock fl, *flp = &fl;
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	int error;
 
@@ -1692,7 +1692,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	if (copy_from_user(&flock, l, sizeof(flock)))
 		goto out;
 
-	inode = filp->f_dentry->d_inode;
+	inode = filp->f_path.dentry->d_inode;
 
 	/* Don't allow mandatory locks on files that may be memory mapped
 	 * and shared.
@@ -1835,7 +1835,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 	if (copy_from_user(&flock, l, sizeof(flock)))
 		goto out;
 
-	inode = filp->f_dentry->d_inode;
+	inode = filp->f_path.dentry->d_inode;
 
 	/* Don't allow mandatory locks on files that may be memory mapped
 	 * and shared.
@@ -1922,7 +1922,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
 	 * posix_lock_file().  Another process could be setting a lock on this
 	 * file at the same time, but we wouldn't remove that lock anyway.
 	 */
-	if (!filp->f_dentry->d_inode->i_flock)
+	if (!filp->f_path.dentry->d_inode->i_flock)
 		return;
 
 	lock.fl_type = F_UNLCK;
@@ -1951,7 +1951,7 @@ EXPORT_SYMBOL(locks_remove_posix);
  */
 void locks_remove_flock(struct file *filp)
 {
-	struct inode * inode = filp->f_dentry->d_inode; 
+	struct inode * inode = filp->f_path.dentry->d_inode;
 	struct file_lock *fl;
 	struct file_lock **before;
 
@@ -2020,7 +2020,7 @@ static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx)
 	struct inode *inode = NULL;
 
 	if (fl->fl_file != NULL)
-		inode = fl->fl_file->f_dentry->d_inode;
+		inode = fl->fl_file->f_path.dentry->d_inode;
 
 	out += sprintf(out, "%d:%s ", id, pfx);
 	if (IS_POSIX(fl)) {
diff --git a/fs/namei.c b/fs/namei.c
index 8c2db88bb20..e4f108f0823 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -297,7 +297,7 @@ int vfs_permission(struct nameidata *nd, int mask)
  */
 int file_permission(struct file *file, int mask)
 {
-	return permission(file->f_dentry->d_inode, mask, NULL);
+	return permission(file->f_path.dentry->d_inode, mask, NULL);
 }
 
 /*
@@ -333,7 +333,7 @@ int get_write_access(struct inode * inode)
 
 int deny_write_access(struct file * file)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 
 	spin_lock(&inode->i_lock);
 	if (atomic_read(&inode->i_writecount) > 0) {
@@ -368,7 +368,7 @@ void path_release_on_umount(struct nameidata *nd)
  */
 void release_open_intent(struct nameidata *nd)
 {
-	if (nd->intent.open.file->f_dentry == NULL)
+	if (nd->intent.open.file->f_path.dentry == NULL)
 		put_filp(nd->intent.open.file);
 	else
 		fput(nd->intent.open.file);
@@ -1138,7 +1138,7 @@ static int fastcall do_path_lookup(int dfd, const char *name,
 		if (!file)
 			goto out_fail;
 
-		dentry = file->f_dentry;
+		dentry = file->f_path.dentry;
 
 		retval = -ENOTDIR;
 		if (!S_ISDIR(dentry->d_inode->i_mode))
@@ -1148,7 +1148,7 @@ static int fastcall do_path_lookup(int dfd, const char *name,
 		if (retval)
 			goto fput_fail;
 
-		nd->mnt = mntget(file->f_vfsmnt);
+		nd->mnt = mntget(file->f_path.mnt);
 		nd->dentry = dget(dentry);
 
 		fput_light(file, fput_needed);
diff --git a/fs/open.c b/fs/open.c
index 3b56192816c..0d94319e868 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -165,7 +165,7 @@ asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf)
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs_native(file->f_dentry, &tmp);
+	error = vfs_statfs_native(file->f_path.dentry, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -186,7 +186,7 @@ asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs64(file->f_dentry, &tmp);
+	error = vfs_statfs64(file->f_path.dentry, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -302,7 +302,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 	if (file->f_flags & O_LARGEFILE)
 		small = 0;
 
-	dentry = file->f_dentry;
+	dentry = file->f_path.dentry;
 	inode = dentry->d_inode;
 	error = -EINVAL;
 	if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
@@ -448,8 +448,8 @@ asmlinkage long sys_fchdir(unsigned int fd)
 	if (!file)
 		goto out;
 
-	dentry = file->f_dentry;
-	mnt = file->f_vfsmnt;
+	dentry = file->f_path.dentry;
+	mnt = file->f_path.mnt;
 	inode = dentry->d_inode;
 
 	error = -ENOTDIR;
@@ -503,7 +503,7 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
 	if (!file)
 		goto out;
 
-	dentry = file->f_dentry;
+	dentry = file->f_path.dentry;
 	inode = dentry->d_inode;
 
 	audit_inode(NULL, inode);
@@ -662,7 +662,7 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
 	if (!file)
 		goto out;
 
-	dentry = file->f_dentry;
+	dentry = file->f_path.dentry;
 	audit_inode(NULL, dentry->d_inode);
 	error = chown_common(dentry, user, group);
 	fput(file);
@@ -688,8 +688,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 	}
 
 	f->f_mapping = inode->i_mapping;
-	f->f_dentry = dentry;
-	f->f_vfsmnt = mnt;
+	f->f_path.dentry = dentry;
+	f->f_path.mnt = mnt;
 	f->f_pos = 0;
 	f->f_op = fops_get(inode->i_fop);
 	file_move(f, &inode->i_sb->s_files);
@@ -723,8 +723,8 @@ cleanup_all:
 	if (f->f_mode & FMODE_WRITE)
 		put_write_access(inode);
 	file_kill(f);
-	f->f_dentry = NULL;
-	f->f_vfsmnt = NULL;
+	f->f_path.dentry = NULL;
+	f->f_path.mnt = NULL;
 cleanup_file:
 	put_filp(f);
 	dput(dentry);
@@ -822,7 +822,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
 	/* Pick up the filp from the open intent */
 	filp = nd->intent.open.file;
 	/* Has the filesystem initialised the file for us? */
-	if (filp->f_dentry == NULL)
+	if (filp->f_path.dentry == NULL)
 		filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL);
 	else
 		path_release(nd);
@@ -965,7 +965,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
 				put_unused_fd(fd);
 				fd = PTR_ERR(f);
 			} else {
-				fsnotify_open(f->f_dentry);
+				fsnotify_open(f->f_path.dentry);
 				fd_install(fd, f);
 			}
 		}
diff --git a/fs/pipe.c b/fs/pipe.c
index ae36b89b1a3..f8b6bdcb879 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -222,7 +222,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
 	   unsigned long nr_segs, loff_t pos)
 {
 	struct file *filp = iocb->ki_filp;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct pipe_inode_info *pipe;
 	int do_wakeup;
 	ssize_t ret;
@@ -335,7 +335,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 	    unsigned long nr_segs, loff_t ppos)
 {
 	struct file *filp = iocb->ki_filp;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct pipe_inode_info *pipe;
 	ssize_t ret;
 	int do_wakeup;
@@ -520,7 +520,7 @@ static int
 pipe_ioctl(struct inode *pino, struct file *filp,
 	   unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct pipe_inode_info *pipe;
 	int count, buf, nrbufs;
 
@@ -548,7 +548,7 @@ static unsigned int
 pipe_poll(struct file *filp, poll_table *wait)
 {
 	unsigned int mask;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct pipe_inode_info *pipe = inode->i_pipe;
 	int nrbufs;
 
@@ -601,7 +601,7 @@ pipe_release(struct inode *inode, int decr, int decw)
 static int
 pipe_read_fasync(int fd, struct file *filp, int on)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	int retval;
 
 	mutex_lock(&inode->i_mutex);
@@ -618,7 +618,7 @@ pipe_read_fasync(int fd, struct file *filp, int on)
 static int
 pipe_write_fasync(int fd, struct file *filp, int on)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	int retval;
 
 	mutex_lock(&inode->i_mutex);
@@ -635,7 +635,7 @@ pipe_write_fasync(int fd, struct file *filp, int on)
 static int
 pipe_rdwr_fasync(int fd, struct file *filp, int on)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct pipe_inode_info *pipe = inode->i_pipe;
 	int retval;
 
@@ -914,8 +914,8 @@ struct file *create_write_pipe(void)
 	 */
 	dentry->d_flags &= ~DCACHE_UNHASHED;
 	d_instantiate(dentry, inode);
-	f->f_vfsmnt = mntget(pipe_mnt);
-	f->f_dentry = dentry;
+	f->f_path.mnt = mntget(pipe_mnt);
+	f->f_path.dentry = dentry;
 	f->f_mapping = inode->i_mapping;
 
 	f->f_flags = O_WRONLY;
@@ -935,8 +935,8 @@ struct file *create_write_pipe(void)
 
 void free_write_pipe(struct file *f)
 {
-	mntput(f->f_vfsmnt);
-	dput(f->f_dentry);
+	mntput(f->f_path.mnt);
+	dput(f->f_path.dentry);
 	put_filp(f);
 }
 
@@ -947,9 +947,9 @@ struct file *create_read_pipe(struct file *wrf)
 		return ERR_PTR(-ENFILE);
 
 	/* Grab pipe from the writer */
-	f->f_vfsmnt = mntget(wrf->f_vfsmnt);
-	f->f_dentry = dget(wrf->f_dentry);
-	f->f_mapping = wrf->f_dentry->d_inode->i_mapping;
+	f->f_path.mnt = mntget(wrf->f_path.mnt);
+	f->f_path.dentry = dget(wrf->f_path.dentry);
+	f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
 
 	f->f_pos = 0;
 	f->f_flags = O_RDONLY;
diff --git a/fs/read_write.c b/fs/read_write.c
index f792000a28e..1d3dda4fa70 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -64,13 +64,13 @@ loff_t remote_llseek(struct file *file, loff_t offset, int origin)
 	lock_kernel();
 	switch (origin) {
 		case 2:
-			offset += i_size_read(file->f_dentry->d_inode);
+			offset += i_size_read(file->f_path.dentry->d_inode);
 			break;
 		case 1:
 			offset += file->f_pos;
 	}
 	retval = -EINVAL;
-	if (offset>=0 && offset<=file->f_dentry->d_inode->i_sb->s_maxbytes) {
+	if (offset>=0 && offset<=file->f_path.dentry->d_inode->i_sb->s_maxbytes) {
 		if (offset != file->f_pos) {
 			file->f_pos = offset;
 			file->f_version = 0;
@@ -95,7 +95,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
 	lock_kernel();
 	switch (origin) {
 		case 2:
-			offset += i_size_read(file->f_dentry->d_inode);
+			offset += i_size_read(file->f_path.dentry->d_inode);
 			break;
 		case 1:
 			offset += file->f_pos;
@@ -203,7 +203,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
 	if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
 		goto Einval;
 
-	inode = file->f_dentry->d_inode;
+	inode = file->f_path.dentry->d_inode;
 	if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) {
 		int retval = locks_mandatory_area(
 			read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
@@ -273,7 +273,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 			else
 				ret = do_sync_read(file, buf, count, pos);
 			if (ret > 0) {
-				fsnotify_access(file->f_dentry);
+				fsnotify_access(file->f_path.dentry);
 				current->rchar += ret;
 			}
 			current->syscr++;
@@ -331,7 +331,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
 			else
 				ret = do_sync_write(file, buf, count, pos);
 			if (ret > 0) {
-				fsnotify_modify(file->f_dentry);
+				fsnotify_modify(file->f_path.dentry);
 				current->wchar += ret;
 			}
 			current->syscw++;
@@ -628,9 +628,9 @@ out:
 		kfree(iov);
 	if ((ret + (type == READ)) > 0) {
 		if (type == READ)
-			fsnotify_access(file->f_dentry);
+			fsnotify_access(file->f_path.dentry);
 		else
-			fsnotify_modify(file->f_dentry);
+			fsnotify_modify(file->f_path.dentry);
 	}
 	return ret;
 }
@@ -722,7 +722,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 	if (!(in_file->f_mode & FMODE_READ))
 		goto fput_in;
 	retval = -EINVAL;
-	in_inode = in_file->f_dentry->d_inode;
+	in_inode = in_file->f_path.dentry->d_inode;
 	if (!in_inode)
 		goto fput_in;
 	if (!in_file->f_op || !in_file->f_op->sendfile)
@@ -754,7 +754,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 	retval = -EINVAL;
 	if (!out_file->f_op || !out_file->f_op->sendpage)
 		goto fput_out;
-	out_inode = out_file->f_dentry->d_inode;
+	out_inode = out_file->f_path.dentry->d_inode;
 	retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
 	if (retval < 0)
 		goto fput_out;
diff --git a/fs/readdir.c b/fs/readdir.c
index bff3ee58e2f..f39f5b31325 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -21,7 +21,7 @@
 
 int vfs_readdir(struct file *file, filldir_t filler, void *buf)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	int res = -ENOTDIR;
 	if (!file->f_op || !file->f_op->readdir)
 		goto out;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 10690aa401c..0ac22af7afe 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -269,7 +269,7 @@ EXPORT_SYMBOL(seq_lseek);
 /**
  *	seq_release -	free the structures associated with sequential file.
  *	@file: file in question
- *	@inode: file->f_dentry->d_inode
+ *	@inode: file->f_path.dentry->d_inode
  *
  *	Frees the structures associated with sequential file; can be used
  *	as ->f_op->release() if you don't have private data to destroy.
diff --git a/fs/splice.c b/fs/splice.c
index da74583a00e..bbd0aeb3f68 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -844,7 +844,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
 	ssize_t ret;
 	int err;
 
-	err = remove_suid(out->f_dentry);
+	err = remove_suid(out->f_path.dentry);
 	if (unlikely(err))
 		return err;
 
@@ -890,10 +890,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 	ssize_t ret;
 	int err;
 
-	err = should_remove_suid(out->f_dentry);
+	err = should_remove_suid(out->f_path.dentry);
 	if (unlikely(err)) {
 		mutex_lock(&inode->i_mutex);
-		err = __remove_suid(out->f_dentry, err);
+		err = __remove_suid(out->f_path.dentry, err);
 		mutex_unlock(&inode->i_mutex);
 		if (err)
 			return err;
@@ -1008,7 +1008,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 	 * randomly drop data for eg socket -> socket splicing. Use the
 	 * piped splicing for that!
 	 */
-	i_mode = in->f_dentry->d_inode->i_mode;
+	i_mode = in->f_path.dentry->d_inode->i_mode;
 	if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
 		return -EINVAL;
 
@@ -1132,7 +1132,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 	loff_t offset, *off;
 	long ret;
 
-	pipe = pipe_info(in->f_dentry->d_inode);
+	pipe = pipe_info(in->f_path.dentry->d_inode);
 	if (pipe) {
 		if (off_in)
 			return -ESPIPE;
@@ -1153,7 +1153,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 		return ret;
 	}
 
-	pipe = pipe_info(out->f_dentry->d_inode);
+	pipe = pipe_info(out->f_path.dentry->d_inode);
 	if (pipe) {
 		if (off_out)
 			return -ESPIPE;
@@ -1321,7 +1321,7 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
 		.ops = &user_page_pipe_buf_ops,
 	};
 
-	pipe = pipe_info(file->f_dentry->d_inode);
+	pipe = pipe_info(file->f_path.dentry->d_inode);
 	if (!pipe)
 		return -EBADF;
 	if (unlikely(nr_segs > UIO_MAXIOV))
@@ -1549,8 +1549,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 static long do_tee(struct file *in, struct file *out, size_t len,
 		   unsigned int flags)
 {
-	struct pipe_inode_info *ipipe = pipe_info(in->f_dentry->d_inode);
-	struct pipe_inode_info *opipe = pipe_info(out->f_dentry->d_inode);
+	struct pipe_inode_info *ipipe = pipe_info(in->f_path.dentry->d_inode);
+	struct pipe_inode_info *opipe = pipe_info(out->f_path.dentry->d_inode);
 	int ret = -EINVAL;
 
 	/*
diff --git a/fs/stat.c b/fs/stat.c
index a0ebfc7f8a6..38a8cb2a28d 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -102,7 +102,7 @@ int vfs_fstat(unsigned int fd, struct kstat *stat)
 	int error = -EBADF;
 
 	if (f) {
-		error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat);
+		error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
 		fput(f);
 	}
 	return error;
diff --git a/fs/super.c b/fs/super.c
index 84c320f6ad7..f961e030799 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -570,7 +570,7 @@ static void mark_files_ro(struct super_block *sb)
 
 	file_list_lock();
 	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
-		if (S_ISREG(f->f_dentry->d_inode->i_mode) && file_count(f))
+		if (S_ISREG(f->f_path.dentry->d_inode->i_mode) && file_count(f))
 			f->f_mode &= ~FMODE_WRITE;
 	}
 	file_list_unlock();
diff --git a/fs/sync.c b/fs/sync.c
index 865f32be386..d0feff61e6a 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -94,7 +94,7 @@ long do_fsync(struct file *file, int datasync)
 	 * livelocks in fsync_buffers_list().
 	 */
 	mutex_lock(&mapping->host->i_mutex);
-	err = file->f_op->fsync(file, file->f_dentry, datasync);
+	err = file->f_op->fsync(file, file->f_path.dentry, datasync);
 	if (!ret)
 		ret = err;
 	mutex_unlock(&mapping->host->i_mutex);
@@ -223,7 +223,7 @@ asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
 	if (!file)
 		goto out;
 
-	i_mode = file->f_dentry->d_inode->i_mode;
+	i_mode = file->f_path.dentry->d_inode->i_mode;
 	ret = -ESPIPE;
 	if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
 			!S_ISLNK(i_mode))
diff --git a/fs/xattr.c b/fs/xattr.c
index 0901bdc2ce2..38646132ab0 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -268,7 +268,7 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
 	f = fget(fd);
 	if (!f)
 		return error;
-	dentry = f->f_dentry;
+	dentry = f->f_path.dentry;
 	audit_inode(NULL, dentry->d_inode);
 	error = setxattr(dentry, name, value, size, flags);
 	fput(f);
@@ -351,7 +351,7 @@ sys_fgetxattr(int fd, char __user *name, void __user *value, size_t size)
 	f = fget(fd);
 	if (!f)
 		return error;
-	error = getxattr(f->f_dentry, name, value, size);
+	error = getxattr(f->f_path.dentry, name, value, size);
 	fput(f);
 	return error;
 }
@@ -423,7 +423,7 @@ sys_flistxattr(int fd, char __user *list, size_t size)
 	f = fget(fd);
 	if (!f)
 		return error;
-	error = listxattr(f->f_dentry, list, size);
+	error = listxattr(f->f_path.dentry, list, size);
 	fput(f);
 	return error;
 }
@@ -484,7 +484,7 @@ sys_fremovexattr(int fd, char __user *name)
 	f = fget(fd);
 	if (!f)
 		return error;
-	dentry = f->f_dentry;
+	dentry = f->f_path.dentry;
 	audit_inode(NULL, dentry->d_inode);
 	error = removexattr(dentry, name);
 	fput(f);
-- 
cgit v1.2.3


From f427f5d5d6b0eb729505a2d9c0a6cad22734a74c Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:36 -0800
Subject: [PATCH] sysfs: change uses of f_{dentry, vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the sysfs
filesystem code.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/sysfs/bin.c  | 14 +++++++-------
 fs/sysfs/dir.c  | 10 +++++-----
 fs/sysfs/file.c | 16 ++++++++--------
 3 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 98022e41cda..e8f540d38d4 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -35,7 +35,7 @@ static ssize_t
 read(struct file * file, char __user * userbuf, size_t count, loff_t * off)
 {
 	char *buffer = file->private_data;
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = file->f_path.dentry;
 	int size = dentry->d_inode->i_size;
 	loff_t offs = *off;
 	int ret;
@@ -81,7 +81,7 @@ static ssize_t write(struct file * file, const char __user * userbuf,
 		     size_t count, loff_t * off)
 {
 	char *buffer = file->private_data;
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = file->f_path.dentry;
 	int size = dentry->d_inode->i_size;
 	loff_t offs = *off;
 
@@ -105,7 +105,7 @@ static ssize_t write(struct file * file, const char __user * userbuf,
 
 static int mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = file->f_path.dentry;
 	struct bin_attribute *attr = to_bin_attr(dentry);
 	struct kobject *kobj = to_kobj(dentry->d_parent);
 
@@ -117,8 +117,8 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
 
 static int open(struct inode * inode, struct file * file)
 {
-	struct kobject *kobj = sysfs_get_kobject(file->f_dentry->d_parent);
-	struct bin_attribute * attr = to_bin_attr(file->f_dentry);
+	struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
+	struct bin_attribute * attr = to_bin_attr(file->f_path.dentry);
 	int error = -EINVAL;
 
 	if (!kobj || !attr)
@@ -153,8 +153,8 @@ static int open(struct inode * inode, struct file * file)
 
 static int release(struct inode * inode, struct file * file)
 {
-	struct kobject * kobj = to_kobj(file->f_dentry->d_parent);
-	struct bin_attribute * attr = to_bin_attr(file->f_dentry);
+	struct kobject * kobj = to_kobj(file->f_path.dentry->d_parent);
+	struct bin_attribute * attr = to_bin_attr(file->f_path.dentry);
 	u8 * buffer = file->private_data;
 
 	if (kobj) 
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index a5782e8c7f0..511edef8b32 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -419,7 +419,7 @@ out:
 
 static int sysfs_dir_open(struct inode *inode, struct file *file)
 {
-	struct dentry * dentry = file->f_dentry;
+	struct dentry * dentry = file->f_path.dentry;
 	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
@@ -432,7 +432,7 @@ static int sysfs_dir_open(struct inode *inode, struct file *file)
 
 static int sysfs_dir_close(struct inode *inode, struct file *file)
 {
-	struct dentry * dentry = file->f_dentry;
+	struct dentry * dentry = file->f_path.dentry;
 	struct sysfs_dirent * cursor = file->private_data;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
@@ -452,7 +452,7 @@ static inline unsigned char dt_type(struct sysfs_dirent *sd)
 
 static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
 	struct sysfs_dirent *cursor = filp->private_data;
 	struct list_head *p, *q = &cursor->s_sibling;
@@ -509,7 +509,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
 
 static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 {
-	struct dentry * dentry = file->f_dentry;
+	struct dentry * dentry = file->f_path.dentry;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
 	switch (origin) {
@@ -519,7 +519,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 			if (offset >= 0)
 				break;
 		default:
-			mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+			mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
 			return -EINVAL;
 	}
 	if (offset != file->f_pos) {
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 95c165101c9..9cfe53e1e00 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -154,7 +154,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 
 	down(&buffer->sem);
 	if (buffer->needs_read_fill) {
-		if ((retval = fill_read_buffer(file->f_dentry,buffer)))
+		if ((retval = fill_read_buffer(file->f_path.dentry,buffer)))
 			goto out;
 	}
 	pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
@@ -245,7 +245,7 @@ sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t
 	down(&buffer->sem);
 	len = fill_write_buffer(buffer, buf, count);
 	if (len > 0)
-		len = flush_write_buffer(file->f_dentry, buffer, len);
+		len = flush_write_buffer(file->f_path.dentry, buffer, len);
 	if (len > 0)
 		*ppos += len;
 	up(&buffer->sem);
@@ -254,8 +254,8 @@ sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t
 
 static int check_perm(struct inode * inode, struct file * file)
 {
-	struct kobject *kobj = sysfs_get_kobject(file->f_dentry->d_parent);
-	struct attribute * attr = to_attr(file->f_dentry);
+	struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
+	struct attribute * attr = to_attr(file->f_path.dentry);
 	struct sysfs_buffer * buffer;
 	struct sysfs_ops * ops = NULL;
 	int error = 0;
@@ -337,8 +337,8 @@ static int sysfs_open_file(struct inode * inode, struct file * filp)
 
 static int sysfs_release(struct inode * inode, struct file * filp)
 {
-	struct kobject * kobj = to_kobj(filp->f_dentry->d_parent);
-	struct attribute * attr = to_attr(filp->f_dentry);
+	struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
+	struct attribute * attr = to_attr(filp->f_path.dentry);
 	struct module * owner = attr->owner;
 	struct sysfs_buffer * buffer = filp->private_data;
 
@@ -372,8 +372,8 @@ static int sysfs_release(struct inode * inode, struct file * filp)
 static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
 {
 	struct sysfs_buffer * buffer = filp->private_data;
-	struct kobject * kobj = to_kobj(filp->f_dentry->d_parent);
-	struct sysfs_dirent * sd = filp->f_dentry->d_fsdata;
+	struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
+	struct sysfs_dirent * sd = filp->f_path.dentry->d_fsdata;
 	int res = 0;
 
 	poll_wait(filp, &kobj->poll, wait);
-- 
cgit v1.2.3


From 2fddfeefeed703b7638af97aa3048f82a2d53b03 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:36 -0800
Subject: [PATCH] proc: change uses of f_{dentry, vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the proc
filesystem code.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/base.c       | 36 ++++++++++++++++++------------------
 fs/proc/generic.c    | 10 +++++-----
 fs/proc/nommu.c      |  4 ++--
 fs/proc/task_mmu.c   |  8 ++++----
 fs/proc/task_nommu.c |  4 ++--
 5 files changed, 31 insertions(+), 31 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index b859fc749c0..a71f1755bb5 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -472,7 +472,7 @@ static struct file_operations proc_mountstats_operations = {
 static ssize_t proc_info_read(struct file * file, char __user * buf,
 			  size_t count, loff_t *ppos)
 {
-	struct inode * inode = file->f_dentry->d_inode;
+	struct inode * inode = file->f_path.dentry->d_inode;
 	unsigned long page;
 	ssize_t length;
 	struct task_struct *task = get_proc_task(inode);
@@ -512,7 +512,7 @@ static int mem_open(struct inode* inode, struct file* file)
 static ssize_t mem_read(struct file * file, char __user * buf,
 			size_t count, loff_t *ppos)
 {
-	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 	char *page;
 	unsigned long src = *ppos;
 	int ret = -ESRCH;
@@ -584,7 +584,7 @@ static ssize_t mem_write(struct file * file, const char * buf,
 {
 	int copied;
 	char *page;
-	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 	unsigned long dst = *ppos;
 
 	copied = -ESRCH;
@@ -654,7 +654,7 @@ static struct file_operations proc_mem_operations = {
 static ssize_t oom_adjust_read(struct file *file, char __user *buf,
 				size_t count, loff_t *ppos)
 {
-	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 	char buffer[PROC_NUMBUF];
 	size_t len;
 	int oom_adjust;
@@ -694,7 +694,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 		return -EINVAL;
 	if (*end == '\n')
 		end++;
-	task = get_proc_task(file->f_dentry->d_inode);
+	task = get_proc_task(file->f_path.dentry->d_inode);
 	if (!task)
 		return -ESRCH;
 	if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
@@ -718,7 +718,7 @@ static struct file_operations proc_oom_adjust_operations = {
 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
 				  size_t count, loff_t *ppos)
 {
-	struct inode * inode = file->f_dentry->d_inode;
+	struct inode * inode = file->f_path.dentry->d_inode;
 	struct task_struct *task = get_proc_task(inode);
 	ssize_t length;
 	char tmpbuf[TMPBUFLEN];
@@ -734,7 +734,7 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
 				   size_t count, loff_t *ppos)
 {
-	struct inode * inode = file->f_dentry->d_inode;
+	struct inode * inode = file->f_path.dentry->d_inode;
 	char *page, *tmp;
 	ssize_t length;
 	uid_t loginuid;
@@ -1078,7 +1078,7 @@ static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
 	char *name, int len,
 	instantiate_t instantiate, struct task_struct *task, void *ptr)
 {
-	struct dentry *child, *dir = filp->f_dentry;
+	struct dentry *child, *dir = filp->f_path.dentry;
 	struct inode *inode;
 	struct qstr qname;
 	ino_t ino = 0;
@@ -1157,8 +1157,8 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
 		spin_lock(&files->file_lock);
 		file = fcheck_files(files, fd);
 		if (file) {
-			*mnt = mntget(file->f_vfsmnt);
-			*dentry = dget(file->f_dentry);
+			*mnt = mntget(file->f_path.mnt);
+			*dentry = dget(file->f_path.dentry);
 			spin_unlock(&files->file_lock);
 			put_files_struct(files);
 			return 0;
@@ -1293,7 +1293,7 @@ static int proc_fd_fill_cache(struct file *filp, void *dirent, filldir_t filldir
 
 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
 {
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	struct task_struct *p = get_proc_task(inode);
 	unsigned int fd, tid, ino;
@@ -1440,7 +1440,7 @@ static int proc_pident_readdir(struct file *filp,
 {
 	int i;
 	int pid;
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	struct task_struct *task = get_proc_task(inode);
 	struct pid_entry *p, *last;
@@ -1496,7 +1496,7 @@ out_no_task:
 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
 				  size_t count, loff_t *ppos)
 {
-	struct inode * inode = file->f_dentry->d_inode;
+	struct inode * inode = file->f_path.dentry->d_inode;
 	unsigned long page;
 	ssize_t length;
 	struct task_struct *task = get_proc_task(inode);
@@ -1512,7 +1512,7 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
 		goto out;
 
 	length = security_getprocattr(task,
-				      (char*)file->f_dentry->d_name.name,
+				      (char*)file->f_path.dentry->d_name.name,
 				      (void*)page, count);
 	if (length >= 0)
 		length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
@@ -1526,7 +1526,7 @@ out_no_task:
 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
 				   size_t count, loff_t *ppos)
 {
-	struct inode * inode = file->f_dentry->d_inode;
+	struct inode * inode = file->f_path.dentry->d_inode;
 	char *page;
 	ssize_t length;
 	struct task_struct *task = get_proc_task(inode);
@@ -1552,7 +1552,7 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
 		goto out_free;
 
 	length = security_setprocattr(task,
-				      (char*)file->f_dentry->d_name.name,
+				      (char*)file->f_path.dentry->d_name.name,
 				      (void*)page, count);
 out_free:
 	free_page((unsigned long) page);
@@ -1994,7 +1994,7 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
 	unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
-	struct task_struct *reaper = get_proc_task(filp->f_dentry->d_inode);
+	struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
 	struct task_struct *task;
 	int tgid;
 
@@ -2235,7 +2235,7 @@ static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filld
 /* for the /proc/TGID/task/ directories */
 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	struct task_struct *leader = get_proc_task(inode);
 	struct task_struct *task;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 4ba03009cf7..853cb877d5f 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -52,7 +52,7 @@ static ssize_t
 proc_file_read(struct file *file, char __user *buf, size_t nbytes,
 	       loff_t *ppos)
 {
-	struct inode * inode = file->f_dentry->d_inode;
+	struct inode * inode = file->f_path.dentry->d_inode;
 	char 	*page;
 	ssize_t	retval=0;
 	int	eof=0;
@@ -203,7 +203,7 @@ static ssize_t
 proc_file_write(struct file *file, const char __user *buffer,
 		size_t count, loff_t *ppos)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct proc_dir_entry * dp;
 	
 	dp = PDE(inode);
@@ -432,7 +432,7 @@ int proc_readdir(struct file * filp,
 	struct proc_dir_entry * de;
 	unsigned int ino;
 	int i;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	int ret = 0;
 
 	lock_kernel();
@@ -453,7 +453,7 @@ int proc_readdir(struct file * filp,
 			/* fall through */
 		case 1:
 			if (filldir(dirent, "..", 2, i,
-				    parent_ino(filp->f_dentry),
+				    parent_ino(filp->f_path.dentry),
 				    DT_DIR) < 0)
 				goto out;
 			i++;
@@ -558,7 +558,7 @@ static void proc_kill_inodes(struct proc_dir_entry *de)
 	file_list_lock();
 	list_for_each(p, &sb->s_files) {
 		struct file * filp = list_entry(p, struct file, f_u.fu_list);
-		struct dentry * dentry = filp->f_dentry;
+		struct dentry * dentry = filp->f_path.dentry;
 		struct inode * inode;
 		const struct file_operations *fops;
 
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index d7dbdf9e0f4..5ec67257e5f 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -46,7 +46,7 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
 	file = vma->vm_file;
 
 	if (file) {
-		struct inode *inode = vma->vm_file->f_dentry->d_inode;
+		struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
 		dev = inode->i_sb->s_dev;
 		ino = inode->i_ino;
 	}
@@ -67,7 +67,7 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
 		if (len < 1)
 			len = 1;
 		seq_printf(m, "%*c", len, ' ');
-		seq_path(m, file->f_vfsmnt, file->f_dentry, "");
+		seq_path(m, file->f_path.mnt, file->f_path.dentry, "");
 	}
 
 	seq_putc(m, '\n');
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 6b769afac55..55ade0d1562 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -94,8 +94,8 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount *
 	}
 
 	if (vma) {
-		*mnt = mntget(vma->vm_file->f_vfsmnt);
-		*dentry = dget(vma->vm_file->f_dentry);
+		*mnt = mntget(vma->vm_file->f_path.mnt);
+		*dentry = dget(vma->vm_file->f_path.dentry);
 		result = 0;
 	}
 
@@ -135,7 +135,7 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats
 	int len;
 
 	if (file) {
-		struct inode *inode = vma->vm_file->f_dentry->d_inode;
+		struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
 		dev = inode->i_sb->s_dev;
 		ino = inode->i_ino;
 	}
@@ -156,7 +156,7 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats
 	 */
 	if (file) {
 		pad_len_spaces(m, len);
-		seq_path(m, file->f_vfsmnt, file->f_dentry, "\n");
+		seq_path(m, file->f_path.mnt, file->f_path.dentry, "\n");
 	} else {
 		const char *name = arch_vma_name(vma);
 		if (!name) {
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 091aa8e48e0..fcc5caf93f5 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -126,8 +126,8 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount *
 	}
 
 	if (vma) {
-		*mnt = mntget(vma->vm_file->f_vfsmnt);
-		*dentry = dget(vma->vm_file->f_dentry);
+		*mnt = mntget(vma->vm_file->f_path.mnt);
+		*dentry = dget(vma->vm_file->f_path.dentry);
 		result = 0;
 	}
 
-- 
cgit v1.2.3


From c29c6934304a05781c9bd1611bd98870689f34e7 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:37 -0800
Subject: [PATCH] ext2: change uses of f_{dentry, vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the ext2
filesystem.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/dir.c   | 2 +-
 fs/ext2/ioctl.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 3e7a84a1e50..0b02ba9642d 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -248,7 +248,7 @@ static int
 ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
 {
 	loff_t pos = filp->f_pos;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
 	unsigned int offset = pos & ~PAGE_CACHE_MASK;
 	unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index e3cf8c81507..4b099d31071 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -90,7 +90,7 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 #ifdef CONFIG_COMPAT
 long ext2_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	int ret;
 
 	/* These are just misnamed, they actually get/put from/to user an int */
-- 
cgit v1.2.3


From fe21a69389165b9641a8d0b89e00a71c4e5f5d59 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:38 -0800
Subject: [PATCH] ext3: change uses of f_{dentry, vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the ext3
filesystem.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/dir.c   | 8 ++++----
 fs/ext3/file.c  | 2 +-
 fs/ext3/ioctl.c | 2 +-
 fs/ext3/namei.c | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 5a9313ecd4e..665adee99b3 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -103,7 +103,7 @@ static int ext3_readdir(struct file * filp,
 	struct ext3_dir_entry_2 *de;
 	struct super_block *sb;
 	int err;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	int ret = 0;
 
 	sb = inode->i_sb;
@@ -122,7 +122,7 @@ static int ext3_readdir(struct file * filp,
 		 * We don't set the inode dirty flag since it's not
 		 * critical that it get flushed back to the disk.
 		 */
-		EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
+		EXT3_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
 	}
 #endif
 	stored = 0;
@@ -402,7 +402,7 @@ static int call_filldir(struct file * filp, void * dirent,
 {
 	struct dir_private_info *info = filp->private_data;
 	loff_t	curr_pos;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct super_block * sb;
 	int error;
 
@@ -432,7 +432,7 @@ static int ext3_dx_readdir(struct file * filp,
 			 void * dirent, filldir_t filldir)
 {
 	struct dir_private_info *info = filp->private_data;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct fname *fname;
 	int	ret;
 
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index e96c388047e..881f6365c41 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -52,7 +52,7 @@ ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	ssize_t ret;
 	int err;
 
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 12daa686957..9b8090d94e6 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -257,7 +257,7 @@ flags_err:
 #ifdef CONFIG_COMPAT
 long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	int ret;
 
 	/* These are just misnamed, they actually get/put from/to user an int */
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 60d2f9dbdb0..4df39c4315e 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -602,7 +602,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 
 	dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
 		       start_minor_hash));
-	dir = dir_file->f_dentry->d_inode;
+	dir = dir_file->f_path.dentry->d_inode;
 	if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
 		hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
 		hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
@@ -613,7 +613,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 	}
 	hinfo.hash = start_hash;
 	hinfo.minor_hash = 0;
-	frame = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
+	frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err);
 	if (!frame)
 		return err;
 
-- 
cgit v1.2.3


From 9d549890e69156800878a486f832e92d8f3d7c97 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:38 -0800
Subject: [PATCH] ext4: change uses of f_{dentry, vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the ext4
filesystem.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/dir.c   | 8 ++++----
 fs/ext4/file.c  | 2 +-
 fs/ext4/ioctl.c | 2 +-
 fs/ext4/namei.c | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index f2ed3e7fb9f..da80368b66f 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -103,7 +103,7 @@ static int ext4_readdir(struct file * filp,
 	struct ext4_dir_entry_2 *de;
 	struct super_block *sb;
 	int err;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	int ret = 0;
 
 	sb = inode->i_sb;
@@ -122,7 +122,7 @@ static int ext4_readdir(struct file * filp,
 		 * We don't set the inode dirty flag since it's not
 		 * critical that it get flushed back to the disk.
 		 */
-		EXT4_I(filp->f_dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL;
+		EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL;
 	}
 #endif
 	stored = 0;
@@ -402,7 +402,7 @@ static int call_filldir(struct file * filp, void * dirent,
 {
 	struct dir_private_info *info = filp->private_data;
 	loff_t	curr_pos;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct super_block * sb;
 	int error;
 
@@ -432,7 +432,7 @@ static int ext4_dx_readdir(struct file * filp,
 			 void * dirent, filldir_t filldir)
 {
 	struct dir_private_info *info = filp->private_data;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct fname *fname;
 	int	ret;
 
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 0b622c0624b..3bbc24b5878 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -52,7 +52,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	ssize_t ret;
 	int err;
 
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 22a737c306c..500567dd53b 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -256,7 +256,7 @@ flags_err:
 #ifdef CONFIG_COMPAT
 long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	int ret;
 
 	/* These are just misnamed, they actually get/put from/to user an int */
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 859990eac50..e5a74a5ac26 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -602,7 +602,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 
 	dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
 		       start_minor_hash));
-	dir = dir_file->f_dentry->d_inode;
+	dir = dir_file->f_path.dentry->d_inode;
 	if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
 		hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
 		hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
@@ -613,7 +613,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 	}
 	hinfo.hash = start_hash;
 	hinfo.minor_hash = 0;
-	frame = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
+	frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err);
 	if (!frame)
 		return err;
 
-- 
cgit v1.2.3


From dba32306099d6155b773ebe8fc5bcfab60d075d6 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:39 -0800
Subject: [PATCH] fat: change uses of f_{dentry,vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the fat
filesystem.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fat/dir.c  | 6 +++---
 fs/fat/file.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 69c439f4438..c16af246d24 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -579,7 +579,7 @@ parse_record:
 	if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME))
 		inum = inode->i_ino;
 	else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
-		inum = parent_ino(filp->f_dentry);
+		inum = parent_ino(filp->f_path.dentry);
 	} else {
 		loff_t i_pos = fat_make_i_pos(sb, bh, de);
 		struct inode *tmp = fat_iget(sb, i_pos);
@@ -643,7 +643,7 @@ out:
 
 static int fat_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	return __fat_readdir(inode, filp, dirent, filldir, 0, 0);
 }
 
@@ -782,7 +782,7 @@ static long fat_compat_dir_ioctl(struct file *file, unsigned cmd,
 
 	set_fs(KERNEL_DS);
 	lock_kernel();
-	ret = fat_dir_ioctl(file->f_dentry->d_inode, file,
+	ret = fat_dir_ioctl(file->f_path.dentry->d_inode, file,
 			    cmd, (unsigned long) &d);
 	unlock_kernel();
 	set_fs(oldfs);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 0aa813d944a..c1237b70c1f 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -92,7 +92,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
 		}
 
 		/* This MUST be done before doing anything irreversible... */
-		err = notify_change(filp->f_dentry, &ia);
+		err = notify_change(filp->f_path.dentry, &ia);
 		if (err)
 			goto up;
 
-- 
cgit v1.2.3


From 2485822d51f8b338d289abe00eb7ce5249794a08 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:40 -0800
Subject: [PATCH] isofs: change uses of f_{dentry, vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the isofs
filesystem.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/isofs/compress.c | 2 +-
 fs/isofs/dir.c      | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 731816332b1..6bbbdb53581 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -42,7 +42,7 @@ static struct semaphore zisofs_zlib_semaphore;
  */
 static int zisofs_readpage(struct file *file, struct page *page)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
 	unsigned int maxpage, xpage, fpage, blockindex;
 	unsigned long offset;
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 27e276987fd..4af2548f97a 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -183,7 +183,7 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
 
 		/* Handle the case of the '..' directory */
 		if (de->name_len[0] == 1 && de->name[0] == 1) {
-			inode_number = parent_ino(filp->f_dentry);
+			inode_number = parent_ino(filp->f_path.dentry);
 			if (filldir(dirent, "..", 2, filp->f_pos, inode_number, DT_DIR) < 0)
 				break;
 			filp->f_pos += de_len;
@@ -255,8 +255,7 @@ static int isofs_readdir(struct file *filp,
 	int result;
 	char * tmpname;
 	struct iso_directory_record * tmpde;
-	struct inode *inode = filp->f_dentry->d_inode;
-
+	struct inode *inode = filp->f_path.dentry->d_inode;
 
 	tmpname = (char *)__get_free_page(GFP_KERNEL);
 	if (tmpname == NULL)
-- 
cgit v1.2.3


From 01cce933d8b524d9312f5098c70fa1b6ac190572 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:40 -0800
Subject: [PATCH] nfs: change uses of f_{dentry,vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the nfs
client code.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/dir.c      | 18 +++++++++---------
 fs/nfs/direct.c   | 10 +++++-----
 fs/nfs/file.c     | 14 +++++++-------
 fs/nfs/idmap.c    |  2 +-
 fs/nfs/inode.c    |  6 +++---
 fs/nfs/nfs3proc.c |  2 +-
 fs/nfs/proc.c     |  2 +-
 fs/nfs/write.c    |  4 ++--
 8 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b34cd16f472..dee3d6c0f19 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -172,7 +172,7 @@ static
 int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 {
 	struct file	*file = desc->file;
-	struct inode	*inode = file->f_dentry->d_inode;
+	struct inode	*inode = file->f_path.dentry->d_inode;
 	struct rpc_cred	*cred = nfs_file_cred(file);
 	unsigned long	timestamp;
 	int		error;
@@ -183,7 +183,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 
  again:
 	timestamp = jiffies;
-	error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->entry->cookie, page,
+	error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page,
 					  NFS_SERVER(inode)->dtsize, desc->plus);
 	if (error < 0) {
 		/* We requested READDIRPLUS, but the server doesn't grok it */
@@ -308,7 +308,7 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
 static inline
 int find_dirent_page(nfs_readdir_descriptor_t *desc)
 {
-	struct inode	*inode = desc->file->f_dentry->d_inode;
+	struct inode	*inode = desc->file->f_path.dentry->d_inode;
 	struct page	*page;
 	int		status;
 
@@ -464,7 +464,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 		     filldir_t filldir)
 {
 	struct file	*file = desc->file;
-	struct inode	*inode = file->f_dentry->d_inode;
+	struct inode	*inode = file->f_path.dentry->d_inode;
 	struct rpc_cred	*cred = nfs_file_cred(file);
 	struct page	*page = NULL;
 	int		status;
@@ -477,7 +477,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 		status = -ENOMEM;
 		goto out;
 	}
-	desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, *desc->dir_cookie,
+	desc->error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, *desc->dir_cookie,
 						page,
 						NFS_SERVER(inode)->dtsize,
 						desc->plus);
@@ -516,7 +516,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
  */
 static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct dentry	*dentry = filp->f_dentry;
+	struct dentry	*dentry = filp->f_path.dentry;
 	struct inode	*inode = dentry->d_inode;
 	nfs_readdir_descriptor_t my_desc,
 			*desc = &my_desc;
@@ -599,7 +599,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
 loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
 {
-	mutex_lock(&filp->f_dentry->d_inode->i_mutex);
+	mutex_lock(&filp->f_path.dentry->d_inode->i_mutex);
 	switch (origin) {
 		case 1:
 			offset += filp->f_pos;
@@ -615,7 +615,7 @@ loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
 		((struct nfs_open_context *)filp->private_data)->dir_cookie = 0;
 	}
 out:
-	mutex_unlock(&filp->f_dentry->d_inode->i_mutex);
+	mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex);
 	return offset;
 }
 
@@ -1102,7 +1102,7 @@ no_open:
 
 static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
 {
-	struct dentry *parent = desc->file->f_dentry;
+	struct dentry *parent = desc->file->f_path.dentry;
 	struct inode *dir = parent->d_inode;
 	struct nfs_entry *entry = desc->entry;
 	struct dentry *dentry, *alias;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f9d678f4ae0..bd21d7fde65 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -116,7 +116,7 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
 ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
 {
 	dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
-			iocb->ki_filp->f_dentry->d_name.name,
+			iocb->ki_filp->f_path.dentry->d_name.name,
 			(long long) pos, nr_segs);
 
 	return -EINVAL;
@@ -734,8 +734,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
 	size_t count = iov[0].iov_len;
 
 	dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
-		file->f_dentry->d_parent->d_name.name,
-		file->f_dentry->d_name.name,
+		file->f_path.dentry->d_parent->d_name.name,
+		file->f_path.dentry->d_name.name,
 		(unsigned long) count, (long long) pos);
 
 	if (nr_segs != 1)
@@ -798,8 +798,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	size_t count = iov[0].iov_len;
 
 	dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
-		file->f_dentry->d_parent->d_name.name,
-		file->f_dentry->d_name.name,
+		file->f_path.dentry->d_parent->d_name.name,
+		file->f_path.dentry->d_name.name,
 		(unsigned long) count, (long long) pos);
 
 	if (nr_segs != 1)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8e28bffc35a..0dd6be346aa 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -176,7 +176,7 @@ static int
 nfs_file_flush(struct file *file, fl_owner_t id)
 {
 	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
-	struct inode	*inode = file->f_dentry->d_inode;
+	struct inode	*inode = file->f_path.dentry->d_inode;
 	int		status;
 
 	dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
@@ -201,7 +201,7 @@ static ssize_t
 nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t pos)
 {
-	struct dentry * dentry = iocb->ki_filp->f_dentry;
+	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
 	struct inode * inode = dentry->d_inode;
 	ssize_t result;
 	size_t count = iov_length(iov, nr_segs);
@@ -226,7 +226,7 @@ static ssize_t
 nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
 		read_actor_t actor, void *target)
 {
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	ssize_t res;
 
@@ -243,7 +243,7 @@ nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
 static int
 nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
 {
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	int	status;
 
@@ -343,7 +343,7 @@ const struct address_space_operations nfs_file_aops = {
 static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
 				unsigned long nr_segs, loff_t pos)
 {
-	struct dentry * dentry = iocb->ki_filp->f_dentry;
+	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
 	struct inode * inode = dentry->d_inode;
 	ssize_t result;
 	size_t count = iov_length(iov, nr_segs);
@@ -535,8 +535,8 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
 {
 	dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n",
-			filp->f_dentry->d_inode->i_sb->s_id,
-			filp->f_dentry->d_inode->i_ino,
+			filp->f_path.dentry->d_inode->i_sb->s_id,
+			filp->f_path.dentry->d_inode->i_ino,
 			fl->fl_type, fl->fl_flags);
 
 	/*
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 82ad7110a1c..9d4a6b2d199 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -377,7 +377,7 @@ idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
 static ssize_t
 idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 {
-        struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
+        struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
 	struct idmap *idmap = (struct idmap *)rpci->private;
 	struct idmap_msg im_in, *im = &idmap->idmap_im;
 	struct idmap_hashtable *h;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 36680d1061b..63e47027930 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -496,7 +496,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
  */
 static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	filp->private_data = get_nfs_open_context(ctx);
@@ -528,7 +528,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
 
 static void nfs_file_clear_open_context(struct file *filp)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data;
 
 	if (ctx) {
@@ -551,7 +551,7 @@ int nfs_open(struct inode *inode, struct file *filp)
 	cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
-	ctx = alloc_nfs_open_context(filp->f_vfsmnt, filp->f_dentry, cred);
+	ctx = alloc_nfs_open_context(filp->f_path.mnt, filp->f_path.dentry, cred);
 	put_rpccred(cred);
 	if (ctx == NULL)
 		return -ENOMEM;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 510ae524f3f..acd8fe9762d 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -841,7 +841,7 @@ static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
 static int
 nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
-	return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
+	return nlmclnt_proc(filp->f_path.dentry->d_inode, cmd, fl);
 }
 
 const struct nfs_rpc_ops nfs_v3_clientops = {
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 10f5e80ca15..560536ad74a 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -651,7 +651,7 @@ nfs_proc_commit_setup(struct nfs_write_data *data, int how)
 static int
 nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
-	return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
+	return nlmclnt_proc(filp->f_path.dentry->d_inode, cmd, fl);
 }
 
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 594eb16879e..345492e7864 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -754,8 +754,8 @@ int nfs_updatepage(struct file *file, struct page *page,
 	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
 
 	dprintk("NFS:      nfs_updatepage(%s/%s %d@%Ld)\n",
-		file->f_dentry->d_parent->d_name.name,
-		file->f_dentry->d_name.name, count,
+		file->f_path.dentry->d_parent->d_name.name,
+		file->f_path.dentry->d_name.name, count,
 		(long long)(page_offset(page) +offset));
 
 	/* If we're not using byte range locks, and we know the page
-- 
cgit v1.2.3


From 7eaa36e2d470ed63bf0c4e4dd8b09cc4a9e1c481 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:41 -0800
Subject: [PATCH] nfsd: change uses of f_{dentry, vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the nfs
server code.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 10 +++++-----
 fs/nfsd/nfsctl.c    |  2 +-
 fs/nfsd/vfs.c       | 16 ++++++++--------
 3 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 640c92b2a9f..b7179bd45a1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1310,7 +1310,7 @@ static inline void
 nfs4_file_downgrade(struct file *filp, unsigned int share_access)
 {
 	if (share_access & NFS4_SHARE_ACCESS_WRITE) {
-		put_write_access(filp->f_dentry->d_inode);
+		put_write_access(filp->f_path.dentry->d_inode);
 		filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE;
 	}
 }
@@ -1623,7 +1623,7 @@ static __be32
 nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open)
 {
 	struct file *filp = stp->st_vfs_file;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	unsigned int share_access, new_writer;
 	__be32 status;
 
@@ -1965,7 +1965,7 @@ search_close_lru(u32 st_id, int flags)
 static inline int
 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
 {
-	return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_dentry->d_inode;
+	return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_path.dentry->d_inode;
 }
 
 static int
@@ -2862,7 +2862,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	 * only the dentry:inode set.
 	 */
 	memset(&file, 0, sizeof (struct file));
-	file.f_dentry = current_fh->fh_dentry;
+	file.f_path.dentry = current_fh->fh_dentry;
 
 	status = nfs_ok;
 	if (posix_test_lock(&file, &file_lock, &conflock)) {
@@ -2952,7 +2952,7 @@ static int
 check_for_locks(struct file *filp, struct nfs4_stateowner *lowner)
 {
 	struct file_lock **flpp;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	int status = 0;
 
 	lock_kernel();
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 39aed901514..eedf2e3990a 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -111,7 +111,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
 
 static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
 {
-	ino_t ino =  file->f_dentry->d_inode->i_ino;
+	ino_t ino =  file->f_path.dentry->d_inode->i_ino;
 	char *data;
 	ssize_t rv;
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index bb4d926e448..d610edde938 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -736,10 +736,10 @@ static int
 nfsd_sync(struct file *filp)
 {
         int err;
-	struct inode *inode = filp->f_dentry->d_inode;
-	dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name);
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	dprintk("nfsd: sync file %s\n", filp->f_path.dentry->d_name.name);
 	mutex_lock(&inode->i_mutex);
-	err=nfsd_dosync(filp, filp->f_dentry, filp->f_op);
+	err=nfsd_dosync(filp, filp->f_path.dentry, filp->f_op);
 	mutex_unlock(&inode->i_mutex);
 
 	return err;
@@ -845,7 +845,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	int		host_err;
 
 	err = nfserr_perm;
-	inode = file->f_dentry->d_inode;
+	inode = file->f_path.dentry->d_inode;
 #ifdef MSNFS
 	if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
 		(!lock_may_read(inode, offset, *count)))
@@ -883,7 +883,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 		nfsdstats.io_read += host_err;
 		*count = host_err;
 		err = 0;
-		fsnotify_access(file->f_dentry);
+		fsnotify_access(file->f_path.dentry);
 	} else 
 		err = nfserrno(host_err);
 out:
@@ -917,11 +917,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	err = nfserr_perm;
 
 	if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
-		(!lock_may_write(file->f_dentry->d_inode, offset, cnt)))
+		(!lock_may_write(file->f_path.dentry->d_inode, offset, cnt)))
 		goto out;
 #endif
 
-	dentry = file->f_dentry;
+	dentry = file->f_path.dentry;
 	inode = dentry->d_inode;
 	exp   = fhp->fh_export;
 
@@ -950,7 +950,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	set_fs(oldfs);
 	if (host_err >= 0) {
 		nfsdstats.io_write += cnt;
-		fsnotify_modify(file->f_dentry);
+		fsnotify_modify(file->f_path.dentry);
 	}
 
 	/* clear setuid/setgid flag after write */
-- 
cgit v1.2.3


From 20d29372d3facfd078b4c7703637b951914d6759 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:41 -0800
Subject: [PATCH] ntfs: change uses of f_{dentry, vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the ntfs
filesystem code.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ntfs/dir.c  | 6 +++---
 fs/ntfs/file.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 85c36b8ca45..8296c29ae3b 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1101,7 +1101,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	s64 ia_pos, ia_start, prev_ia_pos, bmp_pos;
 	loff_t fpos, i_size;
-	struct inode *bmp_vi, *vdir = filp->f_dentry->d_inode;
+	struct inode *bmp_vi, *vdir = filp->f_path.dentry->d_inode;
 	struct super_block *sb = vdir->i_sb;
 	ntfs_inode *ndir = NTFS_I(vdir);
 	ntfs_volume *vol = NTFS_SB(sb);
@@ -1136,9 +1136,9 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	if (fpos == 1) {
 		ntfs_debug("Calling filldir for .. with len 2, fpos 0x1, "
 				"inode 0x%lx, DT_DIR.",
-				(unsigned long)parent_ino(filp->f_dentry));
+				(unsigned long)parent_ino(filp->f_path.dentry));
 		rc = filldir(dirent, "..", 2, fpos,
-				parent_ino(filp->f_dentry), DT_DIR);
+				parent_ino(filp->f_path.dentry), DT_DIR);
 		if (rc)
 			goto done;
 		fpos++;
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index ae2fe0016d2..076c9420c25 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2162,7 +2162,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
 		goto out;
 	if (!count)
 		goto out;
-	err = remove_suid(file->f_dentry);
+	err = remove_suid(file->f_path.dentry);
 	if (err)
 		goto out;
 	file_update_time(file);
-- 
cgit v1.2.3


From d6f787bceeab37cd6b0823872efe01b2a20effb5 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:45 -0800
Subject: [PATCH] 9p: change uses of f_{dentry,vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the 9p
filesystem.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/vfs_addr.c | 2 +-
 fs/9p/vfs_dir.c  | 4 ++--
 fs/9p/vfs_file.c | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 9dfd259a70b..cc24abf232d 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -54,7 +54,7 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
 	int retval = -EIO;
 	loff_t offset = page_offset(page);
 	int count = PAGE_CACHE_SIZE;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
 	int rsize = v9ses->maxdata - V9FS_IOHDRSZ;
 	struct v9fs_fid *v9f = filp->private_data;
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 905c882f4e2..3129688143e 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -71,7 +71,7 @@ static inline int dt_type(struct v9fs_stat *mistat)
 static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	struct v9fs_fcall *fcall = NULL;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
 	struct v9fs_fid *file = filp->private_data;
 	unsigned int i, n, s;
@@ -80,7 +80,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	struct v9fs_stat stat;
 	int over = 0;
 
-	dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name);
+	dprintk(DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
 
 	fid = file->fid;
 
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 79e6f9cd734..e86a0715128 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -60,7 +60,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 
 	dprintk(DEBUG_VFS, "inode: %p file: %p \n", inode, file);
 
-	vfid = v9fs_fid_lookup(file->f_dentry);
+	vfid = v9fs_fid_lookup(file->f_path.dentry);
 	if (!vfid) {
 		dprintk(DEBUG_ERROR, "Couldn't resolve fid from dentry\n");
 		return -EBADF;
@@ -133,7 +133,7 @@ free_fcall:
 static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
 	int res = 0;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 
 	dprintk(DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
 
@@ -161,7 +161,7 @@ static ssize_t
 v9fs_file_read(struct file *filp, char __user * data, size_t count,
 	       loff_t * offset)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
 	struct v9fs_fid *v9f = filp->private_data;
 	struct v9fs_fcall *fcall = NULL;
@@ -225,7 +225,7 @@ static ssize_t
 v9fs_file_write(struct file *filp, const char __user * data,
 		size_t count, loff_t * offset)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
 	struct v9fs_fid *v9fid = filp->private_data;
 	struct v9fs_fcall *fcall;
-- 
cgit v1.2.3


From 010596cc311131421c7be3a73ee7174bda16d322 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:45 -0800
Subject: [PATCH] affs: change uses of f_{dentry, vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the affs
filesystem.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/affs/dir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 5d9649fa181..cad3ee34006 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -41,7 +41,7 @@ struct inode_operations affs_dir_inode_operations = {
 static int
 affs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode		*inode = filp->f_dentry->d_inode;
+	struct inode		*inode = filp->f_path.dentry->d_inode;
 	struct super_block	*sb = inode->i_sb;
 	struct buffer_head	*dir_bh;
 	struct buffer_head	*fh_bh;
@@ -71,7 +71,7 @@ affs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		stored++;
 	}
 	if (f_pos == 1) {
-		if (filldir(dirent, "..", 2, f_pos, parent_ino(filp->f_dentry), DT_DIR) < 0)
+		if (filldir(dirent, "..", 2, f_pos, parent_ino(filp->f_path.dentry), DT_DIR) < 0)
 			return stored;
 		filp->f_pos = f_pos = 2;
 		stored++;
-- 
cgit v1.2.3


From 81ed19b076f518e793283ccbd004fd0888877201 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:46 -0800
Subject: [PATCH] autofs: change uses of f_{dentry, vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the autofs
filesystem.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs/root.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 368a1c33a3c..e698c51d2b0 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -45,7 +45,7 @@ static int autofs_root_readdir(struct file *filp, void *dirent, filldir_t filldi
 	struct autofs_dir_ent *ent = NULL;
 	struct autofs_dirhash *dirhash;
 	struct autofs_sb_info *sbi;
-	struct inode * inode = filp->f_dentry->d_inode;
+	struct inode * inode = filp->f_path.dentry->d_inode;
 	off_t onr, nr;
 
 	lock_kernel();
@@ -557,7 +557,7 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp,
 	case AUTOFS_IOC_SETTIMEOUT:
 		return autofs_get_set_timeout(sbi, argp);
 	case AUTOFS_IOC_EXPIRE:
-		return autofs_expire_run(inode->i_sb, sbi, filp->f_vfsmnt,
+		return autofs_expire_run(inode->i_sb, sbi, filp->f_path.mnt,
 					 argp);
 	default:
 		return -ENOSYS;
-- 
cgit v1.2.3


From a4669ed8ed982dab494c5d4f2b32921e5a6531d8 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:46 -0800
Subject: [PATCH] autofs4: change uses of f_{dentry, vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the autofs4
filesystem.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h |  3 ++-
 fs/autofs4/root.c     | 16 ++++++++--------
 2 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index b13f32c8aee..216b1a364cc 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -150,7 +150,8 @@ static inline int autofs4_ispending(struct dentry *dentry)
 
 static inline void autofs4_copy_atime(struct file *src, struct file *dst)
 {
-	dst->f_dentry->d_inode->i_atime = src->f_dentry->d_inode->i_atime;
+	dst->f_path.dentry->d_inode->i_atime =
+		src->f_path.dentry->d_inode->i_atime;
 	return;
 }
 
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index c1493524da4..8d05b9f7578 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -74,7 +74,7 @@ struct inode_operations autofs4_dir_inode_operations = {
 static int autofs4_root_readdir(struct file *file, void *dirent,
 				filldir_t filldir)
 {
-	struct autofs_sb_info *sbi = autofs4_sbi(file->f_dentry->d_sb);
+	struct autofs_sb_info *sbi = autofs4_sbi(file->f_path.dentry->d_sb);
 	int oz_mode = autofs4_oz_mode(sbi);
 
 	DPRINTK("called, filp->f_pos = %lld", file->f_pos);
@@ -95,8 +95,8 @@ static int autofs4_root_readdir(struct file *file, void *dirent,
 
 static int autofs4_dir_open(struct inode *inode, struct file *file)
 {
-	struct dentry *dentry = file->f_dentry;
-	struct vfsmount *mnt = file->f_vfsmnt;
+	struct dentry *dentry = file->f_path.dentry;
+	struct vfsmount *mnt = file->f_path.mnt;
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct dentry *cursor;
 	int status;
@@ -172,7 +172,7 @@ out:
 
 static int autofs4_dir_close(struct inode *inode, struct file *file)
 {
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = file->f_path.dentry;
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct dentry *cursor = file->private_data;
 	int status = 0;
@@ -204,7 +204,7 @@ out:
 
 static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldir)
 {
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = file->f_path.dentry;
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct dentry *cursor = file->private_data;
 	int status;
@@ -858,14 +858,14 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
 		return autofs4_ask_reghost(sbi, p);
 
 	case AUTOFS_IOC_ASKUMOUNT:
-		return autofs4_ask_umount(filp->f_vfsmnt, p);
+		return autofs4_ask_umount(filp->f_path.mnt, p);
 
 	/* return a single thing to expire */
 	case AUTOFS_IOC_EXPIRE:
-		return autofs4_expire_run(inode->i_sb,filp->f_vfsmnt,sbi, p);
+		return autofs4_expire_run(inode->i_sb,filp->f_path.mnt,sbi, p);
 	/* same as above, but can send multiple expires through pipe */
 	case AUTOFS_IOC_EXPIRE_MULTI:
-		return autofs4_expire_multi(inode->i_sb,filp->f_vfsmnt,sbi, p);
+		return autofs4_expire_multi(inode->i_sb,filp->f_path.mnt,sbi, p);
 
 	default:
 		return -ENOSYS;
-- 
cgit v1.2.3


From 867fa491a2722cee6964a30dfda86e0e02dcb400 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:47 -0800
Subject: [PATCH] configfs: change uses of f_{dentry, vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the
configfs filesystem.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/configfs/dir.c  | 10 +++++-----
 fs/configfs/file.c | 12 ++++++------
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index c398861f78a..1814ba44680 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -980,7 +980,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
 
 static int configfs_dir_open(struct inode *inode, struct file *file)
 {
-	struct dentry * dentry = file->f_dentry;
+	struct dentry * dentry = file->f_path.dentry;
 	struct configfs_dirent * parent_sd = dentry->d_fsdata;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
@@ -993,7 +993,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
 
 static int configfs_dir_close(struct inode *inode, struct file *file)
 {
-	struct dentry * dentry = file->f_dentry;
+	struct dentry * dentry = file->f_path.dentry;
 	struct configfs_dirent * cursor = file->private_data;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
@@ -1013,7 +1013,7 @@ static inline unsigned char dt_type(struct configfs_dirent *sd)
 
 static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct configfs_dirent * parent_sd = dentry->d_fsdata;
 	struct configfs_dirent *cursor = filp->private_data;
 	struct list_head *p, *q = &cursor->s_sibling;
@@ -1070,7 +1070,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
 
 static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
 {
-	struct dentry * dentry = file->f_dentry;
+	struct dentry * dentry = file->f_path.dentry;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
 	switch (origin) {
@@ -1080,7 +1080,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
 			if (offset >= 0)
 				break;
 		default:
-			mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+			mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
 			return -EINVAL;
 	}
 	if (offset != file->f_pos) {
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index cf33fac68c8..2a7cb086e80 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -134,7 +134,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
 
 	down(&buffer->sem);
 	if (buffer->needs_read_fill) {
-		if ((retval = fill_read_buffer(file->f_dentry,buffer)))
+		if ((retval = fill_read_buffer(file->f_path.dentry,buffer)))
 			goto out;
 	}
 	pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
@@ -222,7 +222,7 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof
 	down(&buffer->sem);
 	len = fill_write_buffer(buffer, buf, count);
 	if (len > 0)
-		len = flush_write_buffer(file->f_dentry, buffer, count);
+		len = flush_write_buffer(file->f_path.dentry, buffer, count);
 	if (len > 0)
 		*ppos += len;
 	up(&buffer->sem);
@@ -231,8 +231,8 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof
 
 static int check_perm(struct inode * inode, struct file * file)
 {
-	struct config_item *item = configfs_get_config_item(file->f_dentry->d_parent);
-	struct configfs_attribute * attr = to_attr(file->f_dentry);
+	struct config_item *item = configfs_get_config_item(file->f_path.dentry->d_parent);
+	struct configfs_attribute * attr = to_attr(file->f_path.dentry);
 	struct configfs_buffer * buffer;
 	struct configfs_item_operations * ops = NULL;
 	int error = 0;
@@ -305,8 +305,8 @@ static int configfs_open_file(struct inode * inode, struct file * filp)
 
 static int configfs_release(struct inode * inode, struct file * filp)
 {
-	struct config_item * item = to_item(filp->f_dentry->d_parent);
-	struct configfs_attribute * attr = to_attr(filp->f_dentry);
+	struct config_item * item = to_item(filp->f_path.dentry->d_parent);
+	struct configfs_attribute * attr = to_attr(filp->f_path.dentry);
 	struct module * owner = attr->ca_owner;
 	struct configfs_buffer * buffer = filp->private_data;
 
-- 
cgit v1.2.3


From e6a002964cf376c2acb1d67c4741044dcd3b1622 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:48 -0800
Subject: [PATCH] cifs: change uses of f_{dentry, vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the cifs
filesystem.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/cifsfs.c  |   4 +-
 fs/cifs/fcntl.c   |   4 +-
 fs/cifs/file.c    | 114 +++++++++++++++++++++++++++---------------------------
 fs/cifs/readdir.c |  32 +++++++--------
 4 files changed, 77 insertions(+), 77 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 71bc87a37fc..10c90294cd1 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -498,7 +498,7 @@ cifs_get_sb(struct file_system_type *fs_type,
 static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 				   unsigned long nr_segs, loff_t pos)
 {
-	struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
+	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
 	ssize_t written;
 
 	written = generic_file_aio_write(iocb, iov, nr_segs, pos);
@@ -511,7 +511,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
 {
 	/* origin == SEEK_END => we must revalidate the cached file length */
 	if (origin == SEEK_END) {
-		int retval = cifs_revalidate(file->f_dentry);
+		int retval = cifs_revalidate(file->f_path.dentry);
 		if (retval < 0)
 			return (loff_t)retval;
 	}
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index d91a3d44e9e..da12b482ebe 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -83,10 +83,10 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
 		return 0;
 
 	xid = GetXid();
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	pTcon = cifs_sb->tcon;
 
-	full_path = build_path_from_dentry(file->f_dentry);
+	full_path = build_path_from_dentry(file->f_path.dentry);
 
 	if(full_path == NULL) {
 		rc = -ENOMEM;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 2436ed8fc84..1aa95a50cac 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -122,34 +122,34 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
 	/* if not oplocked, invalidate inode pages if mtime or file
 	   size changed */
 	temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
-	if (timespec_equal(&file->f_dentry->d_inode->i_mtime, &temp) && 
-			   (file->f_dentry->d_inode->i_size == 
+	if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
+			   (file->f_path.dentry->d_inode->i_size ==
 			    (loff_t)le64_to_cpu(buf->EndOfFile))) {
 		cFYI(1, ("inode unchanged on server"));
 	} else {
-		if (file->f_dentry->d_inode->i_mapping) {
+		if (file->f_path.dentry->d_inode->i_mapping) {
 		/* BB no need to lock inode until after invalidate
 		   since namei code should already have it locked? */
-			filemap_write_and_wait(file->f_dentry->d_inode->i_mapping);
+			filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
 		}
 		cFYI(1, ("invalidating remote inode since open detected it "
 			 "changed"));
-		invalidate_remote_inode(file->f_dentry->d_inode);
+		invalidate_remote_inode(file->f_path.dentry->d_inode);
 	}
 
 client_can_cache:
 	if (pTcon->ses->capabilities & CAP_UNIX)
-		rc = cifs_get_inode_info_unix(&file->f_dentry->d_inode,
+		rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
 			full_path, inode->i_sb, xid);
 	else
-		rc = cifs_get_inode_info(&file->f_dentry->d_inode,
+		rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
 			full_path, buf, inode->i_sb, xid);
 
 	if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
 		pCifsInode->clientCanCacheAll = TRUE;
 		pCifsInode->clientCanCacheRead = TRUE;
 		cFYI(1, ("Exclusive Oplock granted on inode %p",
-			 file->f_dentry->d_inode));
+			 file->f_path.dentry->d_inode));
 	} else if ((*oplock & 0xF) == OPLOCK_READ)
 		pCifsInode->clientCanCacheRead = TRUE;
 
@@ -178,7 +178,7 @@ int cifs_open(struct inode *inode, struct file *file)
 
 	if (file->f_flags & O_CREAT) {
 		/* search inode for this file and fill in file->private_data */
-		pCifsInode = CIFS_I(file->f_dentry->d_inode);
+		pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
 		read_lock(&GlobalSMBSeslock);
 		list_for_each(tmp, &pCifsInode->openFileList) {
 			pCifsFile = list_entry(tmp, struct cifsFileInfo,
@@ -206,7 +206,7 @@ int cifs_open(struct inode *inode, struct file *file)
 		}
 	}
 
-	full_path = build_path_from_dentry(file->f_dentry);
+	full_path = build_path_from_dentry(file->f_path.dentry);
 	if (full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -291,7 +291,7 @@ int cifs_open(struct inode *inode, struct file *file)
 	write_lock(&GlobalSMBSeslock);
 	list_add(&pCifsFile->tlist, &pTcon->openFileList);
 
-	pCifsInode = CIFS_I(file->f_dentry->d_inode);
+	pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
 	if (pCifsInode) {
 		rc = cifs_open_inode_helper(inode, file, pCifsInode,
 					    pCifsFile, pTcon,
@@ -366,7 +366,7 @@ static int cifs_reopen_file(struct inode *inode, struct file *file,
 		return 0;
 	}
 
-	if (file->f_dentry == NULL) {
+	if (file->f_path.dentry == NULL) {
 		up(&pCifsFile->fh_sem);
 		cFYI(1, ("failed file reopen, no valid name if dentry freed"));
 		FreeXid(xid);
@@ -378,7 +378,7 @@ static int cifs_reopen_file(struct inode *inode, struct file *file,
    those that already have the rename sem can end up causing writepage
    to get called and if the server was down that means we end up here,
    and we can never tell if the caller already has the rename_sem */
-	full_path = build_path_from_dentry(file->f_dentry);
+	full_path = build_path_from_dentry(file->f_path.dentry);
 	if (full_path == NULL) {
 		up(&pCifsFile->fh_sem);
 		FreeXid(xid);
@@ -444,7 +444,7 @@ static int cifs_reopen_file(struct inode *inode, struct file *file,
 				pCifsInode->clientCanCacheAll = TRUE;
 				pCifsInode->clientCanCacheRead = TRUE;
 				cFYI(1, ("Exclusive Oplock granted on inode %p",
-					 file->f_dentry->d_inode));
+					 file->f_path.dentry->d_inode));
 			} else if ((oplock & 0xF) == OPLOCK_READ) {
 				pCifsInode->clientCanCacheRead = TRUE;
 				pCifsInode->clientCanCacheAll = FALSE;
@@ -551,7 +551,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
 
 	if (pCFileStruct) {
 		struct cifsTconInfo *pTcon;
-		struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_dentry->d_sb);
+		struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 
 		pTcon = cifs_sb->tcon;
 
@@ -664,7 +664,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 	} else
 		cFYI(1, ("Unknown type of lock"));
 
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	pTcon = cifs_sb->tcon;
 
 	if (file->private_data == NULL) {
@@ -791,10 +791,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
 	int xid, long_op;
 	struct cifsFileInfo *open_file;
 
-	if (file->f_dentry == NULL)
+	if (file->f_path.dentry == NULL)
 		return -EBADF;
 
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	if (cifs_sb == NULL)
 		return -EBADF;
 
@@ -802,7 +802,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
 
 	/* cFYI(1,
 	   (" write %d bytes to offset %lld of %s", write_size,
-	   *poffset, file->f_dentry->d_name.name)); */
+	   *poffset, file->f_path.dentry->d_name.name)); */
 
 	if (file->private_data == NULL)
 		return -EBADF;
@@ -810,12 +810,12 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
 		open_file = (struct cifsFileInfo *) file->private_data;
 	
 	xid = GetXid();
-	if (file->f_dentry->d_inode == NULL) {
+	if (file->f_path.dentry->d_inode == NULL) {
 		FreeXid(xid);
 		return -EBADF;
 	}
 
-	if (*poffset > file->f_dentry->d_inode->i_size)
+	if (*poffset > file->f_path.dentry->d_inode->i_size)
 		long_op = 2; /* writes past end of file can take a long time */
 	else
 		long_op = 1;
@@ -840,8 +840,8 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
 					return -EBADF;
 			}
 			if (open_file->invalidHandle) {
-				if ((file->f_dentry == NULL) ||
-				    (file->f_dentry->d_inode == NULL)) {
+				if ((file->f_path.dentry == NULL) ||
+				    (file->f_path.dentry->d_inode == NULL)) {
 					FreeXid(xid);
 					return total_written;
 				}
@@ -849,7 +849,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
 				   filemap_fdatawait from here so tell
 				   reopen_file not to flush data to server
 				   now */
-				rc = cifs_reopen_file(file->f_dentry->d_inode,
+				rc = cifs_reopen_file(file->f_path.dentry->d_inode,
 					file, FALSE);
 				if (rc != 0)
 					break;
@@ -878,17 +878,17 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
 	cifs_stats_bytes_written(pTcon, total_written);
 
 	/* since the write may have blocked check these pointers again */
-	if (file->f_dentry) {
-		if (file->f_dentry->d_inode) {
-			struct inode *inode = file->f_dentry->d_inode;
+	if (file->f_path.dentry) {
+		if (file->f_path.dentry->d_inode) {
+			struct inode *inode = file->f_path.dentry->d_inode;
 			inode->i_ctime = inode->i_mtime =
 				current_fs_time(inode->i_sb);
 			if (total_written > 0) {
-				if (*poffset > file->f_dentry->d_inode->i_size)
-					i_size_write(file->f_dentry->d_inode,
+				if (*poffset > file->f_path.dentry->d_inode->i_size)
+					i_size_write(file->f_path.dentry->d_inode,
 					*poffset);
 			}
-			mark_inode_dirty_sync(file->f_dentry->d_inode);
+			mark_inode_dirty_sync(file->f_path.dentry->d_inode);
 		}
 	}
 	FreeXid(xid);
@@ -906,17 +906,17 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
 	int xid, long_op;
 	struct cifsFileInfo *open_file;
 
-	if (file->f_dentry == NULL)
+	if (file->f_path.dentry == NULL)
 		return -EBADF;
 
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	if (cifs_sb == NULL)
 		return -EBADF;
 
 	pTcon = cifs_sb->tcon;
 
 	cFYI(1,("write %zd bytes to offset %lld of %s", write_size,
-	   *poffset, file->f_dentry->d_name.name));
+	   *poffset, file->f_path.dentry->d_name.name));
 
 	if (file->private_data == NULL)
 		return -EBADF;
@@ -924,12 +924,12 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
 		open_file = (struct cifsFileInfo *)file->private_data;
 	
 	xid = GetXid();
-	if (file->f_dentry->d_inode == NULL) {
+	if (file->f_path.dentry->d_inode == NULL) {
 		FreeXid(xid);
 		return -EBADF;
 	}
 
-	if (*poffset > file->f_dentry->d_inode->i_size)
+	if (*poffset > file->f_path.dentry->d_inode->i_size)
 		long_op = 2; /* writes past end of file can take a long time */
 	else
 		long_op = 1;
@@ -955,8 +955,8 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
 					return -EBADF;
 			}
 			if (open_file->invalidHandle) {
-				if ((file->f_dentry == NULL) ||
-				   (file->f_dentry->d_inode == NULL)) {
+				if ((file->f_path.dentry == NULL) ||
+				   (file->f_path.dentry->d_inode == NULL)) {
 					FreeXid(xid);
 					return total_written;
 				}
@@ -964,7 +964,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
 				   filemap_fdatawait from here so tell
 				   reopen_file not to flush data to 
 				   server now */
-				rc = cifs_reopen_file(file->f_dentry->d_inode,
+				rc = cifs_reopen_file(file->f_path.dentry->d_inode,
 					file, FALSE);
 				if (rc != 0)
 					break;
@@ -1011,16 +1011,16 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
 	cifs_stats_bytes_written(pTcon, total_written);
 
 	/* since the write may have blocked check these pointers again */
-	if (file->f_dentry) {
-		if (file->f_dentry->d_inode) {
-			file->f_dentry->d_inode->i_ctime = 
-			file->f_dentry->d_inode->i_mtime = CURRENT_TIME;
+	if (file->f_path.dentry) {
+		if (file->f_path.dentry->d_inode) {
+			file->f_path.dentry->d_inode->i_ctime =
+			file->f_path.dentry->d_inode->i_mtime = CURRENT_TIME;
 			if (total_written > 0) {
-				if (*poffset > file->f_dentry->d_inode->i_size)
-					i_size_write(file->f_dentry->d_inode, 
+				if (*poffset > file->f_path.dentry->d_inode->i_size)
+					i_size_write(file->f_path.dentry->d_inode,
 						     *poffset);
 			}
-			mark_inode_dirty_sync(file->f_dentry->d_inode);
+			mark_inode_dirty_sync(file->f_path.dentry->d_inode);
 		}
 	}
 	FreeXid(xid);
@@ -1384,7 +1384,7 @@ static int cifs_commit_write(struct file *file, struct page *page,
 				if ((open_file->invalidHandle) && 
 				    (!open_file->closePend)) {
 					rc = cifs_reopen_file(
-						file->f_dentry->d_inode, file);
+						file->f_path.dentry->d_inode, file);
 					if (rc != 0)
 						break;
 				}
@@ -1434,7 +1434,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
 	int xid;
 	int rc = 0;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 
 	xid = GetXid();
 
@@ -1482,7 +1482,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
  */
 int cifs_flush(struct file *file, fl_owner_t id)
 {
-	struct inode * inode = file->f_dentry->d_inode;
+	struct inode * inode = file->f_path.dentry->d_inode;
 	int rc = 0;
 
 	/* Rather than do the steps manually:
@@ -1519,7 +1519,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
 	struct smb_com_read_rsp *pSMBr;
 
 	xid = GetXid();
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	pTcon = cifs_sb->tcon;
 
 	if (file->private_data == NULL) {
@@ -1542,7 +1542,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
 			int buf_type = CIFS_NO_BUFFER;
 			if ((open_file->invalidHandle) && 
 			    (!open_file->closePend)) {
-				rc = cifs_reopen_file(file->f_dentry->d_inode,
+				rc = cifs_reopen_file(file->f_path.dentry->d_inode,
 					file, TRUE);
 				if (rc != 0)
 					break;
@@ -1601,7 +1601,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 	int buf_type = CIFS_NO_BUFFER;
 
 	xid = GetXid();
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	pTcon = cifs_sb->tcon;
 
 	if (file->private_data == NULL) {
@@ -1629,7 +1629,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 		while (rc == -EAGAIN) {
 			if ((open_file->invalidHandle) && 
 			    (!open_file->closePend)) {
-				rc = cifs_reopen_file(file->f_dentry->d_inode,
+				rc = cifs_reopen_file(file->f_path.dentry->d_inode,
 					file, TRUE);
 				if (rc != 0)
 					break;
@@ -1658,7 +1658,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 
 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = file->f_path.dentry;
 	int rc, xid;
 
 	xid = GetXid();
@@ -1744,7 +1744,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 		return -EBADF;
 	}
 	open_file = (struct cifsFileInfo *)file->private_data;
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	pTcon = cifs_sb->tcon;
 
 	pagevec_init(&lru_pvec, 0);
@@ -1786,7 +1786,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 		while (rc == -EAGAIN) {
 			if ((open_file->invalidHandle) && 
 			    (!open_file->closePend)) {
-				rc = cifs_reopen_file(file->f_dentry->d_inode,
+				rc = cifs_reopen_file(file->f_path.dentry->d_inode,
 					file, TRUE);
 				if (rc != 0)
 					break;
@@ -1880,8 +1880,8 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
 	else
 		cFYI(1, ("Bytes read %d",rc));
                                                                                                                            
-	file->f_dentry->d_inode->i_atime =
-		current_fs_time(file->f_dentry->d_inode->i_sb);
+	file->f_path.dentry->d_inode->i_atime =
+		current_fs_time(file->f_path.dentry->d_inode->i_sb);
                                                                                                                            
 	if (PAGE_CACHE_SIZE > rc)
 		memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index ed18c3965f7..99dfb5337e3 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -68,30 +68,30 @@ static int construct_dentry(struct qstr *qstring, struct file *file,
 	int rc = 0;
 
 	cFYI(1, ("For %s", qstring->name));
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	pTcon = cifs_sb->tcon;
 
 	qstring->hash = full_name_hash(qstring->name, qstring->len);
-	tmp_dentry = d_lookup(file->f_dentry, qstring);
+	tmp_dentry = d_lookup(file->f_path.dentry, qstring);
 	if (tmp_dentry) {
 		cFYI(0, ("existing dentry with inode 0x%p", tmp_dentry->d_inode));
 		*ptmp_inode = tmp_dentry->d_inode;
 /* BB overwrite old name? i.e. tmp_dentry->d_name and tmp_dentry->d_name.len??*/
 		if(*ptmp_inode == NULL) {
-			*ptmp_inode = new_inode(file->f_dentry->d_sb);
+			*ptmp_inode = new_inode(file->f_path.dentry->d_sb);
 			if(*ptmp_inode == NULL)
 				return rc;
 			rc = 1;
 		}
 	} else {
-		tmp_dentry = d_alloc(file->f_dentry, qstring);
+		tmp_dentry = d_alloc(file->f_path.dentry, qstring);
 		if(tmp_dentry == NULL) {
 			cERROR(1,("Failed allocating dentry"));
 			*ptmp_inode = NULL;
 			return rc;
 		}
 
-		*ptmp_inode = new_inode(file->f_dentry->d_sb);
+		*ptmp_inode = new_inode(file->f_path.dentry->d_sb);
 		if (pTcon->nocase)
 			tmp_dentry->d_op = &cifs_ci_dentry_ops;
 		else
@@ -432,10 +432,10 @@ static int initiate_cifs_search(const int xid, struct file *file)
 	cifsFile->invalidHandle = TRUE;
 	cifsFile->srch_inf.endOfSearch = FALSE;
 
-	if(file->f_dentry == NULL)
+	if(file->f_path.dentry == NULL)
 		return -ENOENT;
 
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	if(cifs_sb == NULL)
 		return -EINVAL;
 
@@ -443,7 +443,7 @@ static int initiate_cifs_search(const int xid, struct file *file)
 	if(pTcon == NULL)
 		return -EINVAL;
 
-	full_path = build_path_from_dentry(file->f_dentry);
+	full_path = build_path_from_dentry(file->f_path.dentry);
 
 	if(full_path == NULL) {
 		return -ENOMEM;
@@ -609,10 +609,10 @@ static int is_dir_changed(struct file * file)
 	struct inode * inode;
 	struct cifsInodeInfo *cifsInfo;
 
-	if(file->f_dentry == NULL)
+	if(file->f_path.dentry == NULL)
 		return 0;
 
-	inode = file->f_dentry->d_inode;
+	inode = file->f_path.dentry->d_inode;
 
 	if(inode == NULL)
 		return 0;
@@ -839,7 +839,7 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
 	if((scratch_buf == NULL) || (pfindEntry == NULL) || (pCifsF == NULL))
 		return -ENOENT;
 
-	if(file->f_dentry == NULL)
+	if(file->f_path.dentry == NULL)
 		return -ENOENT;
 
 	rc = cifs_entry_is_dot(pfindEntry,pCifsF);
@@ -847,7 +847,7 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
 	if(rc != 0) 
 		return 0;
 
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 
 	qstring.name = scratch_buf;
 	rc = cifs_get_name_from_search_buf(&qstring,pfindEntry,
@@ -985,12 +985,12 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 
 	xid = GetXid();
 
-	if(file->f_dentry == NULL) {
+	if(file->f_path.dentry == NULL) {
 		FreeXid(xid);
 		return -EIO;
 	}
 
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	pTcon = cifs_sb->tcon;
 	if(pTcon == NULL)
 		return -EINVAL;
@@ -998,7 +998,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 	switch ((int) file->f_pos) {
 	case 0:
 		if (filldir(direntry, ".", 1, file->f_pos,
-		     file->f_dentry->d_inode->i_ino, DT_DIR) < 0) {
+		     file->f_path.dentry->d_inode->i_ino, DT_DIR) < 0) {
 			cERROR(1, ("Filldir for current dir failed"));
 			rc = -ENOMEM;
 			break;
@@ -1006,7 +1006,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 		file->f_pos++;
 	case 1:
 		if (filldir(direntry, "..", 2, file->f_pos,
-		     file->f_dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) {
+		     file->f_path.dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) {
 			cERROR(1, ("Filldir for parent dir failed"));
 			rc = -ENOMEM;
 			break;
-- 
cgit v1.2.3


From bd243a4b4b028d65a8178db5b57f8ed2cfc9707d Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:48 -0800
Subject: [PATCH] ecryptfs: change uses of f_{dentry, vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the
ecryptfs filesystem.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/file.c  | 14 +++++++-------
 fs/ecryptfs/inode.c |  4 ++--
 fs/ecryptfs/mmap.c  | 16 ++++++++--------
 3 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index c48e4590b11..c5a2e5298f1 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -76,7 +76,7 @@ static loff_t ecryptfs_llseek(struct file *file, loff_t offset, int origin)
 	}
 	ecryptfs_printk(KERN_DEBUG, "new_end_pos = [0x%.16x]\n", new_end_pos);
 	if (expanding_file) {
-		rc = ecryptfs_truncate(file->f_dentry, new_end_pos);
+		rc = ecryptfs_truncate(file->f_path.dentry, new_end_pos);
 		if (rc) {
 			rv = rc;
 			ecryptfs_printk(KERN_ERR, "Error on attempt to "
@@ -117,8 +117,8 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
 	if (-EIOCBQUEUED == rc)
 		rc = wait_on_sync_kiocb(iocb);
 	if (rc >= 0) {
-		lower_dentry = ecryptfs_dentry_to_lower(file->f_dentry);
-		lower_vfsmount = ecryptfs_dentry_to_lower_mnt(file->f_dentry);
+		lower_dentry = ecryptfs_dentry_to_lower(file->f_path.dentry);
+		lower_vfsmount = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry);
 		touch_atime(lower_vfsmount, lower_dentry);
 	}
 	return rc;
@@ -177,10 +177,10 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir)
 
 	lower_file = ecryptfs_file_to_lower(file);
 	lower_file->f_pos = file->f_pos;
-	inode = file->f_dentry->d_inode;
+	inode = file->f_path.dentry->d_inode;
 	memset(&buf, 0, sizeof(buf));
 	buf.dirent = dirent;
-	buf.dentry = file->f_dentry;
+	buf.dentry = file->f_path.dentry;
 	buf.filldir = filldir;
 retry:
 	buf.filldir_called = 0;
@@ -193,7 +193,7 @@ retry:
 		goto retry;
 	file->f_pos = lower_file->f_pos;
 	if (rc >= 0)
-		fsstack_copy_attr_atime(inode, lower_file->f_dentry->d_inode);
+		fsstack_copy_attr_atime(inode, lower_file->f_path.dentry->d_inode);
 	return rc;
 }
 
@@ -240,7 +240,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 	int rc = 0;
 	struct ecryptfs_crypt_stat *crypt_stat = NULL;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
-	struct dentry *ecryptfs_dentry = file->f_dentry;
+	struct dentry *ecryptfs_dentry = file->f_path.dentry;
 	/* Private value of ecryptfs_dentry allocated in
 	 * ecryptfs_lookup() */
 	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index c07a937b21a..11f5e5076ae 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -155,7 +155,7 @@ static int grow_file(struct dentry *ecryptfs_dentry, struct file *lower_file,
 	struct ecryptfs_file_info tmp_file_info;
 
 	memset(&fake_file, 0, sizeof(fake_file));
-	fake_file.f_dentry = ecryptfs_dentry;
+	fake_file.f_path.dentry = ecryptfs_dentry;
 	memset(&tmp_file_info, 0, sizeof(tmp_file_info));
 	ecryptfs_set_file_private(&fake_file, &tmp_file_info);
 	ecryptfs_set_file_lower(&fake_file, lower_file);
@@ -754,7 +754,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 	 * the file in the underlying filesystem so that the
 	 * truncation has an effect there as well. */
 	memset(&fake_ecryptfs_file, 0, sizeof(fake_ecryptfs_file));
-	fake_ecryptfs_file.f_dentry = dentry;
+	fake_ecryptfs_file.f_path.dentry = dentry;
 	/* Released at out_free: label */
 	ecryptfs_set_file_private(&fake_ecryptfs_file,
 				  kmem_cache_alloc(ecryptfs_file_info_cache,
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 924dd90a4cf..06843d24f23 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -51,7 +51,7 @@ static struct page *ecryptfs_get1page(struct file *file, int index)
 	struct inode *inode;
 	struct address_space *mapping;
 
-	dentry = file->f_dentry;
+	dentry = file->f_path.dentry;
 	inode = dentry->d_inode;
 	mapping = inode->i_mapping;
 	page = read_cache_page(mapping, index,
@@ -84,7 +84,7 @@ int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros);
 int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
 {
 	int rc = 0;
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	pgoff_t old_end_page_index = 0;
 	pgoff_t index = old_end_page_index;
@@ -218,7 +218,7 @@ int ecryptfs_do_readpage(struct file *file, struct page *page,
 	char *lower_page_data;
 	const struct address_space_operations *lower_a_ops;
 
-	dentry = file->f_dentry;
+	dentry = file->f_path.dentry;
 	lower_file = ecryptfs_file_to_lower(file);
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	inode = dentry->d_inode;
@@ -275,9 +275,9 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
 	int rc = 0;
 	struct ecryptfs_crypt_stat *crypt_stat;
 
-	BUG_ON(!(file && file->f_dentry && file->f_dentry->d_inode));
-	crypt_stat =
-		&ecryptfs_inode_to_private(file->f_dentry->d_inode)->crypt_stat;
+	BUG_ON(!(file && file->f_path.dentry && file->f_path.dentry->d_inode));
+	crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)
+			->crypt_stat;
 	if (!crypt_stat
 	    || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)
 	    || ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) {
@@ -638,8 +638,8 @@ static int ecryptfs_commit_write(struct file *file, struct page *page,
 	lower_inode = ecryptfs_inode_to_lower(inode);
 	lower_file = ecryptfs_file_to_lower(file);
 	mutex_lock(&lower_inode->i_mutex);
-	crypt_stat =
-		&ecryptfs_inode_to_private(file->f_dentry->d_inode)->crypt_stat;
+	crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)
+				->crypt_stat;
 	if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) {
 		ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in "
 			"crypt_stat at memory location [%p]\n", crypt_stat);
-- 
cgit v1.2.3


From e678fb0d523c118fc2f903d62cb54c89b6b68185 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:49 -0800
Subject: [PATCH] xfs: change uses of f_{dentry,vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in the xfs
filesystem.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/xfs/linux-2.6/xfs_file.c    | 28 ++++++++++++++--------------
 fs/xfs/linux-2.6/xfs_ioctl.c   | 10 +++++-----
 fs/xfs/linux-2.6/xfs_ioctl32.c |  2 +-
 fs/xfs/linux-2.6/xfs_lrw.c     |  2 +-
 fs/xfs/xfs_dfrag.c             |  4 ++--
 5 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index d93d8dd1958..d26f5cd2ba7 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -55,7 +55,7 @@ __xfs_file_read(
 	loff_t			pos)
 {
 	struct file		*file = iocb->ki_filp;
-	bhv_vnode_t		*vp = vn_from_inode(file->f_dentry->d_inode);
+	bhv_vnode_t		*vp = vn_from_inode(file->f_path.dentry->d_inode);
 
 	BUG_ON(iocb->ki_pos != pos);
 	if (unlikely(file->f_flags & O_DIRECT))
@@ -131,7 +131,7 @@ xfs_file_sendfile(
 	read_actor_t		actor,
 	void			*target)
 {
-	return bhv_vop_sendfile(vn_from_inode(filp->f_dentry->d_inode),
+	return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
 				filp, pos, 0, count, actor, target, NULL);
 }
 
@@ -143,7 +143,7 @@ xfs_file_sendfile_invis(
 	read_actor_t		actor,
 	void			*target)
 {
-	return bhv_vop_sendfile(vn_from_inode(filp->f_dentry->d_inode),
+	return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
 				filp, pos, IO_INVIS, count, actor, target, NULL);
 }
 
@@ -155,7 +155,7 @@ xfs_file_splice_read(
 	size_t			len,
 	unsigned int		flags)
 {
-	return bhv_vop_splice_read(vn_from_inode(infilp->f_dentry->d_inode),
+	return bhv_vop_splice_read(vn_from_inode(infilp->f_path.dentry->d_inode),
 				   infilp, ppos, pipe, len, flags, 0, NULL);
 }
 
@@ -167,7 +167,7 @@ xfs_file_splice_read_invis(
 	size_t			len,
 	unsigned int		flags)
 {
-	return bhv_vop_splice_read(vn_from_inode(infilp->f_dentry->d_inode),
+	return bhv_vop_splice_read(vn_from_inode(infilp->f_path.dentry->d_inode),
 				   infilp, ppos, pipe, len, flags, IO_INVIS,
 				   NULL);
 }
@@ -180,7 +180,7 @@ xfs_file_splice_write(
 	size_t			len,
 	unsigned int		flags)
 {
-	return bhv_vop_splice_write(vn_from_inode(outfilp->f_dentry->d_inode),
+	return bhv_vop_splice_write(vn_from_inode(outfilp->f_path.dentry->d_inode),
 				    pipe, outfilp, ppos, len, flags, 0, NULL);
 }
 
@@ -192,7 +192,7 @@ xfs_file_splice_write_invis(
 	size_t			len,
 	unsigned int		flags)
 {
-	return bhv_vop_splice_write(vn_from_inode(outfilp->f_dentry->d_inode),
+	return bhv_vop_splice_write(vn_from_inode(outfilp->f_path.dentry->d_inode),
 				    pipe, outfilp, ppos, len, flags, IO_INVIS,
 				    NULL);
 }
@@ -212,7 +212,7 @@ xfs_file_close(
 	struct file	*filp,
 	fl_owner_t	id)
 {
-	return -bhv_vop_close(vn_from_inode(filp->f_dentry->d_inode), 0,
+	return -bhv_vop_close(vn_from_inode(filp->f_path.dentry->d_inode), 0,
 				file_count(filp) > 1 ? L_FALSE : L_TRUE, NULL);
 }
 
@@ -251,7 +251,7 @@ xfs_vm_nopage(
 	unsigned long		address,
 	int			*type)
 {
-	struct inode	*inode = area->vm_file->f_dentry->d_inode;
+	struct inode	*inode = area->vm_file->f_path.dentry->d_inode;
 	bhv_vnode_t	*vp = vn_from_inode(inode);
 
 	ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
@@ -268,7 +268,7 @@ xfs_file_readdir(
 	filldir_t	filldir)
 {
 	int		error = 0;
-	bhv_vnode_t	*vp = vn_from_inode(filp->f_dentry->d_inode);
+	bhv_vnode_t	*vp = vn_from_inode(filp->f_path.dentry->d_inode);
 	uio_t		uio;
 	iovec_t		iov;
 	int		eof = 0;
@@ -345,7 +345,7 @@ xfs_file_mmap(
 	vma->vm_ops = &xfs_file_vm_ops;
 
 #ifdef CONFIG_XFS_DMAPI
-	if (vn_from_inode(filp->f_dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI)
+	if (vn_from_inode(filp->f_path.dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI)
 		vma->vm_ops = &xfs_dmapi_file_vm_ops;
 #endif /* CONFIG_XFS_DMAPI */
 
@@ -360,7 +360,7 @@ xfs_file_ioctl(
 	unsigned long	p)
 {
 	int		error;
-	struct inode	*inode = filp->f_dentry->d_inode;
+	struct inode	*inode = filp->f_path.dentry->d_inode;
 	bhv_vnode_t	*vp = vn_from_inode(inode);
 
 	error = bhv_vop_ioctl(vp, inode, filp, 0, cmd, (void __user *)p);
@@ -382,7 +382,7 @@ xfs_file_ioctl_invis(
 	unsigned long	p)
 {
 	int		error;
-	struct inode	*inode = filp->f_dentry->d_inode;
+	struct inode	*inode = filp->f_path.dentry->d_inode;
 	bhv_vnode_t	*vp = vn_from_inode(inode);
 
 	error = bhv_vop_ioctl(vp, inode, filp, IO_INVIS, cmd, (void __user *)p);
@@ -404,7 +404,7 @@ xfs_vm_mprotect(
 	struct vm_area_struct *vma,
 	unsigned int	newflags)
 {
-	bhv_vnode_t	*vp = vn_from_inode(vma->vm_file->f_dentry->d_inode);
+	bhv_vnode_t	*vp = vn_from_inode(vma->vm_file->f_path.dentry->d_inode);
 	int		error = 0;
 
 	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 74d094829a4..f011c9cd0d6 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -107,9 +107,9 @@ xfs_find_handle(
 		if (!file)
 		    return -EBADF;
 
-		ASSERT(file->f_dentry);
-		ASSERT(file->f_dentry->d_inode);
-		inode = igrab(file->f_dentry->d_inode);
+		ASSERT(file->f_path.dentry);
+		ASSERT(file->f_path.dentry->d_inode);
+		inode = igrab(file->f_path.dentry->d_inode);
 		fput(file);
 		break;
 	}
@@ -333,10 +333,10 @@ xfs_open_by_handle(
 	}
 
 	/* Ensure umount returns EBUSY on umounts while this file is open. */
-	mntget(parfilp->f_vfsmnt);
+	mntget(parfilp->f_path.mnt);
 
 	/* Create file pointer. */
-	filp = dentry_open(dentry, parfilp->f_vfsmnt, hreq.oflags);
+	filp = dentry_open(dentry, parfilp->f_path.mnt, hreq.oflags);
 	if (IS_ERR(filp)) {
 		put_unused_fd(new_fd);
 		return -XFS_ERROR(-PTR_ERR(filp));
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 270db0f3861..b83cebc165f 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -112,7 +112,7 @@ xfs_compat_ioctl(
 	unsigned	cmd,
 	unsigned long	arg)
 {
-	struct inode	*inode = file->f_dentry->d_inode;
+	struct inode	*inode = file->f_path.dentry->d_inode;
 	bhv_vnode_t	*vp = vn_from_inode(inode);
 	int		error;
 
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index fa842f1c9fa..65e79b471d4 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -805,7 +805,7 @@ start:
 	     !capable(CAP_FSETID)) {
 		error = xfs_write_clear_setuid(xip);
 		if (likely(!error))
-			error = -remove_suid(file->f_dentry);
+			error = -remove_suid(file->f_path.dentry);
 		if (unlikely(error)) {
 			xfs_iunlock(xip, iolock);
 			goto out_unlock_mutex;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 80562b60fb9..50d0faea371 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -71,7 +71,7 @@ xfs_swapext(
 
 	/* Pull information for the target fd */
 	if (((fp = fget((int)sxp->sx_fdtarget)) == NULL) ||
-	    ((vp = vn_from_inode(fp->f_dentry->d_inode)) == NULL))  {
+	    ((vp = vn_from_inode(fp->f_path.dentry->d_inode)) == NULL))  {
 		error = XFS_ERROR(EINVAL);
 		goto error0;
 	}
@@ -83,7 +83,7 @@ xfs_swapext(
 	}
 
 	if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) ||
-	    ((tvp = vn_from_inode(tfp->f_dentry->d_inode)) == NULL)) {
+	    ((tvp = vn_from_inode(tfp->f_path.dentry->d_inode)) == NULL)) {
 		error = XFS_ERROR(EINVAL);
 		goto error0;
 	}
-- 
cgit v1.2.3


From 514653e25962c37b11fe648e4ef1c1130f697bcb Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:50 -0800
Subject: [PATCH] struct path: convert adfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/adfs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index d3c7905b2dd..2b8903893d3 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -28,7 +28,7 @@ static DEFINE_RWLOCK(adfs_dir_lock);
 static int
 adfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
 	struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir;
 	struct object_info obj;
-- 
cgit v1.2.3


From 1d56a96956158d0fb1902c56ffa0e38caa39bddb Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:50 -0800
Subject: [PATCH] struct path: convert afs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/afs/dir.c   | 4 ++--
 fs/afs/mntpt.c | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index a6ec75c56fc..4acd0413405 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -392,10 +392,10 @@ static int afs_dir_readdir(struct file *file, void *cookie, filldir_t filldir)
 	unsigned fpos;
 	int ret;
 
-	_enter("{%Ld,{%lu}}", file->f_pos, file->f_dentry->d_inode->i_ino);
+	_enter("{%Ld,{%lu}}", file->f_pos, file->f_path.dentry->d_inode->i_ino);
 
 	fpos = file->f_pos;
-	ret = afs_dir_iterate(file->f_dentry->d_inode, &fpos, cookie, filldir);
+	ret = afs_dir_iterate(file->f_path.dentry->d_inode, &fpos, cookie, filldir);
 	file->f_pos = fpos;
 
 	_leave(" = %d", ret);
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 99785a79d04..f33b1a81a76 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -136,11 +136,11 @@ static int afs_mntpt_open(struct inode *inode, struct file *file)
 {
 	kenter("%p,%p{%p{%s},%s}",
 	       inode, file,
-	       file->f_dentry->d_parent,
-	       file->f_dentry->d_parent ?
-	       file->f_dentry->d_parent->d_name.name :
+	       file->f_path.dentry->d_parent,
+	       file->f_path.dentry->d_parent ?
+	       file->f_path.dentry->d_parent->d_name.name :
 	       (const unsigned char *) "",
-	       file->f_dentry->d_name.name);
+	       file->f_path.dentry->d_name.name);
 
 	return -EREMOTE;
 } /* end afs_mntpt_open() */
-- 
cgit v1.2.3


From 28f375fd6ff266d8e321648b36456f696cfae220 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:52 -0800
Subject: [PATCH] struct path: convert befs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/befs/linuxvfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index bce402eee55..481e59b9d91 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -212,7 +212,7 @@ befs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 static int
 befs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
 	befs_data_stream *ds = &BEFS_I(inode)->i_data.ds;
 	befs_off_t value;
@@ -222,7 +222,7 @@ befs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	char keybuf[BEFS_NAME_LEN + 1];
 	char *nlsname;
 	int nlsnamelen;
-	const char *dirname = filp->f_dentry->d_name.name;
+	const char *dirname = filp->f_path.dentry->d_name.name;
 
 	befs_debug(sb, "---> befs_readdir() "
 		   "name %s, inode %ld, filp->f_pos %Ld",
-- 
cgit v1.2.3


From 61a8424ab8f611d199bfc8ca329ec0ea77a4d4ea Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:53 -0800
Subject: [PATCH] struct path: convert bfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/bfs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index a650f1d0b85..2a746e688df 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -27,7 +27,7 @@ static struct buffer_head * bfs_find_entry(struct inode * dir,
 
 static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir)
 {
-	struct inode * dir = f->f_dentry->d_inode;
+	struct inode * dir = f->f_path.dentry->d_inode;
 	struct buffer_head * bh;
 	struct bfs_dirent * de;
 	unsigned int offset;
-- 
cgit v1.2.3


From d4176d326d76ddea457a5e42ac8c2e3aed3430b1 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:56 -0800
Subject: [PATCH] struct path: convert coda

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/coda/dir.c   |  8 ++++----
 fs/coda/file.c  | 14 +++++++-------
 fs/coda/inode.c |  2 +-
 3 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 0102b28a15f..0c6f7f3b3dd 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -441,7 +441,7 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry,
 /* file operations for directories */
 int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir)
 {
-	struct dentry *coda_dentry = coda_file->f_dentry;
+	struct dentry *coda_dentry = coda_file->f_path.dentry;
 	struct coda_file_info *cfi;
 	struct file *host_file;
 	struct inode *host_inode;
@@ -453,7 +453,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir)
 
 	coda_vfs_stat.readdir++;
 
-	host_inode = host_file->f_dentry->d_inode;
+	host_inode = host_file->f_path.dentry->d_inode;
 	mutex_lock(&host_inode->i_mutex);
 	host_file->f_pos = coda_file->f_pos;
 
@@ -544,14 +544,14 @@ static int coda_venus_readdir(struct file *filp, filldir_t filldir,
 		/* catch truncated reads */
 		if (ret < vdir_size || ret < vdir_size + vdir->d_namlen) {
 			printk("coda_venus_readdir: short read: %ld\n",
-			       filp->f_dentry->d_inode->i_ino);
+			       filp->f_path.dentry->d_inode->i_ino);
 			ret = -EBADF;
 			break;
 		}
 		/* validate whether the directory file actually makes sense */
 		if (vdir->d_reclen < vdir_size + vdir->d_namlen) {
 			printk("coda_venus_readdir: Invalid dir: %ld\n",
-			       filp->f_dentry->d_inode->i_ino);
+			       filp->f_path.dentry->d_inode->i_ino);
 			ret = -EBADF;
 			break;
 		}
diff --git a/fs/coda/file.c b/fs/coda/file.c
index dbfbcfa5b3c..5ef2b609ec7 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -66,7 +66,7 @@ coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count,
 static ssize_t
 coda_file_write(struct file *coda_file, const char __user *buf, size_t count, loff_t *ppos)
 {
-	struct inode *host_inode, *coda_inode = coda_file->f_dentry->d_inode;
+	struct inode *host_inode, *coda_inode = coda_file->f_path.dentry->d_inode;
 	struct coda_file_info *cfi;
 	struct file *host_file;
 	ssize_t ret;
@@ -78,7 +78,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
 	if (!host_file->f_op || !host_file->f_op->write)
 		return -EINVAL;
 
-	host_inode = host_file->f_dentry->d_inode;
+	host_inode = host_file->f_path.dentry->d_inode;
 	mutex_lock(&coda_inode->i_mutex);
 
 	ret = host_file->f_op->write(host_file, buf, count, ppos);
@@ -106,8 +106,8 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
 	if (!host_file->f_op || !host_file->f_op->mmap)
 		return -ENODEV;
 
-	coda_inode = coda_file->f_dentry->d_inode;
-	host_inode = host_file->f_dentry->d_inode;
+	coda_inode = coda_file->f_path.dentry->d_inode;
+	host_inode = host_file->f_path.dentry->d_inode;
 	coda_file->f_mapping = host_file->f_mapping;
 	if (coda_inode->i_mapping == &coda_inode->i_data)
 		coda_inode->i_mapping = host_inode->i_mapping;
@@ -190,7 +190,7 @@ int coda_flush(struct file *coda_file, fl_owner_t id)
 	cfi = CODA_FTOC(coda_file);
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 
-	coda_inode = coda_file->f_dentry->d_inode;
+	coda_inode = coda_file->f_path.dentry->d_inode;
 
 	err = venus_store(coda_inode->i_sb, coda_i2f(coda_inode), coda_flags,
 			  coda_file->f_uid);
@@ -233,7 +233,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
 		err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode),
 				  coda_flags, coda_file->f_uid);
 
-	host_inode = cfi->cfi_container->f_dentry->d_inode;
+	host_inode = cfi->cfi_container->f_path.dentry->d_inode;
 	cii = ITOC(coda_inode);
 
 	/* did we mmap this file? */
@@ -270,7 +270,7 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
 	coda_vfs_stat.fsync++;
 
 	if (host_file->f_op && host_file->f_op->fsync) {
-		host_dentry = host_file->f_dentry;
+		host_dentry = host_file->f_path.dentry;
 		host_inode = host_dentry->d_inode;
 		mutex_lock(&host_inode->i_mutex);
 		err = host_file->f_op->fsync(host_file, host_dentry, datasync);
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index b64659fa82d..01395defed8 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -119,7 +119,7 @@ static int get_device_index(struct coda_mount_data *data)
 	file = fget(data->fd);
 	inode = NULL;
 	if(file)
-		inode = file->f_dentry->d_inode;
+		inode = file->f_path.dentry->d_inode;
 	
 	if(!inode || !S_ISCHR(inode->i_mode) ||
 	   imajor(inode) != CODA_PSDEV_MAJOR) {
-- 
cgit v1.2.3


From 352d8af748a1bee0935987b811c9fc4693803d99 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:58 -0800
Subject: [PATCH] struct path: convert cramfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cramfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 0509cedd415..6db03fb089d 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -338,7 +338,7 @@ static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf)
  */
 static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
 	char *buf;
 	unsigned int offset;
-- 
cgit v1.2.3


From 0191f2055cd16a2252219c621329b52d8b67194e Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:00 -0800
Subject: [PATCH] struct path: convert efs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/efs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/efs/dir.c b/fs/efs/dir.c
index 17f5b2d3c16..b46c488eefc 100644
--- a/fs/efs/dir.c
+++ b/fs/efs/dir.c
@@ -20,7 +20,7 @@ struct inode_operations efs_dir_inode_operations = {
 };
 
 static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct buffer_head *bh;
 
 	struct efs_dir		*dirblock;
-- 
cgit v1.2.3


From 6a90cd248deca1ceaf417c9a04bf3be6026e7a64 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:01 -0800
Subject: [PATCH] struct path: convert freevxfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/freevxfs/vxfs_lookup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 43886fa00a2..3995d7fbeda 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -240,7 +240,7 @@ vxfs_lookup(struct inode *dip, struct dentry *dp, struct nameidata *nd)
 static int
 vxfs_readdir(struct file *fp, void *retp, filldir_t filler)
 {
-	struct inode		*ip = fp->f_dentry->d_inode;
+	struct inode		*ip = fp->f_path.dentry->d_inode;
 	struct super_block	*sbp = ip->i_sb;
 	u_long			bsize = sbp->s_blocksize;
 	u_long			page, npages, block, pblocks, nblocks, offset;
-- 
cgit v1.2.3


From 7706a9d6183da7701a9bca7155bccfcd182c670a Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:02 -0800
Subject: [PATCH] struct path: convert fuse

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/control.c |  2 +-
 fs/fuse/dir.c     |  2 +-
 fs/fuse/file.c    | 18 +++++++++---------
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 16b39c053d4..8c58bd45399 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -23,7 +23,7 @@ static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file)
 {
 	struct fuse_conn *fc;
 	mutex_lock(&fuse_mutex);
-	fc = file->f_dentry->d_inode->i_private;
+	fc = file->f_path.dentry->d_inode->i_private;
 	if (fc)
 		fc = fuse_conn_get(fc);
 	mutex_unlock(&fuse_mutex);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 1cabdb229ad..40080477ceb 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -856,7 +856,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
 	int err;
 	size_t nbytes;
 	struct page *page;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_req *req;
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 128f79c4080..1387749201b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -141,8 +141,8 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir)
 					isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
 
 		/* Hold vfsmount and dentry until release is finished */
-		req->vfsmount = mntget(file->f_vfsmnt);
-		req->dentry = dget(file->f_dentry);
+		req->vfsmount = mntget(file->f_path.mnt);
+		req->dentry = dget(file->f_path.dentry);
 		request_send_background(fc, req);
 	}
 
@@ -184,7 +184,7 @@ static u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
 
 static int fuse_flush(struct file *file, fl_owner_t id)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_file *ff = file->private_data;
 	struct fuse_req *req;
@@ -533,7 +533,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
 static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 			      size_t count, loff_t *ppos, int write)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	size_t nmax = write ? fc->max_write : fc->max_read;
 	loff_t pos = *ppos;
@@ -607,7 +607,7 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
 static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	ssize_t res;
 	/* Don't allow parallel writes to the same file */
 	mutex_lock(&inode->i_mutex);
@@ -662,7 +662,7 @@ static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
 static void fuse_lk_fill(struct fuse_req *req, struct file *file,
 			 const struct file_lock *fl, int opcode, pid_t pid)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_file *ff = file->private_data;
 	struct fuse_lk_in *arg = &req->misc.lk_in;
@@ -682,7 +682,7 @@ static void fuse_lk_fill(struct fuse_req *req, struct file *file,
 
 static int fuse_getlk(struct file *file, struct file_lock *fl)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_req *req;
 	struct fuse_lk_out outarg;
@@ -707,7 +707,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
 
 static int fuse_setlk(struct file *file, struct file_lock *fl)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_req *req;
 	int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
@@ -734,7 +734,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl)
 
 static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	int err;
 
-- 
cgit v1.2.3


From 81454098f7c54118d290cd503bda2a41e3ac43fb Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:03 -0800
Subject: [PATCH] struct path: convert gfs2

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/gfs2/ops_file.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index b3f1e0349ae..faa07e4b97d 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -247,7 +247,7 @@ static const u32 gfs2_to_fsflags[32] = {
 
 static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder gh;
 	int error;
@@ -305,7 +305,7 @@ void gfs2_set_inode_flags(struct inode *inode)
  */
 static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct buffer_head *bh;
@@ -588,7 +588,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
 {
 	struct gfs2_file *fp = file->private_data;
 	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
-	struct gfs2_inode *ip = GFS2_I(file->f_dentry->d_inode);
+	struct gfs2_inode *ip = GFS2_I(file->f_path.dentry->d_inode);
 	struct gfs2_glock *gl;
 	unsigned int state;
 	int flags;
-- 
cgit v1.2.3


From c528896004793065b5ffd710e1cd844603ad8518 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:04 -0800
Subject: [PATCH] struct path: convert hfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hfs/dir.c   | 2 +-
 fs/hfs/inode.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 37d681b4f21..e2e0358da33 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -53,7 +53,7 @@ done:
  */
 static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
 	int len, err;
 	char strbuf[HFS_MAX_NAMELEN];
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 02f5573e034..5cb7f8fee8d 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -102,7 +102,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
 		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
+	struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
 
 	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				  offset, nr_segs, hfs_get_block, NULL);
-- 
cgit v1.2.3


From f44ea03102a021b9c36cc82c7b6312cd23f23b3d Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:04 -0800
Subject: [PATCH] struct path: convert hfsplus

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hfsplus/dir.c   | 2 +-
 fs/hfsplus/inode.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 7e309751645..e886ac8460d 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -111,7 +111,7 @@ fail:
 
 static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
 	int len, err;
 	char strbuf[HFSPLUS_MAX_STRLEN + 1];
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 9e367524963..75e8c4d8aac 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -97,7 +97,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
 		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
+	struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
 
 	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				  offset, nr_segs, hfsplus_get_block, NULL);
-- 
cgit v1.2.3


From 680b0da9b10d97ddf3c325bf03ae8425ebf4fdec Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:05 -0800
Subject: [PATCH] struct path: convert hostfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hostfs/hostfs_kern.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index b6bd33ca373..1e6fc379987 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -35,7 +35,7 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
 	return(list_entry(inode, struct hostfs_inode_info, vfs_inode));
 }
 
-#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_dentry->d_inode)
+#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode)
 
 int hostfs_d_delete(struct dentry *dentry)
 {
@@ -325,7 +325,7 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
 	unsigned long long next, ino;
 	int error, len;
 
-	name = dentry_name(file->f_dentry, 0);
+	name = dentry_name(file->f_path.dentry, 0);
 	if(name == NULL) return(-ENOMEM);
 	dir = open_dir(name, &error);
 	kfree(name);
@@ -366,7 +366,7 @@ int hostfs_file_open(struct inode *ino, struct file *file)
 	if(w)
 		r = 1;
 
-	name = dentry_name(file->f_dentry, 0);
+	name = dentry_name(file->f_path.dentry, 0);
 	if(name == NULL)
 		return(-ENOMEM);
 
-- 
cgit v1.2.3


From 575800b3ccd51246f6bac3a07e253f72a61d74b9 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:06 -0800
Subject: [PATCH] struct path: convert hpfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hpfs/dir.c  | 4 ++--
 fs/hpfs/file.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 594f9c428fc..6916c41d701 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -24,7 +24,7 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
 	loff_t new_off = off + (whence == 1 ? filp->f_pos : 0);
 	loff_t pos;
 	struct quad_buffer_head qbh;
-	struct inode *i = filp->f_dentry->d_inode;
+	struct inode *i = filp->f_path.dentry->d_inode;
 	struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
 	struct super_block *s = i->i_sb;
 
@@ -52,7 +52,7 @@ fail:
 
 static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
 	struct quad_buffer_head qbh;
 	struct hpfs_dirent *de;
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 8b94d24855f..fb4c8915010 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -115,7 +115,7 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf,
 
 	retval = do_sync_write(file, buf, count, ppos);
 	if (retval > 0)
-		hpfs_i(file->f_dentry->d_inode)->i_dirty = 1;
+		hpfs_i(file->f_path.dentry->d_inode)->i_dirty = 1;
 	return retval;
 }
 
-- 
cgit v1.2.3


From 471b17e7ed0cb29338458eaa09103902891d802a Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:07 -0800
Subject: [PATCH] struct path: convert hppfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hppfs/hppfs_kern.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index 642675fc394..afd340a45da 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -221,7 +221,7 @@ static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count,
 	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
 	ssize_t n;
 
-	read = file->f_dentry->d_inode->i_fop->read;
+	read = file->f_path.dentry->d_inode->i_fop->read;
 
 	if(!is_user)
 		set_fs(KERNEL_DS);
@@ -320,7 +320,7 @@ static ssize_t hppfs_write(struct file *file, const char __user *buf, size_t len
 	ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
 	int err;
 
-	write = proc_file->f_dentry->d_inode->i_fop->write;
+	write = proc_file->f_path.dentry->d_inode->i_fop->write;
 
 	proc_file->f_pos = file->f_pos;
 	err = (*write)(proc_file, buf, len, &proc_file->f_pos);
@@ -464,7 +464,7 @@ static int hppfs_open(struct inode *inode, struct file *file)
 	if(data == NULL)
 		goto out;
 
-	host_file = dentry_name(file->f_dentry, strlen("/rw"));
+	host_file = dentry_name(file->f_path.dentry, strlen("/rw"));
 	if(host_file == NULL)
 		goto out_free2;
 
@@ -547,7 +547,7 @@ static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
 	loff_t (*llseek)(struct file *, loff_t, int);
 	loff_t ret;
 
-	llseek = proc_file->f_dentry->d_inode->i_fop->llseek;
+	llseek = proc_file->f_path.dentry->d_inode->i_fop->llseek;
 	if(llseek != NULL){
 		ret = (*llseek)(proc_file, off, where);
 		if(ret < 0)
@@ -591,10 +591,10 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
 	struct hppfs_dirent dirent = ((struct hppfs_dirent)
 		                      { .vfs_dirent  	= ent,
 					.filldir 	= filldir,
-					.dentry  	= file->f_dentry } );
+					.dentry  	= file->f_path.dentry } );
 	int err;
 
-	readdir = proc_file->f_dentry->d_inode->i_fop->readdir;
+	readdir = proc_file->f_path.dentry->d_inode->i_fop->readdir;
 
 	proc_file->f_pos = file->f_pos;
 	err = (*readdir)(proc_file, &dirent, hppfs_filldir);
-- 
cgit v1.2.3


From b39424e274a17c9c3233c9911dd328b10370de8f Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:07 -0800
Subject: [PATCH] struct path: convert hugetlbfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hugetlbfs/inode.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 0706f5aac6a..ed2c22340ad 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -58,7 +58,7 @@ static void huge_pagevec_release(struct pagevec *pvec)
 
 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	loff_t len, vma_len;
 	int ret;
 
@@ -774,8 +774,8 @@ struct file *hugetlb_zero_setup(size_t size)
 	d_instantiate(dentry, inode);
 	inode->i_size = size;
 	inode->i_nlink = 0;
-	file->f_vfsmnt = mntget(hugetlbfs_vfsmount);
-	file->f_dentry = dentry;
+	file->f_path.mnt = mntget(hugetlbfs_vfsmount);
+	file->f_path.dentry = dentry;
 	file->f_mapping = inode->i_mapping;
 	file->f_op = &hugetlbfs_file_operations;
 	file->f_mode = FMODE_WRITE | FMODE_READ;
-- 
cgit v1.2.3


From 4d832d460e13dd9c2cc93739ac0fd75bd8645e93 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:15 -0800
Subject: [PATCH] struct path: convert jffs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jffs/inode-v23.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 9f15bce9202..7b40c69f44e 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -566,7 +566,7 @@ static int
 jffs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	struct jffs_file *f;
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	struct jffs_control *c = (struct jffs_control *)inode->i_sb->s_fs_info;
 	int j;
@@ -1372,7 +1372,7 @@ jffs_file_write(struct file *filp, const char *buf, size_t count,
 	struct jffs_control *c;
 	struct jffs_file *f;
 	struct jffs_node *node;
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	int recoverable = 0;
 	size_t written = 0;
@@ -1380,7 +1380,7 @@ jffs_file_write(struct file *filp, const char *buf, size_t count,
 	loff_t pos = *ppos;
 	int err;
 
-	inode = filp->f_dentry->d_inode;
+	inode = filp->f_path.dentry->d_inode;
 
 	D2(printk("***jffs_file_write(): inode: 0x%p (ino: %lu), "
 		  "filp: 0x%p, buf: 0x%p, count: %d\n",
-- 
cgit v1.2.3


From ec2e203c8297d576c74a101cebd657bc1f180dd1 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:16 -0800
Subject: [PATCH] struct path: convert jffs2

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jffs2/dir.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 9def6adf4a5..da6034d5071 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -123,11 +123,11 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	struct jffs2_inode_info *f;
 	struct jffs2_sb_info *c;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct jffs2_full_dirent *fd;
 	unsigned long offset, curofs;
 
-	D1(printk(KERN_DEBUG "jffs2_readdir() for dir_i #%lu\n", filp->f_dentry->d_inode->i_ino));
+	D1(printk(KERN_DEBUG "jffs2_readdir() for dir_i #%lu\n", filp->f_path.dentry->d_inode->i_ino));
 
 	f = JFFS2_INODE_INFO(inode);
 	c = JFFS2_SB_INFO(inode->i_sb);
@@ -141,7 +141,7 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		offset++;
 	}
 	if (offset == 1) {
-		unsigned long pino = parent_ino(filp->f_dentry);
+		unsigned long pino = parent_ino(filp->f_path.dentry);
 		D1(printk(KERN_DEBUG "Dirent 1: \"..\", ino #%lu\n", pino));
 		if (filldir(dirent, "..", 2, 1, pino, DT_DIR) < 0)
 			goto out;
-- 
cgit v1.2.3


From ff273773bfd4f2131bad1318e56519fcceac2339 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:16 -0800
Subject: [PATCH] struct path: convert jfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jfs/jfs_dtree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index ecb2216d881..47bc0b5d132 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -3009,7 +3009,7 @@ static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent)
  */
 int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *ip = filp->f_dentry->d_inode;
+	struct inode *ip = filp->f_path.dentry->d_inode;
 	struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab;
 	int rc = 0;
 	loff_t dtpos;	/* legacy OS/2 style position */
-- 
cgit v1.2.3


From 225a719f79fbc4d0cd9d9ebc5b2e3ac0e95845aa Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:18 -0800
Subject: [PATCH] struct path: convert lockd

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/lockd/clntlock.c |  2 +-
 fs/lockd/clntproc.c |  2 +-
 fs/lockd/svclock.c  | 16 ++++++++--------
 fs/lockd/svcsubs.c  |  2 +-
 4 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index b85a0ad2cfb..92681c9e9b2 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -126,7 +126,7 @@ __be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock
 			continue;
 		if (!nlm_cmp_addr(&block->b_host->h_addr, addr))
 			continue;
-		if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_dentry->d_inode) ,fh) != 0)
+		if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0)
 			continue;
 		/* Alright, we found a lock. Set the return status
 		 * and wake up the caller
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 497c3cd59d5..80a1a6dccc8 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -129,7 +129,7 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
 
 	nlmclnt_next_cookie(&argp->cookie);
 	argp->state   = nsm_local_state;
-	memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
+	memcpy(&lock->fh, NFS_FH(fl->fl_file->f_path.dentry->d_inode), sizeof(struct nfs_fh));
 	lock->caller  = utsname()->nodename;
 	lock->oh.data = req->a_owner;
 	lock->oh.len  = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 7e219b93855..5c054b20fd5 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -343,8 +343,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 	__be32			ret;
 
 	dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
-				file->f_file->f_dentry->d_inode->i_sb->s_id,
-				file->f_file->f_dentry->d_inode->i_ino,
+				file->f_file->f_path.dentry->d_inode->i_sb->s_id,
+				file->f_file->f_path.dentry->d_inode->i_ino,
 				lock->fl.fl_type, lock->fl.fl_pid,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end,
@@ -420,8 +420,8 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
 				       struct nlm_lock *conflock)
 {
 	dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
-				file->f_file->f_dentry->d_inode->i_sb->s_id,
-				file->f_file->f_dentry->d_inode->i_ino,
+				file->f_file->f_path.dentry->d_inode->i_sb->s_id,
+				file->f_file->f_path.dentry->d_inode->i_ino,
 				lock->fl.fl_type,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
@@ -454,8 +454,8 @@ nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock)
 	int	error;
 
 	dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
-				file->f_file->f_dentry->d_inode->i_sb->s_id,
-				file->f_file->f_dentry->d_inode->i_ino,
+				file->f_file->f_path.dentry->d_inode->i_sb->s_id,
+				file->f_file->f_path.dentry->d_inode->i_ino,
 				lock->fl.fl_pid,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
@@ -483,8 +483,8 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
 	int status = 0;
 
 	dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
-				file->f_file->f_dentry->d_inode->i_sb->s_id,
-				file->f_file->f_dentry->d_inode->i_ino,
+				file->f_file->f_path.dentry->d_inode->i_sb->s_id,
+				file->f_file->f_path.dentry->d_inode->i_ino,
 				lock->fl.fl_pid,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index e83024e1604..c0df00c74ce 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -43,7 +43,7 @@ static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
 
 static inline void nlm_debug_print_file(char *msg, struct nlm_file *file)
 {
-	struct inode *inode = file->f_file->f_dentry->d_inode;
+	struct inode *inode = file->f_file->f_path.dentry->d_inode;
 
 	dprintk("lockd: %s %s/%ld\n",
 		msg, inode->i_sb->s_id, inode->i_ino);
-- 
cgit v1.2.3


From dcf258ae682c2dbbe1a34624f2b3f56353d3874d Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:19 -0800
Subject: [PATCH] struct path: convert minix

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/minix/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 2b0a389d198..ab782c4086f 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -82,7 +82,7 @@ static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi)
 static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
 	unsigned long pos = filp->f_pos;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
 	unsigned offset = pos & ~PAGE_CACHE_MASK;
 	unsigned long n = pos >> PAGE_CACHE_SHIFT;
-- 
cgit v1.2.3


From 92e5baef8578a03335059a3dec933955c361edc1 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:22 -0800
Subject: [PATCH] struct path: convert ncpfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ncpfs/dir.c   | 8 ++++----
 fs/ncpfs/file.c  | 4 ++--
 fs/ncpfs/inode.c | 4 ++--
 fs/ncpfs/ioctl.c | 8 ++++----
 fs/ncpfs/mmap.c  | 4 ++--
 5 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 458b3b78519..73747772c3b 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -402,7 +402,7 @@ static time_t ncp_obtain_mtime(struct dentry *dentry)
 
 static int ncp_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	struct page *page = NULL;
 	struct ncp_server *server = NCP_SERVER(inode);
@@ -554,7 +554,7 @@ static int
 ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
 		struct ncp_cache_control *ctrl, struct ncp_entry_info *entry)
 {
-	struct dentry *newdent, *dentry = filp->f_dentry;
+	struct dentry *newdent, *dentry = filp->f_path.dentry;
 	struct inode *newino, *inode = dentry->d_inode;
 	struct ncp_cache_control ctl = *ctrl;
 	struct qstr qname;
@@ -649,7 +649,7 @@ static void
 ncp_read_volume_list(struct file *filp, void *dirent, filldir_t filldir,
 			struct ncp_cache_control *ctl)
 {
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	struct ncp_server *server = NCP_SERVER(inode);
 	struct ncp_volume_info info;
@@ -685,7 +685,7 @@ static void
 ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir,
 						struct ncp_cache_control *ctl)
 {
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *dir = dentry->d_inode;
 	struct ncp_server *server = NCP_SERVER(dir);
 	struct nw_search_sequence seq;
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index df37524b85d..b91fea03b1c 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -101,7 +101,7 @@ out:
 static ssize_t
 ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	size_t already_read = 0;
 	off_t pos;
@@ -182,7 +182,7 @@ outrel:
 static ssize_t
 ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 {
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	size_t already_written = 0;
 	off_t pos;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index fae53243bb9..47462ac9447 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -471,7 +471,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 	if (!ncp_filp)
 		goto out;
 	error = -ENOTSOCK;
-	sock_inode = ncp_filp->f_dentry->d_inode;
+	sock_inode = ncp_filp->f_path.dentry->d_inode;
 	if (!S_ISSOCK(sock_inode->i_mode))
 		goto out_fput;
 	sock = SOCKET_I(sock_inode);
@@ -504,7 +504,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 		if (!server->info_filp)
 			goto out_fput;
 		error = -ENOTSOCK;
-		sock_inode = server->info_filp->f_dentry->d_inode;
+		sock_inode = server->info_filp->f_path.dentry->d_inode;
 		if (!S_ISSOCK(sock_inode->i_mode))
 			goto out_fput2;
 		info_sock = SOCKET_I(sock_inode);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 589d1eac55c..8843a83d4ef 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -35,7 +35,7 @@ static int
 ncp_get_fs_info(struct ncp_server * server, struct file *file,
 		struct ncp_fs_info __user *arg)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct ncp_fs_info info;
 
 	if ((file_permission(file, MAY_WRITE) != 0)
@@ -65,7 +65,7 @@ static int
 ncp_get_fs_info_v2(struct ncp_server * server, struct file *file,
 		   struct ncp_fs_info_v2 __user * arg)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct ncp_fs_info_v2 info2;
 
 	if ((file_permission(file, MAY_WRITE) != 0)
@@ -136,7 +136,7 @@ static int
 ncp_get_compat_fs_info_v2(struct ncp_server * server, struct file *file,
 		   struct compat_ncp_fs_info_v2 __user * arg)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct compat_ncp_fs_info_v2 info2;
 
 	if ((file_permission(file, MAY_WRITE) != 0)
@@ -824,7 +824,7 @@ outrel:
 #ifdef CONFIG_COMPAT
 long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	int ret;
 
 	lock_kernel();
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index e7d5a3097fe..70a69115500 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -29,7 +29,7 @@ static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area,
 				     unsigned long address, int *type)
 {
 	struct file *file = area->vm_file;
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	struct page* page;
 	char *pg_addr;
@@ -106,7 +106,7 @@ static struct vm_operations_struct ncp_file_mmap =
 /* This is used for a general mmap of a ncp file */
 int ncp_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	
 	DPRINTK("ncp_mmap: called\n");
 
-- 
cgit v1.2.3


From d28c91740ae2cd1d963f9e4e3889789894cb6d52 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:25 -0800
Subject: [PATCH] struct path: convert ocfs2

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ocfs2/aops.c      |  4 ++--
 fs/ocfs2/dir.c       |  2 +-
 fs/ocfs2/dlm/dlmfs.c |  4 ++--
 fs/ocfs2/file.c      | 34 +++++++++++++++++-----------------
 4 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 2f7268e8152..ef6cd30108a 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -595,7 +595,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 			     ssize_t bytes,
 			     void *private)
 {
-	struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
+	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
 
 	/* this io's submitter should not have unlocked this before we could */
 	BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
@@ -611,7 +611,7 @@ static ssize_t ocfs2_direct_IO(int rw,
 			       unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
+	struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
 	int ret;
 
 	mlog_entry_void();
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index baad2aa27c1..66821e17816 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -79,7 +79,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	struct buffer_head * bh, * tmp;
 	struct ocfs2_dir_entry * de;
 	int err;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct super_block * sb = inode->i_sb;
 	unsigned int ra_sectors = 16;
 	int lock_level = 0;
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 941acf14e61..b7f0ba97a1a 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -176,7 +176,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
 	int bytes_left;
 	ssize_t readlen;
 	char *lvb_buf;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 
 	mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
 		inode->i_ino, count, *ppos);
@@ -220,7 +220,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
 	int bytes_left;
 	ssize_t writelen;
 	char *lvb_buf;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 
 	mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
 		inode->i_ino, count, *ppos);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8786b3c490a..e9a82ad95c1 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -68,7 +68,7 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
-		   file->f_dentry->d_name.len, file->f_dentry->d_name.name);
+		   file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name);
 
 	spin_lock(&oi->ip_lock);
 
@@ -98,8 +98,8 @@ static int ocfs2_file_release(struct inode *inode, struct file *file)
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
-		       file->f_dentry->d_name.len,
-		       file->f_dentry->d_name.name);
+		       file->f_path.dentry->d_name.len,
+		       file->f_path.dentry->d_name.name);
 
 	spin_lock(&oi->ip_lock);
 	if (!--oi->ip_open_count)
@@ -1131,13 +1131,13 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 {
 	int ret, rw_level, have_alloc_sem = 0;
 	struct file *filp = iocb->ki_filp;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	int appending = filp->f_flags & O_APPEND ? 1 : 0;
 
 	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
 		   (unsigned int)nr_segs,
-		   filp->f_dentry->d_name.len,
-		   filp->f_dentry->d_name.name);
+		   filp->f_path.dentry->d_name.len,
+		   filp->f_path.dentry->d_name.name);
 
 	/* happy write of zero bytes */
 	if (iocb->ki_left == 0)
@@ -1159,7 +1159,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 		goto out;
 	}
 
-	ret = ocfs2_prepare_inode_for_write(filp->f_dentry, &iocb->ki_pos,
+	ret = ocfs2_prepare_inode_for_write(filp->f_path.dentry, &iocb->ki_pos,
 					    iocb->ki_left, appending);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -1207,12 +1207,12 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
 				       unsigned int flags)
 {
 	int ret;
-	struct inode *inode = out->f_dentry->d_inode;
+	struct inode *inode = out->f_path.dentry->d_inode;
 
 	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
 		   (unsigned int)len,
-		   out->f_dentry->d_name.len,
-		   out->f_dentry->d_name.name);
+		   out->f_path.dentry->d_name.len,
+		   out->f_path.dentry->d_name.name);
 
 	inode_double_lock(inode, pipe->inode);
 
@@ -1222,7 +1222,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
 		goto out;
 	}
 
-	ret = ocfs2_prepare_inode_for_write(out->f_dentry, ppos, len, 0);
+	ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out_unlock;
@@ -1247,12 +1247,12 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
 				      unsigned int flags)
 {
 	int ret = 0;
-	struct inode *inode = in->f_dentry->d_inode;
+	struct inode *inode = in->f_path.dentry->d_inode;
 
 	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
 		   (unsigned int)len,
-		   in->f_dentry->d_name.len,
-		   in->f_dentry->d_name.name);
+		   in->f_path.dentry->d_name.len,
+		   in->f_path.dentry->d_name.name);
 
 	/*
 	 * See the comment in ocfs2_file_aio_read()
@@ -1278,12 +1278,12 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 {
 	int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
 	struct file *filp = iocb->ki_filp;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 
 	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
 		   (unsigned int)nr_segs,
-		   filp->f_dentry->d_name.len,
-		   filp->f_dentry->d_name.name);
+		   filp->f_path.dentry->d_name.len,
+		   filp->f_path.dentry->d_name.name);
 
 	if (!inode) {
 		ret = -EINVAL;
-- 
cgit v1.2.3


From 80a067801d8cc9384f17991b90811f87ceab8225 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:26 -0800
Subject: [PATCH] struct path: convert openpromfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/openpromfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 26f44e0074e..99c0bc37ba0 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -262,7 +262,7 @@ found:
 
 static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct op_inode_info *oi = OP_I(inode);
 	struct device_node *dp = oi->u.node;
 	struct device_node *child;
-- 
cgit v1.2.3


From 24e23c24e7a4754e50439dfc33bdbd05b09c4cc5 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:32 -0800
Subject: [PATCH] struct path: convert qnx4

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/qnx4/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index 0d7103fa0df..c94db1db7a7 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -22,7 +22,7 @@
 
 static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	unsigned int offset;
 	struct buffer_head *bh;
 	struct qnx4_inode_entry *de;
-- 
cgit v1.2.3


From a57c4d65f70cb43c8503bc213c8be638011c22e7 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:32 -0800
Subject: [PATCH] struct path: convert ramfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ramfs/file-nommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index bfe5dbf1002..61cbe1ef06b 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -232,7 +232,7 @@ unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
 					    unsigned long pgoff, unsigned long flags)
 {
 	unsigned long maxpages, lpages, nr, loop, ret;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct page **pages = NULL, **ptr, *page;
 	loff_t isize;
 
-- 
cgit v1.2.3


From 1fc5adbd1916793c19d25347f484806c124d9be7 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:33 -0800
Subject: [PATCH] struct path: convert reiserfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/dir.c    |  4 ++--
 fs/reiserfs/file.c   |  4 ++--
 fs/reiserfs/ioctl.c  |  2 +-
 fs/reiserfs/procfs.c |  2 +-
 fs/reiserfs/xattr.c  | 10 +++++-----
 5 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 657050ad743..96a2f8889da 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -45,7 +45,7 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
 //
 static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct cpu_key pos_key;	/* key of current position in the directory (key of directory entry) */
 	INITIALIZE_PATH(path_to_entry);
 	struct buffer_head *bh;
@@ -135,7 +135,7 @@ static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 				/* Ignore the .reiserfs_priv entry */
 				if (reiserfs_xattrs(inode->i_sb) &&
 				    !old_format_only(inode->i_sb) &&
-				    filp->f_dentry == inode->i_sb->s_root &&
+				    filp->f_path.dentry == inode->i_sb->s_root &&
 				    REISERFS_SB(inode->i_sb)->priv_root &&
 				    REISERFS_SB(inode->i_sb)->priv_root->d_inode
 				    && deh_objectid(deh) ==
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 373d862c3f8..99b6f329ba2 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1288,7 +1288,7 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 	loff_t pos;		// Current position in the file.
 	ssize_t res;		// return value of various functions that we call.
 	int err = 0;
-	struct inode *inode = file->f_dentry->d_inode;	// Inode of the file that we are writing to.
+	struct inode *inode = file->f_path.dentry->d_inode;	// Inode of the file that we are writing to.
 	/* To simplify coding at this time, we store
 	   locked pages in array for now */
 	struct page *prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME];
@@ -1335,7 +1335,7 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 	if (count == 0)
 		goto out;
 
-	res = remove_suid(file->f_dentry);
+	res = remove_suid(file->f_path.dentry);
 	if (res)
 		goto out;
 
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 9c57578cb83..b484d2913c0 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -99,7 +99,7 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
 				unsigned long arg)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	int ret;
 
 	/* These are just misnamed, they actually get/put from/to user an int */
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index c533ec1bcae..ecc9943202f 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -295,7 +295,7 @@ static int show_oidmap(struct seq_file *m, struct super_block *sb)
 	}
 #if defined( REISERFS_USE_OIDMAPF )
 	if (sb_info->oidmap.use_file && (sb_info->oidmap.mapf != NULL)) {
-		loff_t size = sb_info->oidmap.mapf->f_dentry->d_inode->i_size;
+		loff_t size = sb_info->oidmap.mapf->f_path.dentry->d_inode->i_size;
 		total_used += size / sizeof(reiserfs_oidinterval_d_t);
 	}
 #endif
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 1e4d6859017..f01389fd162 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -274,7 +274,7 @@ static struct file *open_xa_file(const struct inode *inode, const char *name,
  */
 static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct cpu_key pos_key;	/* key of current position in the directory (key of directory entry) */
 	INITIALIZE_PATH(path_to_entry);
 	struct buffer_head *bh;
@@ -420,7 +420,7 @@ static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir)
 static
 int xattr_readdir(struct file *file, filldir_t filler, void *buf)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	int res = -ENOTDIR;
 	if (!file->f_op || !file->f_op->readdir)
 		goto out;
@@ -508,7 +508,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
 		goto out;
 	}
 
-	xinode = fp->f_dentry->d_inode;
+	xinode = fp->f_path.dentry->d_inode;
 	REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
 
 	/* we need to copy it off.. */
@@ -527,7 +527,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
 	newattrs.ia_size = buffer_size;
 	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
 	mutex_lock(&xinode->i_mutex);
-	err = notify_change(fp->f_dentry, &newattrs);
+	err = notify_change(fp->f_path.dentry, &newattrs);
 	if (err)
 		goto out_filp;
 
@@ -626,7 +626,7 @@ reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
 		goto out;
 	}
 
-	xinode = fp->f_dentry->d_inode;
+	xinode = fp->f_path.dentry->d_inode;
 	isize = xinode->i_size;
 	REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
 
-- 
cgit v1.2.3


From 3027795e7b873da643e851d62d17f6bcc43d6e4a Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:34 -0800
Subject: [PATCH] struct path: convert romfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/romfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index c5af088d4a4..d3e243a6f60 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -276,7 +276,7 @@ static unsigned char romfs_dtype_table[] = {
 static int
 romfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *i = filp->f_dentry->d_inode;
+	struct inode *i = filp->f_path.dentry->d_inode;
 	struct romfs_inode ri;
 	unsigned long offset, maxoff;
 	int j, ino, nextfh;
-- 
cgit v1.2.3


From 17b75e69493f655a09908045eddbb48718aef5de Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:39 -0800
Subject: [PATCH] struct path: convert smbfs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/smbfs/cache.c |  2 +-
 fs/smbfs/dir.c   |  6 +++---
 fs/smbfs/file.c  | 14 +++++++-------
 fs/smbfs/proc.c  | 10 +++++-----
 fs/smbfs/sock.c  |  4 ++--
 5 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
index 74b86d9725a..8182f0542a2 100644
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -125,7 +125,7 @@ smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
 	       struct smb_cache_control *ctrl, struct qstr *qname,
 	       struct smb_fattr *entry)
 {
-	struct dentry *newdent, *dentry = filp->f_dentry;
+	struct dentry *newdent, *dentry = filp->f_path.dentry;
 	struct inode *newino, *inode = dentry->d_inode;
 	struct smb_cache_control ctl = *ctrl;
 	int valid = 0;
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index 70d9c5a37f5..b1e58d1ac9c 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -78,7 +78,7 @@ struct inode_operations smb_dir_inode_operations_unix =
 static int 
 smb_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *dir = dentry->d_inode;
 	struct smb_sb_info *server = server_from_dentry(dentry);
 	union  smb_dir_cache *cache = NULL;
@@ -238,12 +238,12 @@ out:
 static int
 smb_dir_open(struct inode *dir, struct file *file)
 {
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = file->f_path.dentry;
 	struct smb_sb_info *server;
 	int error = 0;
 
 	VERBOSE("(%s/%s)\n", dentry->d_parent->d_name.name,
-		file->f_dentry->d_name.name);
+		file->f_path.dentry->d_name.name);
 
 	/*
 	 * Directory timestamps in the core protocol aren't updated
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index 50784d13c87..e50533a7951 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -102,7 +102,7 @@ static int
 smb_readpage(struct file *file, struct page *page)
 {
 	int		error;
-	struct dentry  *dentry = file->f_dentry;
+	struct dentry  *dentry = file->f_path.dentry;
 
 	page_cache_get(page);
 	error = smb_readpage_sync(dentry, page);
@@ -205,7 +205,7 @@ static int
 smb_updatepage(struct file *file, struct page *page, unsigned long offset,
 	       unsigned int count)
 {
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = file->f_path.dentry;
 
 	DEBUG1("(%s/%s %d@%lld)\n", DENTRY_PATH(dentry), count,
 		((unsigned long long)page->index << PAGE_CACHE_SHIFT) + offset);
@@ -218,7 +218,7 @@ smb_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 			unsigned long nr_segs, loff_t pos)
 {
 	struct file * file = iocb->ki_filp;
-	struct dentry * dentry = file->f_dentry;
+	struct dentry * dentry = file->f_path.dentry;
 	ssize_t	status;
 
 	VERBOSE("file %s/%s, count=%lu@%lu\n", DENTRY_PATH(dentry),
@@ -243,7 +243,7 @@ out:
 static int
 smb_file_mmap(struct file * file, struct vm_area_struct * vma)
 {
-	struct dentry * dentry = file->f_dentry;
+	struct dentry * dentry = file->f_path.dentry;
 	int	status;
 
 	VERBOSE("file %s/%s, address %lu - %lu\n",
@@ -264,7 +264,7 @@ static ssize_t
 smb_file_sendfile(struct file *file, loff_t *ppos,
 		  size_t count, read_actor_t actor, void *target)
 {
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = file->f_path.dentry;
 	ssize_t status;
 
 	VERBOSE("file %s/%s, pos=%Ld, count=%d\n",
@@ -323,7 +323,7 @@ smb_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 			       unsigned long nr_segs, loff_t pos)
 {
 	struct file * file = iocb->ki_filp;
-	struct dentry * dentry = file->f_dentry;
+	struct dentry * dentry = file->f_path.dentry;
 	ssize_t	result;
 
 	VERBOSE("file %s/%s, count=%lu@%lu\n",
@@ -355,7 +355,7 @@ static int
 smb_file_open(struct inode *inode, struct file * file)
 {
 	int result;
-	struct dentry *dentry = file->f_dentry;
+	struct dentry *dentry = file->f_path.dentry;
 	int smb_mode = (file->f_mode & O_ACCMODE) - 1;
 
 	lock_kernel();
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index 40e174db987..a5ced9e0c6c 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -873,7 +873,7 @@ smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt)
 	filp = fget(opt->fd);
 	if (!filp)
 		goto out;
-	if (!smb_valid_socket(filp->f_dentry->d_inode))
+	if (!smb_valid_socket(filp->f_path.dentry->d_inode))
 		goto out_putf;
 
 	server->sock_file = filp;
@@ -898,7 +898,7 @@ smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt)
 	/*
 	 * Store the server in sock user_data (Only used by sunrpc)
 	 */
-	sk = SOCKET_I(filp->f_dentry->d_inode)->sk;
+	sk = SOCKET_I(filp->f_path.dentry->d_inode)->sk;
 	sk->sk_user_data = server;
 
 	/* chain into the data_ready callback */
@@ -1939,7 +1939,7 @@ static int
 smb_proc_readdir_short(struct file *filp, void *dirent, filldir_t filldir,
 		       struct smb_cache_control *ctl)
 {
-	struct dentry *dir = filp->f_dentry;
+	struct dentry *dir = filp->f_path.dentry;
 	struct smb_sb_info *server = server_from_dentry(dir);
 	struct qstr qname;
 	struct smb_fattr fattr;
@@ -2291,7 +2291,7 @@ static int
 smb_proc_readdir_long(struct file *filp, void *dirent, filldir_t filldir,
 		      struct smb_cache_control *ctl)
 {
-	struct dentry *dir = filp->f_dentry;
+	struct dentry *dir = filp->f_path.dentry;
 	struct smb_sb_info *server = server_from_dentry(dir);
 	struct qstr qname;
 	struct smb_fattr fattr;
@@ -2859,7 +2859,7 @@ static int
 smb_proc_readdir_null(struct file *filp, void *dirent, filldir_t filldir,
 		      struct smb_cache_control *ctl)
 {
-	struct smb_sb_info *server = server_from_dentry(filp->f_dentry);
+	struct smb_sb_info *server = server_from_dentry(filp->f_path.dentry);
 
 	if (smb_proc_ops_wait(server) < 0)
 		return -EIO;
diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c
index 6815b1b12b6..92ea6b2367d 100644
--- a/fs/smbfs/sock.c
+++ b/fs/smbfs/sock.c
@@ -82,10 +82,10 @@ server_sock(struct smb_sb_info *server)
 	if (server && (file = server->sock_file))
 	{
 #ifdef SMBFS_PARANOIA
-		if (!smb_valid_socket(file->f_dentry->d_inode))
+		if (!smb_valid_socket(file->f_path.dentry->d_inode))
 			PARANOIA("bad socket!\n");
 #endif
-		return SOCKET_I(file->f_dentry->d_inode);
+		return SOCKET_I(file->f_path.dentry->d_inode);
 	}
 	return NULL;
 }
-- 
cgit v1.2.3


From 5495ca657d854ac57733ce5a52eee52255b3f4d8 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:43 -0800
Subject: [PATCH] struct path: convert sysv

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/sysv/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index f2bef962d30..ebf7007fa16 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -70,7 +70,7 @@ fail:
 static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
 	unsigned long pos = filp->f_pos;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
 	unsigned offset = pos & ~PAGE_CACHE_MASK;
 	unsigned long n = pos >> PAGE_CACHE_SHIFT;
-- 
cgit v1.2.3


From 5096e933a943c23bd9314b0ac7e14d07073cb2e5 Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:44 -0800
Subject: [PATCH] struct path: convert udf

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/udf/dir.c  | 4 ++--
 fs/udf/file.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 8c28efa3b8f..2391c9150c4 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -77,7 +77,7 @@ const struct file_operations udf_dir_operations = {
 
 int udf_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *dir = filp->f_dentry->d_inode;
+	struct inode *dir = filp->f_path.dentry->d_inode;
 	int result;
 
 	lock_kernel();
@@ -225,7 +225,7 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d
 
 		if ( cfi.fileCharacteristics & FID_FILE_CHAR_PARENT )
 		{
-			iblock = parent_ino(filp->f_dentry);
+			iblock = parent_ino(filp->f_path.dentry);
 			flen = 2;
 			memcpy(fname, "..", flen);
 			dt_type = DT_DIR;
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 7aedd552cba..d81f2db7b0e 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -108,7 +108,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 {
 	ssize_t retval;
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
 	int err, pos;
 	size_t count = iocb->ki_left;
 
-- 
cgit v1.2.3


From 763454d6108ad1c6d4e1013321aef8bf5efb3a4f Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:45 -0800
Subject: [PATCH] struct path: convert ufs

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ufs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 7f0a0aa6358..433b6f68403 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -426,7 +426,7 @@ static int
 ufs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	loff_t pos = filp->f_pos;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
 	unsigned int offset = pos & ~PAGE_CACHE_MASK;
 	unsigned long n = pos >> PAGE_CACHE_SHIFT;
-- 
cgit v1.2.3


From f0d1b0b30d250a07627ad8b9fbbb5c7cc08422e8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 8 Dec 2006 02:37:49 -0800
Subject: [PATCH] LOG2: Implement a general integer log2 facility in the kernel

This facility provides three entry points:

	ilog2()		Log base 2 of unsigned long
	ilog2_u32()	Log base 2 of u32
	ilog2_u64()	Log base 2 of u64

These facilities can either be used inside functions on dynamic data:

	int do_something(long q)
	{
		...;
		y = ilog2(x)
		...;
	}

Or can be used to statically initialise global variables with constant values:

	unsigned n = ilog2(27);

When performing static initialisation, the compiler will report "error:
initializer element is not constant" if asked to take a log of zero or of
something not reducible to a constant.  They treat negative numbers as
unsigned.

When not dealing with a constant, they fall back to using fls() which permits
them to use arch-specific log calculation instructions - such as BSR on
x86/x86_64 or SCAN on FRV - if available.

[akpm@osdl.org: MMC fix]
Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: David Howells <dhowells@redhat.com>
Cc: Wojtek Kaniewski <wojtekka@toxygen.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/super.c | 6 ++----
 fs/ext3/super.c | 6 ++----
 2 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 255cef5f742..6347c2dbdd8 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -597,8 +597,6 @@ static int ext2_check_descriptors (struct super_block * sb)
 	return 1;
 }
 
-#define log2(n) ffz(~(n))
- 
 /*
  * Maximal file size.  There is a direct, and {,double-,triple-}indirect
  * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
@@ -834,9 +832,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_sbh = bh;
 	sbi->s_mount_state = le16_to_cpu(es->s_state);
 	sbi->s_addr_per_block_bits =
-		log2 (EXT2_ADDR_PER_BLOCK(sb));
+		ilog2 (EXT2_ADDR_PER_BLOCK(sb));
 	sbi->s_desc_per_block_bits =
-		log2 (EXT2_DESC_PER_BLOCK(sb));
+		ilog2 (EXT2_DESC_PER_BLOCK(sb));
 
 	if (sb->s_magic != EXT2_SUPER_MAGIC)
 		goto cantfind_ext2;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 580b8a6ca97..b34886734a4 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1347,8 +1347,6 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
 }
 
-#define log2(n) ffz(~(n))
-
 /*
  * Maximal file size.  There is a direct, and {,double-,triple-}indirect
  * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
@@ -1597,8 +1595,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
 	sbi->s_sbh = bh;
 	sbi->s_mount_state = le16_to_cpu(es->s_state);
-	sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb));
-	sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb));
+	sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb));
+	sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb));
 	for (i=0; i < 4; i++)
 		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
 	sbi->s_def_hash_version = es->s_def_hash_version;
-- 
cgit v1.2.3


From 937949d9edbf4049bd41af6c9f92c26280584564 Mon Sep 17 00:00:00 2001
From: Cedric Le Goater <clg@fr.ibm.com>
Date: Fri, 8 Dec 2006 02:37:54 -0800
Subject: [PATCH] add process_session() helper routine

Replace occurences of task->signal->session by a new process_session() helper
routine.

It will be useful for pid namespaces to abstract the session pid number.

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c       | 4 ++--
 fs/binfmt_elf_fdpic.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c6dbb4a7ec7..d3adfd353ff 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1317,7 +1317,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_pid = p->pid;
 	prstatus->pr_ppid = p->parent->pid;
 	prstatus->pr_pgrp = process_group(p);
-	prstatus->pr_sid = p->signal->session;
+	prstatus->pr_sid = process_session(p);
 	if (thread_group_leader(p)) {
 		/*
 		 * This is the record for the group leader.  Add in the
@@ -1363,7 +1363,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 	psinfo->pr_pid = p->pid;
 	psinfo->pr_ppid = p->parent->pid;
 	psinfo->pr_pgrp = process_group(p);
-	psinfo->pr_sid = p->signal->session;
+	psinfo->pr_sid = process_session(p);
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 9f0b7efc3df..76f06f6bc2f 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1322,7 +1322,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_pid = p->pid;
 	prstatus->pr_ppid = p->parent->pid;
 	prstatus->pr_pgrp = process_group(p);
-	prstatus->pr_sid = p->signal->session;
+	prstatus->pr_sid = process_session(p);
 	if (thread_group_leader(p)) {
 		/*
 		 * This is the record for the group leader.  Add in the
@@ -1371,7 +1371,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 	psinfo->pr_pid = p->pid;
 	psinfo->pr_ppid = p->parent->pid;
 	psinfo->pr_pgrp = process_group(p);
-	psinfo->pr_sid = p->signal->session;
+	psinfo->pr_sid = process_session(p);
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
-- 
cgit v1.2.3


From 1ec320afdc9552c92191d5f89fcd1ebe588334ca Mon Sep 17 00:00:00 2001
From: Cedric Le Goater <clg@fr.ibm.com>
Date: Fri, 8 Dec 2006 02:37:55 -0800
Subject: [PATCH] add process_session() helper routine: deprecate old field

Add an anonymous union and ((deprecated)) to catch direct usage of the
session field.

[akpm@osdl.org: fix various missed conversions]
[jdike@addtoit.com: fix UML bug]
Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/array.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index b0cd014a39b..70e4fab117b 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -381,7 +381,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 			stime = cputime_add(stime, sig->stime);
 		}
 
-		sid = sig->session;
+		sid = signal_session(sig);
 		pgid = process_group(task);
 		ppid = rcu_dereference(task->real_parent)->tgid;
 
-- 
cgit v1.2.3


From 6b3286ed1169d74fea401367d6d4d6c6ec758a81 Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@sw.ru>
Date: Fri, 8 Dec 2006 02:37:56 -0800
Subject: [PATCH] rename struct namespace to struct mnt_namespace

Rename 'struct namespace' to 'struct mnt_namespace' to avoid confusion with
other namespaces being developped for the containers : pid, uts, ipc, etc.
'namespace' variables and attributes are also renamed to 'mnt_ns'

Signed-off-by: Kirill Korotaev <dev@sw.ru>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/afs/mntpt.c      |   2 +-
 fs/namespace.c      | 113 ++++++++++++++++++++++++++--------------------------
 fs/nfs/getroot.c    |   2 +-
 fs/pnode.c          |   2 +-
 fs/pnode.h          |   2 +-
 fs/proc/base.c      |  36 ++++++++---------
 fs/reiserfs/super.c |   2 +-
 7 files changed, 80 insertions(+), 79 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index f33b1a81a76..8f74e845082 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -18,7 +18,7 @@
 #include <linux/pagemap.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include "super.h"
 #include "cell.h"
 #include "volume.h"
diff --git a/fs/namespace.c b/fs/namespace.c
index b00ac84ebbd..fde8553faa7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -20,7 +20,7 @@
 #include <linux/module.h>
 #include <linux/sysfs.h>
 #include <linux/seq_file.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/mount.h>
@@ -133,10 +133,10 @@ struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 
 static inline int check_mnt(struct vfsmount *mnt)
 {
-	return mnt->mnt_namespace == current->nsproxy->namespace;
+	return mnt->mnt_ns == current->nsproxy->mnt_ns;
 }
 
-static void touch_namespace(struct namespace *ns)
+static void touch_mnt_namespace(struct mnt_namespace *ns)
 {
 	if (ns) {
 		ns->event = ++event;
@@ -144,7 +144,7 @@ static void touch_namespace(struct namespace *ns)
 	}
 }
 
-static void __touch_namespace(struct namespace *ns)
+static void __touch_mnt_namespace(struct mnt_namespace *ns)
 {
 	if (ns && ns->event != event) {
 		ns->event = event;
@@ -187,19 +187,19 @@ static void commit_tree(struct vfsmount *mnt)
 	struct vfsmount *parent = mnt->mnt_parent;
 	struct vfsmount *m;
 	LIST_HEAD(head);
-	struct namespace *n = parent->mnt_namespace;
+	struct mnt_namespace *n = parent->mnt_ns;
 
 	BUG_ON(parent == mnt);
 
 	list_add_tail(&head, &mnt->mnt_list);
 	list_for_each_entry(m, &head, mnt_list)
-		m->mnt_namespace = n;
+		m->mnt_ns = n;
 	list_splice(&head, n->list.prev);
 
 	list_add_tail(&mnt->mnt_hash, mount_hashtable +
 				hash(parent, mnt->mnt_mountpoint));
 	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
-	touch_namespace(n);
+	touch_mnt_namespace(n);
 }
 
 static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
@@ -320,7 +320,7 @@ EXPORT_SYMBOL(mnt_unpin);
 /* iterator */
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
-	struct namespace *n = m->private;
+	struct mnt_namespace *n = m->private;
 	struct list_head *p;
 	loff_t l = *pos;
 
@@ -333,7 +333,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 
 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct namespace *n = m->private;
+	struct mnt_namespace *n = m->private;
 	struct list_head *p = ((struct vfsmount *)v)->mnt_list.next;
 	(*pos)++;
 	return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list);
@@ -526,8 +526,8 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
 	list_for_each_entry(p, kill, mnt_hash) {
 		list_del_init(&p->mnt_expire);
 		list_del_init(&p->mnt_list);
-		__touch_namespace(p->mnt_namespace);
-		p->mnt_namespace = NULL;
+		__touch_mnt_namespace(p->mnt_ns);
+		p->mnt_ns = NULL;
 		list_del_init(&p->mnt_child);
 		if (p->mnt_parent != p)
 			p->mnt_mountpoint->d_mounted--;
@@ -830,7 +830,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
 	if (parent_nd) {
 		detach_mnt(source_mnt, parent_nd);
 		attach_mnt(source_mnt, nd);
-		touch_namespace(current->nsproxy->namespace);
+		touch_mnt_namespace(current->nsproxy->mnt_ns);
 	} else {
 		mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
 		commit_tree(source_mnt);
@@ -1145,9 +1145,9 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts,
 	 */
 	if (!propagate_mount_busy(mnt, 2)) {
 		/* delete from the namespace */
-		touch_namespace(mnt->mnt_namespace);
+		touch_mnt_namespace(mnt->mnt_ns);
 		list_del_init(&mnt->mnt_list);
-		mnt->mnt_namespace = NULL;
+		mnt->mnt_ns = NULL;
 		umount_tree(mnt, 1, umounts);
 		spin_unlock(&vfsmount_lock);
 	} else {
@@ -1168,7 +1168,7 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts,
  */
 static void expire_mount_list(struct list_head *graveyard, struct list_head *mounts)
 {
-	struct namespace *namespace;
+	struct mnt_namespace *ns;
 	struct vfsmount *mnt;
 
 	while (!list_empty(graveyard)) {
@@ -1178,10 +1178,10 @@ static void expire_mount_list(struct list_head *graveyard, struct list_head *mou
 
 		/* don't do anything if the namespace is dead - all the
 		 * vfsmounts from it are going away anyway */
-		namespace = mnt->mnt_namespace;
-		if (!namespace || !namespace->root)
+		ns = mnt->mnt_ns;
+		if (!ns || !ns->root)
 			continue;
-		get_namespace(namespace);
+		get_mnt_ns(ns);
 
 		spin_unlock(&vfsmount_lock);
 		down_write(&namespace_sem);
@@ -1189,7 +1189,7 @@ static void expire_mount_list(struct list_head *graveyard, struct list_head *mou
 		up_write(&namespace_sem);
 		release_mounts(&umounts);
 		mntput(mnt);
-		put_namespace(namespace);
+		put_mnt_ns(ns);
 		spin_lock(&vfsmount_lock);
 	}
 }
@@ -1439,14 +1439,15 @@ dput_out:
  * Allocate a new namespace structure and populate it with contents
  * copied from the namespace of the passed in task structure.
  */
-struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
+struct mnt_namespace *dup_mnt_ns(struct task_struct *tsk,
+		struct fs_struct *fs)
 {
-	struct namespace *namespace = tsk->nsproxy->namespace;
-	struct namespace *new_ns;
+	struct mnt_namespace *mnt_ns = tsk->nsproxy->mnt_ns;
+	struct mnt_namespace *new_ns;
 	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
 	struct vfsmount *p, *q;
 
-	new_ns = kmalloc(sizeof(struct namespace), GFP_KERNEL);
+	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
 	if (!new_ns)
 		return NULL;
 
@@ -1457,7 +1458,7 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
 
 	down_write(&namespace_sem);
 	/* First pass: copy the tree topology */
-	new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root,
+	new_ns->root = copy_tree(mnt_ns->root, mnt_ns->root->mnt_root,
 					CL_COPY_ALL | CL_EXPIRE);
 	if (!new_ns->root) {
 		up_write(&namespace_sem);
@@ -1473,10 +1474,10 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
 	 * as belonging to new namespace.  We have already acquired a private
 	 * fs_struct, so tsk->fs->lock is not needed.
 	 */
-	p = namespace->root;
+	p = mnt_ns->root;
 	q = new_ns->root;
 	while (p) {
-		q->mnt_namespace = new_ns;
+		q->mnt_ns = new_ns;
 		if (fs) {
 			if (p == fs->rootmnt) {
 				rootmnt = p;
@@ -1491,7 +1492,7 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
 				fs->altrootmnt = mntget(q);
 			}
 		}
-		p = next_mnt(p, namespace->root);
+		p = next_mnt(p, mnt_ns->root);
 		q = next_mnt(q, new_ns->root);
 	}
 	up_write(&namespace_sem);
@@ -1506,16 +1507,16 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
 	return new_ns;
 }
 
-int copy_namespace(int flags, struct task_struct *tsk)
+int copy_mnt_ns(int flags, struct task_struct *tsk)
 {
-	struct namespace *namespace = tsk->nsproxy->namespace;
-	struct namespace *new_ns;
+	struct mnt_namespace *ns = tsk->nsproxy->mnt_ns;
+	struct mnt_namespace *new_ns;
 	int err = 0;
 
-	if (!namespace)
+	if (!ns)
 		return 0;
 
-	get_namespace(namespace);
+	get_mnt_ns(ns);
 
 	if (!(flags & CLONE_NEWNS))
 		return 0;
@@ -1525,16 +1526,16 @@ int copy_namespace(int flags, struct task_struct *tsk)
 		goto out;
 	}
 
-	new_ns = dup_namespace(tsk, tsk->fs);
+	new_ns = dup_mnt_ns(tsk, tsk->fs);
 	if (!new_ns) {
 		err = -ENOMEM;
 		goto out;
 	}
 
-	tsk->nsproxy->namespace = new_ns;
+	tsk->nsproxy->mnt_ns = new_ns;
 
 out:
-	put_namespace(namespace);
+	put_mnt_ns(ns);
 	return err;
 }
 
@@ -1754,7 +1755,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
 	detach_mnt(user_nd.mnt, &root_parent);
 	attach_mnt(user_nd.mnt, &old_nd);     /* mount old root on put_old */
 	attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */
-	touch_namespace(current->nsproxy->namespace);
+	touch_mnt_namespace(current->nsproxy->mnt_ns);
 	spin_unlock(&vfsmount_lock);
 	chroot_fs_refs(&user_nd, &new_nd);
 	security_sb_post_pivotroot(&user_nd, &new_nd);
@@ -1779,27 +1780,27 @@ out3:
 static void __init init_mount_tree(void)
 {
 	struct vfsmount *mnt;
-	struct namespace *namespace;
+	struct mnt_namespace *ns;
 
 	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
 	if (IS_ERR(mnt))
 		panic("Can't create rootfs");
-	namespace = kmalloc(sizeof(*namespace), GFP_KERNEL);
-	if (!namespace)
+	ns = kmalloc(sizeof(*ns), GFP_KERNEL);
+	if (!ns)
 		panic("Can't allocate initial namespace");
-	atomic_set(&namespace->count, 1);
-	INIT_LIST_HEAD(&namespace->list);
-	init_waitqueue_head(&namespace->poll);
-	namespace->event = 0;
-	list_add(&mnt->mnt_list, &namespace->list);
-	namespace->root = mnt;
-	mnt->mnt_namespace = namespace;
-
-	init_task.nsproxy->namespace = namespace;
-	get_namespace(namespace);
-
-	set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
-	set_fs_root(current->fs, namespace->root, namespace->root->mnt_root);
+	atomic_set(&ns->count, 1);
+	INIT_LIST_HEAD(&ns->list);
+	init_waitqueue_head(&ns->poll);
+	ns->event = 0;
+	list_add(&mnt->mnt_list, &ns->list);
+	ns->root = mnt;
+	mnt->mnt_ns = ns;
+
+	init_task.nsproxy->mnt_ns = ns;
+	get_mnt_ns(ns);
+
+	set_fs_pwd(current->fs, ns->root, ns->root->mnt_root);
+	set_fs_root(current->fs, ns->root, ns->root->mnt_root);
 }
 
 void __init mnt_init(unsigned long mempages)
@@ -1860,11 +1861,11 @@ void __init mnt_init(unsigned long mempages)
 	init_mount_tree();
 }
 
-void __put_namespace(struct namespace *namespace)
+void __put_mnt_ns(struct mnt_namespace *ns)
 {
-	struct vfsmount *root = namespace->root;
+	struct vfsmount *root = ns->root;
 	LIST_HEAD(umount_list);
-	namespace->root = NULL;
+	ns->root = NULL;
 	spin_unlock(&vfsmount_lock);
 	down_write(&namespace_sem);
 	spin_lock(&vfsmount_lock);
@@ -1872,5 +1873,5 @@ void __put_namespace(struct namespace *namespace)
 	spin_unlock(&vfsmount_lock);
 	up_write(&namespace_sem);
 	release_mounts(&umount_list);
-	kfree(namespace);
+	kfree(ns);
 }
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 20c6f39ea38..8391bd7a83c 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -31,7 +31,7 @@
 #include <linux/nfs_idmap.h>
 #include <linux/vfs.h>
 #include <linux/namei.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include <linux/security.h>
 
 #include <asm/system.h>
diff --git a/fs/pnode.c b/fs/pnode.c
index da42ee61c1d..56aacead836 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -6,7 +6,7 @@
  *	Author : Ram Pai (linuxram@us.ibm.com)
  *
  */
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include <linux/mount.h>
 #include <linux/fs.h>
 #include "pnode.h"
diff --git a/fs/pnode.h b/fs/pnode.h
index 020e1bb60fd..d45bd8ec36b 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -13,7 +13,7 @@
 
 #define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED)
 #define IS_MNT_SLAVE(mnt) (mnt->mnt_master)
-#define IS_MNT_NEW(mnt)  (!mnt->mnt_namespace)
+#define IS_MNT_NEW(mnt)  (!mnt->mnt_ns)
 #define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED)
 #define IS_MNT_UNBINDABLE(mnt) (mnt->mnt_flags & MNT_UNBINDABLE)
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a71f1755bb5..a3b5074118a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -59,7 +59,7 @@
 #include <linux/string.h>
 #include <linux/seq_file.h>
 #include <linux/namei.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include <linux/mm.h>
 #include <linux/smp_lock.h>
 #include <linux/rcupdate.h>
@@ -365,33 +365,33 @@ struct proc_mounts {
 static int mounts_open(struct inode *inode, struct file *file)
 {
 	struct task_struct *task = get_proc_task(inode);
-	struct namespace *namespace = NULL;
+	struct mnt_namespace *ns = NULL;
 	struct proc_mounts *p;
 	int ret = -EINVAL;
 
 	if (task) {
 		task_lock(task);
-		namespace = task->nsproxy->namespace;
-		if (namespace)
-			get_namespace(namespace);
+		ns = task->nsproxy->mnt_ns;
+		if (ns)
+			get_mnt_ns(ns);
 		task_unlock(task);
 		put_task_struct(task);
 	}
 
-	if (namespace) {
+	if (ns) {
 		ret = -ENOMEM;
 		p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
 		if (p) {
 			file->private_data = &p->m;
 			ret = seq_open(file, &mounts_op);
 			if (!ret) {
-				p->m.private = namespace;
-				p->event = namespace->event;
+				p->m.private = ns;
+				p->event = ns->event;
 				return 0;
 			}
 			kfree(p);
 		}
-		put_namespace(namespace);
+		put_mnt_ns(ns);
 	}
 	return ret;
 }
@@ -399,15 +399,15 @@ static int mounts_open(struct inode *inode, struct file *file)
 static int mounts_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = file->private_data;
-	struct namespace *namespace = m->private;
-	put_namespace(namespace);
+	struct mnt_namespace *ns = m->private;
+	put_mnt_ns(ns);
 	return seq_release(inode, file);
 }
 
 static unsigned mounts_poll(struct file *file, poll_table *wait)
 {
 	struct proc_mounts *p = file->private_data;
-	struct namespace *ns = p->m.private;
+	struct mnt_namespace *ns = p->m.private;
 	unsigned res = 0;
 
 	poll_wait(file, &ns->poll, wait);
@@ -437,21 +437,21 @@ static int mountstats_open(struct inode *inode, struct file *file)
 
 	if (!ret) {
 		struct seq_file *m = file->private_data;
-		struct namespace *namespace = NULL;
+		struct mnt_namespace *mnt_ns = NULL;
 		struct task_struct *task = get_proc_task(inode);
 
 		if (task) {
 			task_lock(task);
 			if (task->nsproxy)
-				namespace = task->nsproxy->namespace;
-			if (namespace)
-				get_namespace(namespace);
+				mnt_ns = task->nsproxy->mnt_ns;
+			if (mnt_ns)
+				get_mnt_ns(mnt_ns);
 			task_unlock(task);
 			put_task_struct(task);
 		}
 
-		if (namespace)
-			m->private = namespace;
+		if (mnt_ns)
+			m->private = mnt_ns;
 		else {
 			seq_release(inode, file);
 			ret = -EINVAL;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 7fb5fb036f9..58ad4551a7c 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -23,7 +23,7 @@
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/quotaops.h>
-- 
cgit v1.2.3


From 61a58c6c238cc81f7742b8cc84212cc55fb57747 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Date: Fri, 8 Dec 2006 02:37:58 -0800
Subject: [PATCH] rename struct pspace to struct pid_namespace

Rename struct pspace to struct pid_namespace for consistency with other
namespaces (uts_namespace and ipc_namespace).  Also rename
include/linux/pspace.h to include/linux/pid_namespace.h and variables from
pspace to pid_ns.

Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/proc_misc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 9397ff62553..89ccfa16b00 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -45,7 +45,7 @@
 #include <linux/sysrq.h>
 #include <linux/vmalloc.h>
 #include <linux/crash_dump.h>
-#include <linux/pspace.h>
+#include <linux/pid_namespace.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -92,7 +92,7 @@ static int loadavg_read_proc(char *page, char **start, off_t off,
 		LOAD_INT(a), LOAD_FRAC(a),
 		LOAD_INT(b), LOAD_FRAC(b),
 		LOAD_INT(c), LOAD_FRAC(c),
-		nr_running(), nr_threads, init_pspace.last_pid);
+		nr_running(), nr_threads, init_pid_ns.last_pid);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
-- 
cgit v1.2.3


From 6cc1b22a4acef3816eaa5f8c227d93d749b23195 Mon Sep 17 00:00:00 2001
From: Cedric Le Goater <clg@fr.ibm.com>
Date: Fri, 8 Dec 2006 02:38:00 -0800
Subject: [PATCH] use current->nsproxy->pid_ns

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/proc_misc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 89ccfa16b00..f6d6f81dd44 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -92,7 +92,7 @@ static int loadavg_read_proc(char *page, char **start, off_t off,
 		LOAD_INT(a), LOAD_FRAC(a),
 		LOAD_INT(b), LOAD_FRAC(b),
 		LOAD_INT(c), LOAD_FRAC(c),
-		nr_running(), nr_threads, init_pid_ns.last_pid);
+		nr_running(), nr_threads, current->nsproxy->pid_ns->last_pid);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
-- 
cgit v1.2.3


From 84d737866e2babdeab0c6b18ea155c6a649663b8 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Date: Fri, 8 Dec 2006 02:38:01 -0800
Subject: [PATCH] add child reaper to pid_namespace

Add a per pid_namespace child-reaper.  This is needed so processes are reaped
within the same pid space and do not spill over to the parent pid space.  Its
also needed so containers preserve existing semantic that pid == 1 would reap
orphaned children.

This is based on Eric Biederman's patch: http://lkml.org/lkml/2006/2/6/285

Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 60433e2254a..12d8cd461b4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -38,6 +38,7 @@
 #include <linux/binfmts.h>
 #include <linux/swap.h>
 #include <linux/utsname.h>
+#include <linux/pid_namespace.h>
 #include <linux/module.h>
 #include <linux/namei.h>
 #include <linux/proc_fs.h>
@@ -620,8 +621,8 @@ static int de_thread(struct task_struct *tsk)
 	 * Reparenting needs write_lock on tasklist_lock,
 	 * so it is safe to do it under read_lock.
 	 */
-	if (unlikely(tsk->group_leader == child_reaper))
-		child_reaper = tsk;
+	if (unlikely(tsk->group_leader == child_reaper(tsk)))
+		tsk->nsproxy->pid_ns->child_reaper = tsk;
 
 	zap_other_threads(tsk);
 	read_unlock(&tasklist_lock);
-- 
cgit v1.2.3


From 4b75f78edcab291eb29fe9a205cbf7b80c1c644f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 Dec 2006 02:39:38 -0800
Subject: [PATCH] lockdep: annotate nfsd4 recover code

> =============================================
> [ INFO: possible recursive locking detected ]
> 2.6.18-1.2724.lockdepPAE #1
> ---------------------------------------------
> nfsd/6884 is trying to acquire lock:
>  (&inode->i_mutex){--..}, at: [<c04811e5>] vfs_rmdir+0x73/0xf4
>
> but task is already holding lock:
>  (&inode->i_mutex){--..}, at: [<f8dfa621>]
> nfsd4_clear_clid_dir+0x1f/0x3d [nfsd]
>
> other info that might help us debug this:
> 3 locks held by nfsd/6884:
>  #0:  (hash_sem){----}, at: [<f8de05eb>] nfsd+0x181/0x2ea [nfsd]
>  #1:  (client_mutex){--..}, at: [<f8df6d19>]
> nfsd4_setclientid_confirm+0x3b/0x2cf [nfsd]
>  #2:  (&inode->i_mutex){--..}, at: [<f8dfa621>]
> nfsd4_clear_clid_dir+0x1f/0x3d [nfsd]
>
> stack backtrace:
>  [<c040524d>] dump_trace+0x69/0x1af
>  [<c04053ab>] show_trace_log_lvl+0x18/0x2c
>  [<c040595f>] show_trace+0xf/0x11
>  [<c0405a53>] dump_stack+0x15/0x17
>  [<c043ca7a>] __lock_acquire+0x110/0x9b6
>  [<c043d91e>] lock_acquire+0x5c/0x7a
>  [<c061a41b>] __mutex_lock_slowpath+0xde/0x234
>  [<c04811e5>] vfs_rmdir+0x73/0xf4
>  [<f8dfa62b>] nfsd4_clear_clid_dir+0x29/0x3d [nfsd]
>  [<f8dfa733>] nfsd4_remove_clid_dir+0xb8/0xf8 [nfsd]
>  [<f8df6e90>] nfsd4_setclientid_confirm+0x1b2/0x2cf [nfsd]
>  [<f8def19a>] nfsd4_proc_compound+0x137a/0x166c [nfsd]
>  [<f8de00d5>] nfsd_dispatch+0xc5/0x180 [nfsd]
>  [<f8d09d83>] svc_process+0x3bd/0x631 [sunrpc]
>  [<f8de0604>] nfsd+0x19a/0x2ea [nfsd]
>  [<c0404e27>] kernel_thread_helper+0x7/0x10
> DWARF2 unwinder stuck at kernel_thread_helper+0x7/0x10
> Leftover inexact backtrace:
>  =======================

Some nesting annotation to the nfsd4 recovery code.
The vfs operations called will take dentry->d_inode->i_mutex.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4recover.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 81b8565d383..c7774e3a946 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -259,7 +259,7 @@ nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
 		printk("nfsd4: non-file found in client recovery directory\n");
 		return -EINVAL;
 	}
-	mutex_lock(&dir->d_inode->i_mutex);
+	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
 	status = vfs_unlink(dir->d_inode, dentry);
 	mutex_unlock(&dir->d_inode->i_mutex);
 	return status;
-- 
cgit v1.2.3


From cb65a5ba3dcef3184ef1ba026b1e268bd1a9323f Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Fri, 8 Dec 2006 02:39:39 -0800
Subject: [PATCH] NFS2: Calculate 'w' a bit later in
 nfsaclsvc_encode_getaclres()

NFS2: Calculate 'w' a bit later in nfsaclsvc_encode_getaclres()
      This is a small performance optimization since we can return before
      needing 'w'. It also saves a few bytes of .text :
      Before:
           text    data     bss     dec     hex filename
           2406     212       0    2618     a3a fs/nfsd/nfs2acl.o
      After:
           text    data     bss     dec     hex filename
           2400     212       0    2612     a34 fs/nfsd/nfs2acl.o

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs2acl.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index e3eca081698..edde5dc5f79 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -222,12 +222,10 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 {
 	struct dentry *dentry = resp->fh.fh_dentry;
 	struct inode *inode = dentry->d_inode;
-	int w = nfsacl_size(
-		(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
-		(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
 	struct kvec *head = rqstp->rq_res.head;
 	unsigned int base;
 	int n;
+	int w;
 
 	if (dentry == NULL || dentry->d_inode == NULL)
 		return 0;
@@ -239,7 +237,9 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 		return 0;
 	base = (char *)p - (char *)head->iov_base;
 
-	rqstp->rq_res.page_len = w;
+	rqstp->rq_res.page_len = w = nfsacl_size(
+		(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
+		(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
 	while (w > 0) {
 		if (!rqstp->rq_respages[rqstp->rq_resused++])
 			return 0;
-- 
cgit v1.2.3


From 14d2b59e8c1634ceb995097b162592b0af139578 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Fri, 8 Dec 2006 02:39:40 -0800
Subject: [PATCH] NFS3: Calculate 'w' a bit later in nfs3svc_encode_getaclres()

NFS3: Calculate 'w' a bit later in nfs3svc_encode_getaclres()
      This is a small performance optimization since we can return before
      needing 'w'. It also saves a few bytes of .text :
      Before:
           text    data     bss     dec     hex filename
           1632     140       0    1772     6ec fs/nfsd/nfs3acl.o
      After:
           text    data     bss     dec     hex filename
           1624     140       0    1764     6e4 fs/nfsd/nfs3acl.o

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs3acl.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index fcad2895ddb..3e3f2de82c3 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -171,19 +171,19 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0 && dentry && dentry->d_inode) {
 		struct inode *inode = dentry->d_inode;
-		int w = nfsacl_size(
-			(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
-			(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
 		struct kvec *head = rqstp->rq_res.head;
 		unsigned int base;
 		int n;
+		int w;
 
 		*p++ = htonl(resp->mask);
 		if (!xdr_ressize_check(rqstp, p))
 			return 0;
 		base = (char *)p - (char *)head->iov_base;
 
-		rqstp->rq_res.page_len = w;
+		rqstp->rq_res.page_len = w = nfsacl_size(
+			(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
+			(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
 		while (w > 0) {
 			if (!rqstp->rq_respages[rqstp->rq_resused++])
 				return 0;
-- 
cgit v1.2.3


From 4b3bb06bea649396490094780f90d315c152f6ab Mon Sep 17 00:00:00 2001
From: Yan Burman <burman.yan@gmail.com>
Date: Fri, 8 Dec 2006 02:39:41 -0800
Subject: [PATCH] nfsd: replace kmalloc+memset with kcalloc + simplify NULL
 check

Replace kmalloc+memset with kcalloc and simplify

Signed-off-by: Yan Burman <burman.yan@gmail.com>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfscache.c |  3 +--
 fs/nfsd/vfs.c      | 37 ++++++++++++++++++-------------------
 2 files changed, 19 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 6100bbe2743..f90d7047585 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -66,14 +66,13 @@ nfsd_cache_init(void)
 		printk (KERN_ERR "nfsd: cannot allocate all %d cache entries, only got %d\n",
 			CACHESIZE, CACHESIZE-i);
 
-	hash_list = kmalloc (HASHSIZE * sizeof(struct hlist_head), GFP_KERNEL);
+	hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
 	if (!hash_list) {
 		nfsd_cache_shutdown();
 		printk (KERN_ERR "nfsd: cannot allocate %Zd bytes for hash list\n",
 			HASHSIZE * sizeof(struct hlist_head));
 		return;
 	}
-	memset(hash_list, 0, HASHSIZE * sizeof(struct hlist_head));
 
 	cache_disabled = 0;
 }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d610edde938..4883d758622 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1885,28 +1885,27 @@ nfsd_racache_init(int cache_size)
 		return 0;
 	if (cache_size < 2*RAPARM_HASH_SIZE)
 		cache_size = 2*RAPARM_HASH_SIZE;
-	raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL);
-
-	if (raparml != NULL) {
-		dprintk("nfsd: allocating %d readahead buffers.\n",
-			cache_size);
-		for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) {
-			raparm_hash[i].pb_head = NULL;
-			spin_lock_init(&raparm_hash[i].pb_lock);
-		}
-		nperbucket = cache_size >> RAPARM_HASH_BITS;
-		memset(raparml, 0, sizeof(struct raparms) * cache_size);
-		for (i = 0; i < cache_size - 1; i++) {
-			if (i % nperbucket == 0)
-				raparm_hash[j++].pb_head = raparml + i;
-			if (i % nperbucket < nperbucket-1)
-				raparml[i].p_next = raparml + i + 1;
-		}
-	} else {
+	raparml = kcalloc(cache_size, sizeof(struct raparms), GFP_KERNEL);
+
+	if (!raparml) {
 		printk(KERN_WARNING
-		       "nfsd: Could not allocate memory read-ahead cache.\n");
+			"nfsd: Could not allocate memory read-ahead cache.\n");
 		return -ENOMEM;
 	}
+
+	dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
+	for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) {
+		raparm_hash[i].pb_head = NULL;
+		spin_lock_init(&raparm_hash[i].pb_lock);
+	}
+	nperbucket = cache_size >> RAPARM_HASH_BITS;
+	for (i = 0; i < cache_size - 1; i++) {
+		if (i % nperbucket == 0)
+			raparm_hash[j++].pb_head = raparml + i;
+		if (i % nperbucket < nperbucket-1)
+			raparml[i].p_next = raparml + i + 1;
+	}
+
 	nfsdstats.ra_size = cache_size;
 	return 0;
 }
-- 
cgit v1.2.3


From c17bb4951752d3e0f49cd1ea9d2e868422f9e0d6 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 8 Dec 2006 02:39:46 -0800
Subject: [PATCH] fault-injection capability for disk IO

This patch provides fault-injection capability for disk IO.

Boot option:

fail_make_request=<probability>,<interval>,<space>,<times>

	<interval> -- specifies the interval of failures.

	<probability> -- specifies how often it should fail in percent.

	<space> -- specifies the size of free space where disk IO can be issued
		   safely in bytes.

	<times> -- specifies how many times failures may happen at most.

Debugfs:

/debug/fail_make_request/interval
/debug/fail_make_request/probability
/debug/fail_make_request/specifies
/debug/fail_make_request/times

Example:

	fail_make_request=10,100,0,-1
	echo 1 > /sys/blocks/hda/hda1/make-it-fail

generic_make_request() on /dev/hda1 fails once per 10 times.

Cc: Jens Axboe <axboe@suse.de>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/partitions/check.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'fs')

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 1901137f4ec..3d73d94d93a 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -276,12 +276,39 @@ static struct part_attribute part_attr_stat = {
 	.show	= part_stat_read
 };
 
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+
+static ssize_t part_fail_store(struct hd_struct * p,
+			       const char *buf, size_t count)
+{
+	int i;
+
+	if (count > 0 && sscanf(buf, "%d", &i) > 0)
+		p->make_it_fail = (i == 0) ? 0 : 1;
+
+	return count;
+}
+static ssize_t part_fail_read(struct hd_struct * p, char *page)
+{
+	return sprintf(page, "%d\n", p->make_it_fail);
+}
+static struct part_attribute part_attr_fail = {
+	.attr = {.name = "make-it-fail", .mode = S_IRUGO | S_IWUSR },
+	.store	= part_fail_store,
+	.show	= part_fail_read
+};
+
+#endif
+
 static struct attribute * default_attrs[] = {
 	&part_attr_uevent.attr,
 	&part_attr_dev.attr,
 	&part_attr_start.attr,
 	&part_attr_size.attr,
 	&part_attr_stat.attr,
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+	&part_attr_fail.attr,
+#endif
 	NULL,
 };
 
-- 
cgit v1.2.3


From f4f154fd920b2178382a6a24a236348e4429ebc1 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 8 Dec 2006 02:39:47 -0800
Subject: [PATCH] fault injection: process filtering for fault-injection
 capabilities

This patch provides process filtering feature.
The process filter allows failing only permitted processes
by /proc/<pid>/make-it-fail

Please see the example that demostrates how to inject slab allocation
failures into module init/cleanup code
in Documentation/fault-injection/fault-injection.txt

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/base.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index a3b5074118a..fd959d5b5a8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -853,6 +853,65 @@ static struct file_operations proc_seccomp_operations = {
 };
 #endif /* CONFIG_SECCOMP */
 
+#ifdef CONFIG_FAULT_INJECTION
+static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
+				      size_t count, loff_t *ppos)
+{
+	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
+	char buffer[PROC_NUMBUF];
+	size_t len;
+	int make_it_fail;
+	loff_t __ppos = *ppos;
+
+	if (!task)
+		return -ESRCH;
+	make_it_fail = task->make_it_fail;
+	put_task_struct(task);
+
+	len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
+	if (__ppos >= len)
+		return 0;
+	if (count > len-__ppos)
+		count = len-__ppos;
+	if (copy_to_user(buf, buffer + __ppos, count))
+		return -EFAULT;
+	*ppos = __ppos + count;
+	return count;
+}
+
+static ssize_t proc_fault_inject_write(struct file * file,
+			const char __user * buf, size_t count, loff_t *ppos)
+{
+	struct task_struct *task;
+	char buffer[PROC_NUMBUF], *end;
+	int make_it_fail;
+
+	if (!capable(CAP_SYS_RESOURCE))
+		return -EPERM;
+	memset(buffer, 0, sizeof(buffer));
+	if (count > sizeof(buffer) - 1)
+		count = sizeof(buffer) - 1;
+	if (copy_from_user(buffer, buf, count))
+		return -EFAULT;
+	make_it_fail = simple_strtol(buffer, &end, 0);
+	if (*end == '\n')
+		end++;
+	task = get_proc_task(file->f_dentry->d_inode);
+	if (!task)
+		return -ESRCH;
+	task->make_it_fail = make_it_fail;
+	put_task_struct(task);
+	if (end - buffer == 0)
+		return -EIO;
+	return end - buffer;
+}
+
+static struct file_operations proc_fault_inject_operations = {
+	.read		= proc_fault_inject_read,
+	.write		= proc_fault_inject_write,
+};
+#endif
+
 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = dentry->d_inode;
@@ -1793,6 +1852,9 @@ static struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_AUDITSYSCALL
 	REG("loginuid",   S_IWUSR|S_IRUGO, loginuid),
 #endif
+#ifdef CONFIG_FAULT_INJECTION
+	REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
+#endif
 };
 
 static int proc_tgid_base_readdir(struct file * filp,
@@ -2068,6 +2130,9 @@ static struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_AUDITSYSCALL
 	REG("loginuid",  S_IWUSR|S_IRUGO, loginuid),
 #endif
+#ifdef CONFIG_FAULT_INJECTION
+	REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
+#endif
 };
 
 static int proc_tid_base_readdir(struct file * filp,
-- 
cgit v1.2.3


From 7bf65382caeecea4ae7206138e92e732b676d6e5 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Fri, 8 Dec 2006 02:41:14 -0800
Subject: [PATCH] proc_misc build fix

fs/proc/proc_misc.c: In function `version_read_proc':
fs/proc/proc_misc.c:256: warning: implicit declaration of function `utsname'

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/proc_misc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index f6d6f81dd44..dc3e580d1dc 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -39,6 +39,7 @@
 #include <linux/seq_file.h>
 #include <linux/times.h>
 #include <linux/profile.h>
+#include <linux/utsname.h>
 #include <linux/blkdev.h>
 #include <linux/hugetlb.h>
 #include <linux/jiffies.h>
-- 
cgit v1.2.3


From 14b36af46a1d3652aff6734ea24816995dff8123 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Tue, 5 Dec 2006 17:05:44 +0100
Subject: [MIPS] Rename SNI_RM200_PCI to just SNI_RM preparing for more RM
 machines

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 fs/partitions/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig
index e478f194183..74552c60b67 100644
--- a/fs/partitions/Kconfig
+++ b/fs/partitions/Kconfig
@@ -194,7 +194,7 @@ config LDM_DEBUG
 
 config SGI_PARTITION
 	bool "SGI partition support" if PARTITION_ADVANCED
-	default y if (SGI_IP22 || SGI_IP27 || ((MACH_JAZZ || SNI_RM200_PCI) && !CPU_LITTLE_ENDIAN))
+	default y if (SGI_IP22 || SGI_IP27 || ((MACH_JAZZ || SNI_RM) && !CPU_LITTLE_ENDIAN))
 	help
 	  Say Y here if you would like to be able to read the hard disk
 	  partition table format used by SGI machines.
-- 
cgit v1.2.3


From 8c08540f8755c451d8b96ea14cfe796bc3cd712d Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 10 Dec 2006 02:19:24 -0800
Subject: [PATCH] clean up __set_page_dirty_nobuffers()

Save a tabstop in __set_page_dirty_nobuffers() and __set_page_dirty_buffers()
and a few other places.  No functional changes.

Cc: Jay Lan <jlan@sgi.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Chris Sturtivant <csturtiv@sgi.com>
Cc: Tony Ernst <tee@sgi.com>
Cc: Guillaume Thouvenin <guillaume.thouvenin@bull.net>
Cc: David Wright <daw@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 517860f2d75..865570fe0d3 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -724,20 +724,19 @@ int __set_page_dirty_buffers(struct page *page)
 	}
 	spin_unlock(&mapping->private_lock);
 
-	if (!TestSetPageDirty(page)) {
-		write_lock_irq(&mapping->tree_lock);
-		if (page->mapping) {	/* Race with truncate? */
-			if (mapping_cap_account_dirty(mapping))
-				__inc_zone_page_state(page, NR_FILE_DIRTY);
-			radix_tree_tag_set(&mapping->page_tree,
-						page_index(page),
-						PAGECACHE_TAG_DIRTY);
-		}
-		write_unlock_irq(&mapping->tree_lock);
-		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-		return 1;
+	if (TestSetPageDirty(page))
+		return 0;
+
+	write_lock_irq(&mapping->tree_lock);
+	if (page->mapping) {	/* Race with truncate? */
+		if (mapping_cap_account_dirty(mapping))
+			__inc_zone_page_state(page, NR_FILE_DIRTY);
+		radix_tree_tag_set(&mapping->page_tree,
+				page_index(page), PAGECACHE_TAG_DIRTY);
 	}
-	return 0;
+	write_unlock_irq(&mapping->tree_lock);
+	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+	return 1;
 }
 EXPORT_SYMBOL(__set_page_dirty_buffers);
 
-- 
cgit v1.2.3


From 55e829af06681e5d731c03ba04febbd1c76ca293 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 10 Dec 2006 02:19:27 -0800
Subject: [PATCH] io-accounting: write accounting

Accounting writes is fairly simple: whenever a process flips a page from clean
to dirty, we accuse it of having caused a write to underlying storage of
PAGE_CACHE_SIZE bytes.

This may overestimate the amount of writing: the page-dirtying may cause only
one buffer_head's worth of writeout.  Fixing that is possible, but probably a
bit messy and isn't obviously important.

Cc: Jay Lan <jlan@sgi.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Chris Sturtivant <csturtiv@sgi.com>
Cc: Tony Ernst <tee@sgi.com>
Cc: Guillaume Thouvenin <guillaume.thouvenin@bull.net>
Cc: David Wright <daw@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 865570fe0d3..904d59d1eb8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -35,6 +35,7 @@
 #include <linux/hash.h>
 #include <linux/suspend.h>
 #include <linux/buffer_head.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/bio.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
@@ -729,8 +730,10 @@ int __set_page_dirty_buffers(struct page *page)
 
 	write_lock_irq(&mapping->tree_lock);
 	if (page->mapping) {	/* Race with truncate? */
-		if (mapping_cap_account_dirty(mapping))
+		if (mapping_cap_account_dirty(mapping)) {
 			__inc_zone_page_state(page, NR_FILE_DIRTY);
+			task_io_account_write(PAGE_CACHE_SIZE);
+		}
 		radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 	}
-- 
cgit v1.2.3


From e08748ce01e02f0ec154b141f392ccb9555333f4 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 10 Dec 2006 02:19:31 -0800
Subject: [PATCH] io-accounting: write-cancel accounting

Account for the number of byte writes which this process caused to not happen
after all.

Cc: Jay Lan <jlan@sgi.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Chris Sturtivant <csturtiv@sgi.com>
Cc: Tony Ernst <tee@sgi.com>
Cc: Guillaume Thouvenin <guillaume.thouvenin@bull.net>
Cc: David Wright <daw@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 904d59d1eb8..d1f1b54d310 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2853,8 +2853,13 @@ int try_to_free_buffers(struct page *page)
 		 * could encounter a non-uptodate page, which is unresolvable.
 		 * This only applies in the rare case where try_to_free_buffers
 		 * succeeds but the page is not freed.
+		 *
+		 * Also, during truncate, discard_buffer will have marked all
+		 * the page's buffers clean.  We discover that here and clean
+		 * the page also.
 		 */
-		clear_page_dirty(page);
+		if (test_clear_page_dirty(page))
+			task_io_account_cancelled_write(PAGE_CACHE_SIZE);
 	}
 out:
 	if (buffers_to_free) {
-- 
cgit v1.2.3


From 6f88cc2e9c29c181557b477ee396375906acbc90 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 10 Dec 2006 02:19:44 -0800
Subject: [PATCH] io-accounting-read-accounting cifs fix

CIFS implements ->readpages and doesn't use read_cache_pages().  So wire the
read IO accounting up within CIFS.

Cc: Jay Lan <jlan@sgi.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Chris Sturtivant <csturtiv@sgi.com>
Cc: Tony Ernst <tee@sgi.com>
Cc: Guillaume Thouvenin <guillaume.thouvenin@bull.net>
Cc: Steven French <sfrench@us.ibm.com>
Cc: David Wright <daw@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/file.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 1aa95a50cac..0f05cab5d24 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -29,6 +29,7 @@
 #include <linux/pagevec.h>
 #include <linux/smp_lock.h>
 #include <linux/writeback.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/delay.h>
 #include <asm/div64.h>
 #include "cifsfs.h"
@@ -1812,6 +1813,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 			cFYI(1, ("Read error in readpages: %d", rc));
 			break;
 		} else if (bytes_read > 0) {
+			task_io_account_read(bytes_read);
 			pSMBr = (struct smb_com_read_rsp *)smb_read_data;
 			cifs_copy_cache_pages(mapping, page_list, bytes_read,
 				smb_read_data + 4 /* RFC1001 hdr */ +
-- 
cgit v1.2.3


From 98c4d57decf97bf8ddfe948a3266aa56b38b1a51 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 10 Dec 2006 02:19:47 -0800
Subject: [PATCH] io-accounting: direct-io

Account for direct-io.

Cc: Jay Lan <jlan@sgi.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Chris Sturtivant <csturtiv@sgi.com>
Cc: Tony Ernst <tee@sgi.com>
Cc: Guillaume Thouvenin <guillaume.thouvenin@bull.net>
Cc: David Wright <daw@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/direct-io.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 5981e17f46f..45d34d80739 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -27,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/bio.h>
 #include <linux/wait.h>
 #include <linux/err.h>
@@ -675,6 +676,13 @@ submit_page_section(struct dio *dio, struct page *page,
 {
 	int ret = 0;
 
+	if (dio->rw & WRITE) {
+		/*
+		 * Read accounting is performed in submit_bio()
+		 */
+		task_io_account_write(len);
+	}
+
 	/*
 	 * Can we just grow the current page's presence in the dio?
 	 */
-- 
cgit v1.2.3


From aba76fdb8a5fefba73d3490563bf7c4da37b1a34 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 10 Dec 2006 02:19:48 -0800
Subject: [PATCH] io-accounting: report in procfs

Add a simple /proc/pid/io to show the IO accounting fields.

Maybe this shouldn't be merged in mainline - the preferred reporting channel
is taskstats.  But given the poor state of our userspace support for
taskstats, this is useful for developer-testing, at least.  And it improves
the changes that the procps developers will wire it up into top(1).  Opinions
are sought.

The patch also wires up the existing IO-accounting fields.

It's a bit racy on 32-bit machines: if process A reads process B's
/proc/pid/io while process B is updating one of those 64-bit counters, process
A could see an intermediate result.

Cc: Jay Lan <jlan@sgi.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Chris Sturtivant <csturtiv@sgi.com>
Cc: Tony Ernst <tee@sgi.com>
Cc: Guillaume Thouvenin <guillaume.thouvenin@bull.net>
Cc: David Wright <daw@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/base.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index fd959d5b5a8..77a57b5799c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1804,6 +1804,27 @@ static int proc_base_fill_cache(struct file *filp, void *dirent, filldir_t filld
 				proc_base_instantiate, task, p);
 }
 
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
+{
+	return sprintf(buffer,
+			"rchar: %llu\n"
+			"wchar: %llu\n"
+			"syscr: %llu\n"
+			"syscw: %llu\n"
+			"read_bytes: %llu\n"
+			"write_bytes: %llu\n"
+			"cancelled_write_bytes: %llu\n",
+			(unsigned long long)task->rchar,
+			(unsigned long long)task->wchar,
+			(unsigned long long)task->syscr,
+			(unsigned long long)task->syscw,
+			(unsigned long long)task->ioac.read_bytes,
+			(unsigned long long)task->ioac.write_bytes,
+			(unsigned long long)task->ioac.cancelled_write_bytes);
+}
+#endif
+
 /*
  * Thread groups
  */
@@ -1855,6 +1876,9 @@ static struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_FAULT_INJECTION
 	REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
 #endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+	INF("io",	S_IRUGO, pid_io_accounting),
+#endif
 };
 
 static int proc_tgid_base_readdir(struct file * filp,
-- 
cgit v1.2.3


From 6d544bb4d9019c3a0d7ee4af1e4bbbd61a6e16dc Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@oracle.com>
Date: Sun, 10 Dec 2006 02:20:54 -0800
Subject: [PATCH] dio: centralize completion in dio_complete()

There have been a lot of bugs recently due to the way direct_io_worker() tries
to decide how to finish direct IO operations.  In the worst examples it has
failed to call aio_complete() at all (hang) or called it too many times
(oops).

This set of patches cleans up the completion phase with the goal of removing
the complexity that lead to these bugs.  We end up with one path that
calculates the result of the operation after all off the bios have completed.
We decide when to generate a result of the operation using that path based on
the final release of a refcount on the dio structure.

I tried to progress towards the final state in steps that were relatively easy
to understand.  Each step should compile but I only tested the final result of
having all the patches applied.

I've tested these on low end PC drives with aio-stress, the direct IO tests I
could manage to get running in LTP, orasim, and some home-brew functional
tests.

In http://lkml.org/lkml/2006/9/21/103 IBM reports success with ext2 and ext3
running DIO LTP tests.  They found that XFS bug which has since been addressed
in the patch series.

This patch:

The mechanics which decide the result of a direct IO operation were duplicated
in the sync and async paths.

The async path didn't check page_errors which can manifest as silently
returning success when the final pointer in an operation faults and its
matching file region is filled with zeros.

The sync path and async path differed in whether they passed errors to the
caller's dio->end_io operation.  The async path was passing errors to it which
trips an assertion in XFS, though it is apparently harmless.

This centralizes the completion phase of dio ops in one place.  AIO will now
return EFAULT consistently and all paths fall back to the previously sync
behaviour of passing the number of bytes 'transferred' to the dio->end_io
callback, regardless of errors.

dio_await_completion() doesn't have to propogate EIO from non-uptodate bios
now that it's being propogated through dio_complete() via dio->io_error.  This
lets it return void which simplifies its sole caller.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Suparna Bhattacharya <suparna@in.ibm.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/direct-io.c | 94 ++++++++++++++++++++++++++--------------------------------
 1 file changed, 42 insertions(+), 52 deletions(-)

(limited to 'fs')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 45d34d80739..b57b671e110 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -210,19 +210,46 @@ static struct page *dio_get_page(struct dio *dio)
 	return dio->pages[dio->head++];
 }
 
-/*
- * Called when all DIO BIO I/O has been completed - let the filesystem
- * know, if it registered an interest earlier via get_block.  Pass the
- * private field of the map buffer_head so that filesystems can use it
- * to hold additional state between get_block calls and dio_complete.
+/**
+ * dio_complete() - called when all DIO BIO I/O has been completed
+ * @offset: the byte offset in the file of the completed operation
+ *
+ * This releases locks as dictated by the locking type, lets interested parties
+ * know that a DIO operation has completed, and calculates the resulting return
+ * code for the operation.
+ *
+ * It lets the filesystem know if it registered an interest earlier via
+ * get_block.  Pass the private field of the map buffer_head so that
+ * filesystems can use it to hold additional state between get_block calls and
+ * dio_complete.
  */
-static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
+static int dio_complete(struct dio *dio, loff_t offset, int ret)
 {
+	ssize_t transferred = 0;
+
+	if (dio->result) {
+		transferred = dio->result;
+
+		/* Check for short read case */
+		if ((dio->rw == READ) && ((offset + transferred) > dio->i_size))
+			transferred = dio->i_size - offset;
+	}
+
 	if (dio->end_io && dio->result)
-		dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private);
+		dio->end_io(dio->iocb, offset, transferred,
+			    dio->map_bh.b_private);
 	if (dio->lock_type == DIO_LOCKING)
 		/* lockdep: non-owner release */
 		up_read_non_owner(&dio->inode->i_alloc_sem);
+
+	if (ret == 0)
+		ret = dio->page_errors;
+	if (ret == 0)
+		ret = dio->io_error;
+	if (ret == 0)
+		ret = transferred;
+
+	return ret;
 }
 
 /*
@@ -236,8 +263,7 @@ static void finished_one_bio(struct dio *dio)
 	spin_lock_irqsave(&dio->bio_lock, flags);
 	if (dio->bio_count == 1) {
 		if (dio->is_async) {
-			ssize_t transferred;
-			loff_t offset;
+			int ret;
 
 			/*
 			 * Last reference to the dio is going away.
@@ -245,24 +271,12 @@ static void finished_one_bio(struct dio *dio)
 			 */
 			spin_unlock_irqrestore(&dio->bio_lock, flags);
 
-			/* Check for short read case */
-			transferred = dio->result;
-			offset = dio->iocb->ki_pos;
-
-			if ((dio->rw == READ) &&
-			    ((offset + transferred) > dio->i_size))
-				transferred = dio->i_size - offset;
-
-			/* check for error in completion path */
-			if (dio->io_error)
-				transferred = dio->io_error;
-
-			dio_complete(dio, offset, transferred);
+			ret = dio_complete(dio, dio->iocb->ki_pos, 0);
 
 			/* Complete AIO later if falling back to buffered i/o */
 			if (dio->result == dio->size ||
 				((dio->rw == READ) && dio->result)) {
-				aio_complete(dio->iocb, transferred, 0);
+				aio_complete(dio->iocb, ret, 0);
 				kfree(dio);
 				return;
 			} else {
@@ -434,10 +448,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
 /*
  * Wait on and process all in-flight BIOs.
  */
-static int dio_await_completion(struct dio *dio)
+static void dio_await_completion(struct dio *dio)
 {
-	int ret = 0;
-
 	if (dio->bio)
 		dio_bio_submit(dio);
 
@@ -448,13 +460,9 @@ static int dio_await_completion(struct dio *dio)
 	 */
 	while (dio->bio_count) {
 		struct bio *bio = dio_await_one(dio);
-		int ret2;
-
-		ret2 = dio_bio_complete(dio, bio);
-		if (ret == 0)
-			ret = ret2;
+		/* io errors are propogated through dio->io_error */
+		dio_bio_complete(dio, bio);
 	}
-	return ret;
 }
 
 /*
@@ -1127,28 +1135,10 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 			kfree(dio);
 		}
 	} else {
-		ssize_t transferred = 0;
-
 		finished_one_bio(dio);
-		ret2 = dio_await_completion(dio);
-		if (ret == 0)
-			ret = ret2;
-		if (ret == 0)
-			ret = dio->page_errors;
-		if (dio->result) {
-			loff_t i_size = i_size_read(inode);
+		dio_await_completion(dio);
 
-			transferred = dio->result;
-			/*
-			 * Adjust the return value if the read crossed a
-			 * non-block-aligned EOF.
-			 */
-			if (rw == READ && (offset + transferred > i_size))
-				transferred = i_size - offset;
-		}
-		dio_complete(dio, offset, transferred);
-		if (ret == 0)
-			ret = transferred;
+		ret = dio_complete(dio, offset, ret);
 
 		/* We could have also come here on an AIO file extend */
 		if (!is_sync_kiocb(iocb) && (rw & WRITE) &&
-- 
cgit v1.2.3


From 17a7b1d74b1207f8f1af40b5d184989076d08f8b Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@oracle.com>
Date: Sun, 10 Dec 2006 02:20:56 -0800
Subject: [PATCH] dio: call blk_run_address_space() once per op

We only need to call blk_run_address_space() once after all the bios for the
direct IO op have been submitted.  This removes the chance of calling
blk_run_address_space() after spurious wake ups as the sync path waits for
bios to drain.  It's also one less difference betwen the sync and async paths.

In the process we remove a redundant dio_bio_submit() that its caller had
already performed.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Suparna Bhattacharya <suparna@in.ibm.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/direct-io.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index b57b671e110..b296942ff7d 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -404,7 +404,6 @@ static struct bio *dio_await_one(struct dio *dio)
 		if (dio->bio_list == NULL) {
 			dio->waiter = current;
 			spin_unlock_irqrestore(&dio->bio_lock, flags);
-			blk_run_address_space(dio->inode->i_mapping);
 			io_schedule();
 			spin_lock_irqsave(&dio->bio_lock, flags);
 			dio->waiter = NULL;
@@ -450,9 +449,6 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
  */
 static void dio_await_completion(struct dio *dio)
 {
-	if (dio->bio)
-		dio_bio_submit(dio);
-
 	/*
 	 * The bio_lock is not held for the read of bio_count.
 	 * This is ok since it is the dio_bio_complete() that changes
@@ -1085,6 +1081,9 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	if (dio->bio)
 		dio_bio_submit(dio);
 
+	/* All IO is now issued, send it on its way */
+	blk_run_address_space(inode->i_mapping);
+
 	/*
 	 * It is possible that, we return short IO due to end of file.
 	 * In that case, we need to release all the pages we got hold on.
@@ -1113,7 +1112,6 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 		if (ret == 0)
 			ret = dio->result;
 		finished_one_bio(dio);		/* This can free the dio */
-		blk_run_address_space(inode->i_mapping);
 		if (should_wait) {
 			unsigned long flags;
 			/*
-- 
cgit v1.2.3


From 0273201e693fd62381f6b1e85b15ffc117d8a46e Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@oracle.com>
Date: Sun, 10 Dec 2006 02:20:59 -0800
Subject: [PATCH] dio: formalize bio counters as a dio reference count

Previously we had two confusing counts of bio progress.  'bio_count' was
decremented as bios were processed and freed by the dio core.  It was used to
indicate final completion of the dio operation.  'bios_in_flight' reflected
how many bios were between submit_bio() and bio->end_io.  It was used by the
sync path to decide when to wake up and finish completing bios and was ignored
by the async path.

This patch collapses the two notions into one notion of a dio reference count.
 bios hold a dio reference when they're between submit_bio and bio->end_io.

Since bios_in_flight was only used in the sync path it is now equivalent to
dio->refcount - 1 which accounts for direct_io_worker() holding a reference
for the duration of the operation.

dio_bio_complete() -> finished_one_bio() was called from the sync path after
finding bios on the list that the bio->end_io function had deposited.
finished_one_bio() can not drop the dio reference on behalf of these bios now
because bio->end_io already has.  The is_async test in finished_one_bio()
meant that it never actually did anything other than drop the bio_count for
sync callers.  So we remove its refcount decrement, don't call it from
dio_bio_complete(), and hoist its call up into the async dio_bio_complete()
caller after an explicit refcount decrement.  It is renamed dio_complete_aio()
to reflect the remaining work it actually does.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Suparna Bhattacharya <suparna@in.ibm.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/direct-io.c | 140 +++++++++++++++++++++++++++------------------------------
 1 file changed, 66 insertions(+), 74 deletions(-)

(limited to 'fs')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index b296942ff7d..bc1cbf9149f 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -121,9 +121,8 @@ struct dio {
 	int page_errors;		/* errno from get_user_pages() */
 
 	/* BIO completion state */
+	atomic_t refcount;		/* direct_io_worker() and bios */
 	spinlock_t bio_lock;		/* protects BIO fields below */
-	int bio_count;			/* nr bios to be completed */
-	int bios_in_flight;		/* nr bios in flight */
 	struct bio *bio_list;		/* singly linked via bi_private */
 	struct task_struct *waiter;	/* waiting task (NULL if none) */
 
@@ -256,44 +255,27 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
  * Called when a BIO has been processed.  If the count goes to zero then IO is
  * complete and we can signal this to the AIO layer.
  */
-static void finished_one_bio(struct dio *dio)
+static void dio_complete_aio(struct dio *dio)
 {
 	unsigned long flags;
+	int ret;
 
-	spin_lock_irqsave(&dio->bio_lock, flags);
-	if (dio->bio_count == 1) {
-		if (dio->is_async) {
-			int ret;
-
-			/*
-			 * Last reference to the dio is going away.
-			 * Drop spinlock and complete the DIO.
-			 */
-			spin_unlock_irqrestore(&dio->bio_lock, flags);
-
-			ret = dio_complete(dio, dio->iocb->ki_pos, 0);
+	ret = dio_complete(dio, dio->iocb->ki_pos, 0);
 
-			/* Complete AIO later if falling back to buffered i/o */
-			if (dio->result == dio->size ||
-				((dio->rw == READ) && dio->result)) {
-				aio_complete(dio->iocb, ret, 0);
-				kfree(dio);
-				return;
-			} else {
-				/*
-				 * Falling back to buffered
-				 */
-				spin_lock_irqsave(&dio->bio_lock, flags);
-				dio->bio_count--;
-				if (dio->waiter)
-					wake_up_process(dio->waiter);
-				spin_unlock_irqrestore(&dio->bio_lock, flags);
-				return;
-			}
-		}
+	/* Complete AIO later if falling back to buffered i/o */
+	if (dio->result == dio->size ||
+		((dio->rw == READ) && dio->result)) {
+		aio_complete(dio->iocb, ret, 0);
+		kfree(dio);
+	} else {
+		/*
+		 * Falling back to buffered
+		 */
+		spin_lock_irqsave(&dio->bio_lock, flags);
+		if (dio->waiter)
+			wake_up_process(dio->waiter);
+		spin_unlock_irqrestore(&dio->bio_lock, flags);
 	}
-	dio->bio_count--;
-	spin_unlock_irqrestore(&dio->bio_lock, flags);
 }
 
 static int dio_bio_complete(struct dio *dio, struct bio *bio);
@@ -309,6 +291,10 @@ static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
 
 	/* cleanup the bio */
 	dio_bio_complete(dio, bio);
+
+	if (atomic_dec_and_test(&dio->refcount))
+		dio_complete_aio(dio);
+
 	return 0;
 }
 
@@ -330,8 +316,7 @@ static int dio_bio_end_io(struct bio *bio, unsigned int bytes_done, int error)
 	spin_lock_irqsave(&dio->bio_lock, flags);
 	bio->bi_private = dio->bio_list;
 	dio->bio_list = bio;
-	dio->bios_in_flight--;
-	if (dio->waiter && dio->bios_in_flight == 0)
+	if ((atomic_sub_return(1, &dio->refcount) == 1) && dio->waiter)
 		wake_up_process(dio->waiter);
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 	return 0;
@@ -362,17 +347,15 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev,
  * In the AIO read case we speculatively dirty the pages before starting IO.
  * During IO completion, any of these pages which happen to have been written
  * back will be redirtied by bio_check_pages_dirty().
+ *
+ * bios hold a dio reference between submit_bio and ->end_io.
  */
 static void dio_bio_submit(struct dio *dio)
 {
 	struct bio *bio = dio->bio;
-	unsigned long flags;
 
 	bio->bi_private = dio;
-	spin_lock_irqsave(&dio->bio_lock, flags);
-	dio->bio_count++;
-	dio->bios_in_flight++;
-	spin_unlock_irqrestore(&dio->bio_lock, flags);
+	atomic_inc(&dio->refcount);
 	if (dio->is_async && dio->rw == READ)
 		bio_set_pages_dirty(bio);
 	submit_bio(dio->rw, bio);
@@ -390,18 +373,28 @@ static void dio_cleanup(struct dio *dio)
 		page_cache_release(dio_get_page(dio));
 }
 
+static int wait_for_more_bios(struct dio *dio)
+{
+	assert_spin_locked(&dio->bio_lock);
+
+	return (atomic_read(&dio->refcount) > 1) && (dio->bio_list == NULL);
+}
+
 /*
- * Wait for the next BIO to complete.  Remove it and return it.
+ * Wait for the next BIO to complete.  Remove it and return it.  NULL is
+ * returned once all BIOs have been completed.  This must only be called once
+ * all bios have been issued so that dio->refcount can only decrease.  This
+ * requires that that the caller hold a reference on the dio.
  */
 static struct bio *dio_await_one(struct dio *dio)
 {
 	unsigned long flags;
-	struct bio *bio;
+	struct bio *bio = NULL;
 
 	spin_lock_irqsave(&dio->bio_lock, flags);
-	while (dio->bio_list == NULL) {
+	while (wait_for_more_bios(dio)) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (dio->bio_list == NULL) {
+		if (wait_for_more_bios(dio)) {
 			dio->waiter = current;
 			spin_unlock_irqrestore(&dio->bio_lock, flags);
 			io_schedule();
@@ -410,8 +403,10 @@ static struct bio *dio_await_one(struct dio *dio)
 		}
 		set_current_state(TASK_RUNNING);
 	}
-	bio = dio->bio_list;
-	dio->bio_list = bio->bi_private;
+	if (dio->bio_list) {
+		bio = dio->bio_list;
+		dio->bio_list = bio->bi_private;
+	}
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 	return bio;
 }
@@ -440,25 +435,24 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
 		}
 		bio_put(bio);
 	}
-	finished_one_bio(dio);
 	return uptodate ? 0 : -EIO;
 }
 
 /*
- * Wait on and process all in-flight BIOs.
+ * Wait on and process all in-flight BIOs.  This must only be called once
+ * all bios have been issued so that the refcount can only decrease.
+ * This just waits for all bios to make it through dio_bio_complete.  IO
+ * errors are propogated through dio->io_error and should be propogated via
+ * dio_complete().
  */
 static void dio_await_completion(struct dio *dio)
 {
-	/*
-	 * The bio_lock is not held for the read of bio_count.
-	 * This is ok since it is the dio_bio_complete() that changes
-	 * bio_count.
-	 */
-	while (dio->bio_count) {
-		struct bio *bio = dio_await_one(dio);
-		/* io errors are propogated through dio->io_error */
-		dio_bio_complete(dio, bio);
-	}
+	struct bio *bio;
+	do {
+		bio = dio_await_one(dio);
+		if (bio)
+			dio_bio_complete(dio, bio);
+	} while (bio);
 }
 
 /*
@@ -995,16 +989,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	dio->iocb = iocb;
 	dio->i_size = i_size_read(inode);
 
-	/*
-	 * BIO completion state.
-	 *
-	 * ->bio_count starts out at one, and we decrement it to zero after all
-	 * BIOs are submitted.  This to avoid the situation where a really fast
-	 * (or synchronous) device could take the count to zero while we're
-	 * still submitting BIOs.
-	 */
-	dio->bio_count = 1;
-	dio->bios_in_flight = 0;
+	atomic_set(&dio->refcount, 1);
 	spin_lock_init(&dio->bio_lock);
 	dio->bio_list = NULL;
 	dio->waiter = NULL;
@@ -1111,7 +1096,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 		}
 		if (ret == 0)
 			ret = dio->result;
-		finished_one_bio(dio);		/* This can free the dio */
+
+		/* this can free the dio */
+		if (atomic_dec_and_test(&dio->refcount))
+			dio_complete_aio(dio);
+
 		if (should_wait) {
 			unsigned long flags;
 			/*
@@ -1122,7 +1111,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 
 			spin_lock_irqsave(&dio->bio_lock, flags);
 			set_current_state(TASK_UNINTERRUPTIBLE);
-			while (dio->bio_count) {
+			while (atomic_read(&dio->refcount)) {
 				spin_unlock_irqrestore(&dio->bio_lock, flags);
 				io_schedule();
 				spin_lock_irqsave(&dio->bio_lock, flags);
@@ -1133,7 +1122,6 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 			kfree(dio);
 		}
 	} else {
-		finished_one_bio(dio);
 		dio_await_completion(dio);
 
 		ret = dio_complete(dio, offset, ret);
@@ -1146,7 +1134,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 			 * i/o, we have to mark the the aio complete.
 			 */
 			aio_complete(iocb, ret, 0);
-		kfree(dio);
+
+		if (atomic_dec_and_test(&dio->refcount))
+			kfree(dio);
+		else
+			BUG();
 	}
 	return ret;
 }
-- 
cgit v1.2.3


From 20258b2b397031649e4a41922fe803d57017df84 Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@oracle.com>
Date: Sun, 10 Dec 2006 02:21:01 -0800
Subject: [PATCH] dio: remove duplicate bio wait code

Now that we have a single refcount and waiting path we can reuse it in the
async 'should_wait' path.  It continues to rely on the fragile link between
the conditional in dio_complete_aio() which decides to complete the AIO and
the conditional in direct_io_worker() which decides to wait and free.

By waiting before dropping the reference we stop dio_bio_end_aio() from
calling dio_complete_aio() which used to wake up the waiter after seeing the
reference count drop to 0.  We hoist this wake up into dio_bio_end_aio() which
now notices when it's left a single remaining reference that is held by the
waiter.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Suparna Bhattacharya <suparna@in.ibm.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/direct-io.c | 41 ++++++++++++-----------------------------
 1 file changed, 12 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index bc1cbf9149f..f11f05dc9e6 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -257,7 +257,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
  */
 static void dio_complete_aio(struct dio *dio)
 {
-	unsigned long flags;
 	int ret;
 
 	ret = dio_complete(dio, dio->iocb->ki_pos, 0);
@@ -267,14 +266,6 @@ static void dio_complete_aio(struct dio *dio)
 		((dio->rw == READ) && dio->result)) {
 		aio_complete(dio->iocb, ret, 0);
 		kfree(dio);
-	} else {
-		/*
-		 * Falling back to buffered
-		 */
-		spin_lock_irqsave(&dio->bio_lock, flags);
-		if (dio->waiter)
-			wake_up_process(dio->waiter);
-		spin_unlock_irqrestore(&dio->bio_lock, flags);
 	}
 }
 
@@ -285,6 +276,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio);
 static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
 {
 	struct dio *dio = bio->bi_private;
+	int waiter_holds_ref = 0;
+	int remaining;
 
 	if (bio->bi_size)
 		return 1;
@@ -292,7 +285,12 @@ static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
 	/* cleanup the bio */
 	dio_bio_complete(dio, bio);
 
-	if (atomic_dec_and_test(&dio->refcount))
+	waiter_holds_ref = !!dio->waiter;
+	remaining = atomic_sub_return(1, (&dio->refcount));
+	if (remaining == 1 && waiter_holds_ref)
+		wake_up_process(dio->waiter);
+
+	if (remaining == 0)
 		dio_complete_aio(dio);
 
 	return 0;
@@ -1097,30 +1095,15 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 		if (ret == 0)
 			ret = dio->result;
 
+		if (should_wait)
+			dio_await_completion(dio);
+
 		/* this can free the dio */
 		if (atomic_dec_and_test(&dio->refcount))
 			dio_complete_aio(dio);
 
-		if (should_wait) {
-			unsigned long flags;
-			/*
-			 * Wait for already issued I/O to drain out and
-			 * release its references to user-space pages
-			 * before returning to fallback on buffered I/O
-			 */
-
-			spin_lock_irqsave(&dio->bio_lock, flags);
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			while (atomic_read(&dio->refcount)) {
-				spin_unlock_irqrestore(&dio->bio_lock, flags);
-				io_schedule();
-				spin_lock_irqsave(&dio->bio_lock, flags);
-				set_current_state(TASK_UNINTERRUPTIBLE);
-			}
-			spin_unlock_irqrestore(&dio->bio_lock, flags);
-			set_current_state(TASK_RUNNING);
+		if (should_wait)
 			kfree(dio);
-		}
 	} else {
 		dio_await_completion(dio);
 
-- 
cgit v1.2.3


From 8459d86aff04fa53c2ab6a6b9f355b3063cc8014 Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@oracle.com>
Date: Sun, 10 Dec 2006 02:21:05 -0800
Subject: [PATCH] dio: only call aio_complete() after returning -EIOCBQUEUED

The only time it is safe to call aio_complete() is when the ->ki_retry
function returns -EIOCBQUEUED to the AIO core.  direct_io_worker() has
historically done this by relying on its caller to translate positive return
codes into -EIOCBQUEUED for the aio case.  It did this by trying to keep
conditionals in sync.  direct_io_worker() knew when finished_one_bio() was
going to call aio_complete().  It would reverse the test and wait and free the
dio in the cases it thought that finished_one_bio() wasn't going to.

Not surprisingly, it ended up getting it wrong.  'ret' could be a negative
errno from the submission path but it failed to communicate this to
finished_one_bio().  direct_io_worker() would return < 0, it's callers
wouldn't raise -EIOCBQUEUED, and aio_complete() would be called.  In the
future finished_one_bio()'s tests wouldn't reflect this and aio_complete()
would be called for a second time which can manifest as an oops.

The previous cleanups have whittled the sync and async completion paths down
to the point where we can collapse them and clearly reassert the invariant
that we must only call aio_complete() after returning -EIOCBQUEUED.
direct_io_worker() will only return -EIOCBQUEUED when it is not the last to
drop the dio refcount and the aio bio completion path will only call
aio_complete() when it is the last to drop the dio refcount.
direct_io_worker() can ensure that it is the last to drop the reference count
by waiting for bios to drain.  It does this for sync ops, of course, and for
partial dio writes that must fall back to buffered and for aio ops that saw
errors during submission.

This means that operations that end up waiting, even if they were issued as
aio ops, will not call aio_complete() from dio.  Instead we return the return
code of the operation and let the aio core call aio_complete().  This is
purposely done to fix a bug where AIO DIO file extensions would call
aio_complete() before their callers have a chance to update i_size.

Now that direct_io_worker() is explicitly returning -EIOCBQUEUED its callers
no longer have to translate for it.  XFS needs to be careful not to free
resources that will be used during AIO completion if -EIOCBQUEUED is returned.
 We maintain the previous behaviour of trying to write fs metadata for O_SYNC
aio+dio writes.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Suparna Bhattacharya <suparna@in.ibm.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Cc: <xfs-masters@oss.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/direct-io.c              | 90 ++++++++++++++++++---------------------------
 fs/xfs/linux-2.6/xfs_aops.c |  2 +-
 2 files changed, 36 insertions(+), 56 deletions(-)

(limited to 'fs')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index f11f05dc9e6..71f4aeac763 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -226,6 +226,15 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
 {
 	ssize_t transferred = 0;
 
+	/*
+	 * AIO submission can race with bio completion to get here while
+	 * expecting to have the last io completed by bio completion.
+	 * In that case -EIOCBQUEUED is in fact not an error we want
+	 * to preserve through this call.
+	 */
+	if (ret == -EIOCBQUEUED)
+		ret = 0;
+
 	if (dio->result) {
 		transferred = dio->result;
 
@@ -251,24 +260,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
 	return ret;
 }
 
-/*
- * Called when a BIO has been processed.  If the count goes to zero then IO is
- * complete and we can signal this to the AIO layer.
- */
-static void dio_complete_aio(struct dio *dio)
-{
-	int ret;
-
-	ret = dio_complete(dio, dio->iocb->ki_pos, 0);
-
-	/* Complete AIO later if falling back to buffered i/o */
-	if (dio->result == dio->size ||
-		((dio->rw == READ) && dio->result)) {
-		aio_complete(dio->iocb, ret, 0);
-		kfree(dio);
-	}
-}
-
 static int dio_bio_complete(struct dio *dio, struct bio *bio);
 /*
  * Asynchronous IO callback. 
@@ -290,8 +281,11 @@ static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
 	if (remaining == 1 && waiter_holds_ref)
 		wake_up_process(dio->waiter);
 
-	if (remaining == 0)
-		dio_complete_aio(dio);
+	if (remaining == 0) {
+		int ret = dio_complete(dio, dio->iocb->ki_pos, 0);
+		aio_complete(dio->iocb, ret, 0);
+		kfree(dio);
+	}
 
 	return 0;
 }
@@ -1082,47 +1076,33 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 		mutex_unlock(&dio->inode->i_mutex);
 
 	/*
-	 * OK, all BIOs are submitted, so we can decrement bio_count to truly
-	 * reflect the number of to-be-processed BIOs.
+	 * The only time we want to leave bios in flight is when a successful
+	 * partial aio read or full aio write have been setup.  In that case
+	 * bio completion will call aio_complete.  The only time it's safe to
+	 * call aio_complete is when we return -EIOCBQUEUED, so we key on that.
+	 * This had *better* be the only place that raises -EIOCBQUEUED.
 	 */
-	if (dio->is_async) {
-		int should_wait = 0;
-
-		if (dio->result < dio->size && (rw & WRITE)) {
-			dio->waiter = current;
-			should_wait = 1;
-		}
-		if (ret == 0)
-			ret = dio->result;
-
-		if (should_wait)
-			dio_await_completion(dio);
-
-		/* this can free the dio */
-		if (atomic_dec_and_test(&dio->refcount))
-			dio_complete_aio(dio);
+	BUG_ON(ret == -EIOCBQUEUED);
+	if (dio->is_async && ret == 0 && dio->result &&
+	    ((rw & READ) || (dio->result == dio->size)))
+		ret = -EIOCBQUEUED;
 
-		if (should_wait)
-			kfree(dio);
-	} else {
+	if (ret != -EIOCBQUEUED)
 		dio_await_completion(dio);
 
+	/*
+	 * Sync will always be dropping the final ref and completing the
+	 * operation.  AIO can if it was a broken operation described above
+	 * or in fact if all the bios race to complete before we get here.
+	 * In that case dio_complete() translates the EIOCBQUEUED into
+	 * the proper return code that the caller will hand to aio_complete().
+	 */
+	if (atomic_dec_and_test(&dio->refcount)) {
 		ret = dio_complete(dio, offset, ret);
+		kfree(dio);
+	} else
+		BUG_ON(ret != -EIOCBQUEUED);
 
-		/* We could have also come here on an AIO file extend */
-		if (!is_sync_kiocb(iocb) && (rw & WRITE) &&
-		    ret >= 0 && dio->result == dio->size)
-			/*
-			 * For AIO writes where we have completed the
-			 * i/o, we have to mark the the aio complete.
-			 */
-			aio_complete(iocb, ret, 0);
-
-		if (atomic_dec_and_test(&dio->refcount))
-			kfree(dio);
-		else
-			BUG();
-	}
 	return ret;
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 8e6b56fc1ca..b56eb754e2d 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1406,7 +1406,7 @@ xfs_vm_direct_IO(
 			xfs_end_io_direct);
 	}
 
-	if (unlikely(ret <= 0 && iocb->private))
+	if (unlikely(ret != -EIOCBQUEUED && iocb->private))
 		xfs_destroy_ioend(iocb->private);
 	return ret;
 }
-- 
cgit v1.2.3


From 5eb6c7a2ab413dea1ee6c08dd58263a1c2c2efa3 Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@oracle.com>
Date: Sun, 10 Dec 2006 02:21:07 -0800
Subject: [PATCH] dio: lock refcount operations

The wait_for_more_bios() function name was poorly chosen.  While looking to
clean it up it I noticed that the dio struct refcounting between the bio
completion and dio submission paths was racey.

The bio submission path was simply freeing the dio struct if
atomic_dec_and_test() indicated that it dropped the final reference.

The aio bio completion path was dereferencing its dio struct pointer *after
dropping its reference* based on the remaining number of references.

These two paths could race and result in the aio bio completion path
dereferencing a freed dio, though this was not observed in the wild.

This moves the refcount under the bio lock so that bio completion can drop
its reference and decide to wake all in one atomic step.

Once testing and waking is locked dio_await_one() can test its sleeping
condition and mark itself uninterruptible under the lock.  It gets simpler
and wait_for_more_bios() disappears.

The addition of the interrupt masking spin lock acquiry in dio_bio_submit()
looks alarming.  This lock acquiry existed in that path before the recent
dio completion patch set.  We shouldn't expect significant performance
regression from returning to the behaviour that existed before the
completion clean up work.

This passed 4k block ext3 O_DIRECT fsx and aio-stress on an SMP machine.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Suparna Bhattacharya <suparna@in.ibm.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: <xfs-masters@oss.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/direct-io.c | 76 ++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 45 insertions(+), 31 deletions(-)

(limited to 'fs')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 71f4aeac763..d9d0833444f 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -121,8 +121,8 @@ struct dio {
 	int page_errors;		/* errno from get_user_pages() */
 
 	/* BIO completion state */
-	atomic_t refcount;		/* direct_io_worker() and bios */
 	spinlock_t bio_lock;		/* protects BIO fields below */
+	unsigned long refcount;		/* direct_io_worker() and bios */
 	struct bio *bio_list;		/* singly linked via bi_private */
 	struct task_struct *waiter;	/* waiting task (NULL if none) */
 
@@ -267,8 +267,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio);
 static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
 {
 	struct dio *dio = bio->bi_private;
-	int waiter_holds_ref = 0;
-	int remaining;
+	unsigned long remaining;
+	unsigned long flags;
 
 	if (bio->bi_size)
 		return 1;
@@ -276,10 +276,11 @@ static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
 	/* cleanup the bio */
 	dio_bio_complete(dio, bio);
 
-	waiter_holds_ref = !!dio->waiter;
-	remaining = atomic_sub_return(1, (&dio->refcount));
-	if (remaining == 1 && waiter_holds_ref)
+	spin_lock_irqsave(&dio->bio_lock, flags);
+	remaining = --dio->refcount;
+	if (remaining == 1 && dio->waiter)
 		wake_up_process(dio->waiter);
+	spin_unlock_irqrestore(&dio->bio_lock, flags);
 
 	if (remaining == 0) {
 		int ret = dio_complete(dio, dio->iocb->ki_pos, 0);
@@ -308,7 +309,7 @@ static int dio_bio_end_io(struct bio *bio, unsigned int bytes_done, int error)
 	spin_lock_irqsave(&dio->bio_lock, flags);
 	bio->bi_private = dio->bio_list;
 	dio->bio_list = bio;
-	if ((atomic_sub_return(1, &dio->refcount) == 1) && dio->waiter)
+	if (--dio->refcount == 1 && dio->waiter)
 		wake_up_process(dio->waiter);
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 	return 0;
@@ -345,11 +346,17 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev,
 static void dio_bio_submit(struct dio *dio)
 {
 	struct bio *bio = dio->bio;
+	unsigned long flags;
 
 	bio->bi_private = dio;
-	atomic_inc(&dio->refcount);
+
+	spin_lock_irqsave(&dio->bio_lock, flags);
+	dio->refcount++;
+	spin_unlock_irqrestore(&dio->bio_lock, flags);
+
 	if (dio->is_async && dio->rw == READ)
 		bio_set_pages_dirty(bio);
+
 	submit_bio(dio->rw, bio);
 
 	dio->bio = NULL;
@@ -365,13 +372,6 @@ static void dio_cleanup(struct dio *dio)
 		page_cache_release(dio_get_page(dio));
 }
 
-static int wait_for_more_bios(struct dio *dio)
-{
-	assert_spin_locked(&dio->bio_lock);
-
-	return (atomic_read(&dio->refcount) > 1) && (dio->bio_list == NULL);
-}
-
 /*
  * Wait for the next BIO to complete.  Remove it and return it.  NULL is
  * returned once all BIOs have been completed.  This must only be called once
@@ -384,16 +384,21 @@ static struct bio *dio_await_one(struct dio *dio)
 	struct bio *bio = NULL;
 
 	spin_lock_irqsave(&dio->bio_lock, flags);
-	while (wait_for_more_bios(dio)) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (wait_for_more_bios(dio)) {
-			dio->waiter = current;
-			spin_unlock_irqrestore(&dio->bio_lock, flags);
-			io_schedule();
-			spin_lock_irqsave(&dio->bio_lock, flags);
-			dio->waiter = NULL;
-		}
-		set_current_state(TASK_RUNNING);
+
+	/*
+	 * Wait as long as the list is empty and there are bios in flight.  bio
+	 * completion drops the count, maybe adds to the list, and wakes while
+	 * holding the bio_lock so we don't need set_current_state()'s barrier
+	 * and can call it after testing our condition.
+	 */
+	while (dio->refcount > 1 && dio->bio_list == NULL) {
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+		dio->waiter = current;
+		spin_unlock_irqrestore(&dio->bio_lock, flags);
+		io_schedule();
+		/* wake up sets us TASK_RUNNING */
+		spin_lock_irqsave(&dio->bio_lock, flags);
+		dio->waiter = NULL;
 	}
 	if (dio->bio_list) {
 		bio = dio->bio_list;
@@ -951,6 +956,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	struct dio *dio)
 {
 	unsigned long user_addr; 
+	unsigned long flags;
 	int seg;
 	ssize_t ret = 0;
 	ssize_t ret2;
@@ -981,8 +987,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	dio->iocb = iocb;
 	dio->i_size = i_size_read(inode);
 
-	atomic_set(&dio->refcount, 1);
 	spin_lock_init(&dio->bio_lock);
+	dio->refcount = 1;
 	dio->bio_list = NULL;
 	dio->waiter = NULL;
 
@@ -1092,12 +1098,20 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 
 	/*
 	 * Sync will always be dropping the final ref and completing the
-	 * operation.  AIO can if it was a broken operation described above
-	 * or in fact if all the bios race to complete before we get here.
-	 * In that case dio_complete() translates the EIOCBQUEUED into
-	 * the proper return code that the caller will hand to aio_complete().
+	 * operation.  AIO can if it was a broken operation described above or
+	 * in fact if all the bios race to complete before we get here.  In
+	 * that case dio_complete() translates the EIOCBQUEUED into the proper
+	 * return code that the caller will hand to aio_complete().
+	 *
+	 * This is managed by the bio_lock instead of being an atomic_t so that
+	 * completion paths can drop their ref and use the remaining count to
+	 * decide to wake the submission path atomically.
 	 */
-	if (atomic_dec_and_test(&dio->refcount)) {
+	spin_lock_irqsave(&dio->bio_lock, flags);
+	ret2 = --dio->refcount;
+	spin_unlock_irqrestore(&dio->bio_lock, flags);
+	BUG_ON(!dio->is_async && ret2 != 0);
+	if (ret2 == 0) {
 		ret = dio_complete(dio, offset, ret);
 		kfree(dio);
 	} else
-- 
cgit v1.2.3


From bbea9f69668a3d0cf9feba15a724cd02896f8675 Mon Sep 17 00:00:00 2001
From: Vadim Lobanov <vlobanov@speakeasy.net>
Date: Sun, 10 Dec 2006 02:21:12 -0800
Subject: [PATCH] fdtable: Make fdarray and fdsets equal in size

Currently, each fdtable supports three dynamically-sized arrays of data: the
fdarray and two fdsets.  The code allows the number of fds supported by the
fdarray (fdtable->max_fds) to differ from the number of fds supported by each
of the fdsets (fdtable->max_fdset).

In practice, it is wasteful for these two sizes to differ: whenever we hit a
limit on the smaller-capacity structure, we will reallocate the entire fdtable
and all the dynamic arrays within it, so any delta in the memory used by the
larger-capacity structure will never be touched at all.

Rather than hogging this excess, we shouldn't even allocate it in the first
place, and keep the capacities of the fdarray and the fdsets equal.  This
patch removes fdtable->max_fdset.  As an added bonus, most of the supporting
code becomes simpler.

Signed-off-by: Vadim Lobanov <vlobanov@speakeasy.net>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c | 10 +++++-----
 fs/exec.c   |  2 +-
 fs/fcntl.c  |  5 ++---
 fs/file.c   | 56 ++++++++++++++++++++++----------------------------------
 fs/open.c   |  3 +--
 fs/select.c | 10 +++++-----
 6 files changed, 36 insertions(+), 50 deletions(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index b766964a625..0ec70e3cee0 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1679,19 +1679,19 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
 {
 	fd_set_bits fds;
 	char *bits;
-	int size, max_fdset, ret = -EINVAL;
+	int size, max_fds, ret = -EINVAL;
 	struct fdtable *fdt;
 
 	if (n < 0)
 		goto out_nofds;
 
-	/* max_fdset can increase, so grab it once to avoid race */
+	/* max_fds can increase, so grab it once to avoid race */
 	rcu_read_lock();
 	fdt = files_fdtable(current->files);
-	max_fdset = fdt->max_fdset;
+	max_fds = fdt->max_fds;
 	rcu_read_unlock();
-	if (n > max_fdset)
-		n = max_fdset;
+	if (n > max_fds)
+		n = max_fds;
 
 	/*
 	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
diff --git a/fs/exec.c b/fs/exec.c
index 12d8cd461b4..11fe93f7363 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -783,7 +783,7 @@ static void flush_old_files(struct files_struct * files)
 		j++;
 		i = j * __NFDBITS;
 		fdt = files_fdtable(files);
-		if (i >= fdt->max_fds || i >= fdt->max_fdset)
+		if (i >= fdt->max_fds)
 			break;
 		set = fdt->close_on_exec->fds_bits[j];
 		if (!set)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2bdaef35da5..8e382a5d51b 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -77,10 +77,9 @@ repeat:
 		start = files->next_fd;
 
 	newfd = start;
-	if (start < fdt->max_fdset) {
+	if (start < fdt->max_fds)
 		newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
-			fdt->max_fdset, start);
-	}
+					   fdt->max_fds, start);
 	
 	error = -EMFILE;
 	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
diff --git a/fs/file.c b/fs/file.c
index 51aef675470..fb3d2038dc2 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -68,8 +68,8 @@ void free_fd_array(struct file **array, int num)
 
 static void __free_fdtable(struct fdtable *fdt)
 {
-	free_fdset(fdt->open_fds, fdt->max_fdset);
-	free_fdset(fdt->close_on_exec, fdt->max_fdset);
+	free_fdset(fdt->open_fds, fdt->max_fds);
+	free_fdset(fdt->close_on_exec, fdt->max_fds);
 	free_fd_array(fdt->fd, fdt->max_fds);
 	kfree(fdt);
 }
@@ -98,7 +98,7 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 	struct fdtable_defer *fddef;
 
 	BUG_ON(!fdt);
-	fdset_size = fdt->max_fdset / 8;
+	fdset_size = fdt->max_fds / 8;
 	fdarray_size = fdt->max_fds * sizeof(struct file *);
 
 	if (fdt->free_files) {
@@ -110,13 +110,11 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 		kmem_cache_free(files_cachep, fdt->free_files);
 		return;
 	}
-	if (fdt->max_fdset <= EMBEDDED_FD_SET_SIZE &&
-		fdt->max_fds <= NR_OPEN_DEFAULT) {
+	if (fdt->max_fds <= NR_OPEN_DEFAULT)
 		/*
 		 * The fdtable was embedded
 		 */
 		return;
-	}
 	if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
 		kfree(fdt->open_fds);
 		kfree(fdt->close_on_exec);
@@ -136,9 +134,7 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 
 void free_fdtable(struct fdtable *fdt)
 {
-	if (fdt->free_files ||
-		fdt->max_fdset > EMBEDDED_FD_SET_SIZE ||
-		fdt->max_fds > NR_OPEN_DEFAULT)
+	if (fdt->free_files || fdt->max_fds > NR_OPEN_DEFAULT)
 		call_rcu(&fdt->rcu, free_fdtable_rcu);
 }
 
@@ -151,12 +147,11 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
 	int i;
 	int count;
 
-	BUG_ON(nfdt->max_fdset < fdt->max_fdset);
 	BUG_ON(nfdt->max_fds < fdt->max_fds);
 	/* Copy the existing tables and install the new pointers */
 
-	i = fdt->max_fdset / (sizeof(unsigned long) * 8);
-	count = (nfdt->max_fdset - fdt->max_fdset) / 8;
+	i = fdt->max_fds / (sizeof(unsigned long) * 8);
+	count = (nfdt->max_fds - fdt->max_fds) / 8;
 
 	/*
 	 * Don't copy the entire array if the current fdset is
@@ -164,9 +159,9 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
 	 */
 	if (i) {
 		memcpy (nfdt->open_fds, fdt->open_fds,
-						fdt->max_fdset/8);
+						fdt->max_fds/8);
 		memcpy (nfdt->close_on_exec, fdt->close_on_exec,
-						fdt->max_fdset/8);
+						fdt->max_fds/8);
 		memset (&nfdt->open_fds->fds_bits[i], 0, count);
 		memset (&nfdt->close_on_exec->fds_bits[i], 0, count);
 	}
@@ -201,7 +196,7 @@ fd_set * alloc_fdset(int num)
 
 void free_fdset(fd_set *array, int num)
 {
-	if (num <= EMBEDDED_FD_SET_SIZE) /* Don't free an embedded fdset */
+	if (num <= NR_OPEN_DEFAULT) /* Don't free an embedded fdset */
 		return;
 	else if (num <= 8 * PAGE_SIZE)
 		kfree(array);
@@ -220,18 +215,6 @@ static struct fdtable *alloc_fdtable(int nr)
 	if (!fdt)
   		goto out;
 
-	nfds = max_t(int, 8 * L1_CACHE_BYTES, roundup_pow_of_two(nr + 1));
-	if (nfds > NR_OPEN)
-		nfds = NR_OPEN;
-
-  	new_openset = alloc_fdset(nfds);
-  	new_execset = alloc_fdset(nfds);
-  	if (!new_openset || !new_execset)
-  		goto out;
-	fdt->open_fds = new_openset;
-	fdt->close_on_exec = new_execset;
-	fdt->max_fdset = nfds;
-
 	nfds = NR_OPEN_DEFAULT;
 	/*
 	 * Expand to the max in easy steps, and keep expanding it until
@@ -251,15 +234,21 @@ static struct fdtable *alloc_fdtable(int nr)
 				nfds = NR_OPEN;
   		}
 	} while (nfds <= nr);
+
+  	new_openset = alloc_fdset(nfds);
+  	new_execset = alloc_fdset(nfds);
+  	if (!new_openset || !new_execset)
+  		goto out;
+	fdt->open_fds = new_openset;
+	fdt->close_on_exec = new_execset;
+
 	new_fds = alloc_fd_array(nfds);
 	if (!new_fds)
-		goto out2;
+		goto out;
 	fdt->fd = new_fds;
 	fdt->max_fds = nfds;
 	fdt->free_files = NULL;
 	return fdt;
-out2:
-	nfds = fdt->max_fdset;
 out:
 	free_fdset(new_openset, nfds);
 	free_fdset(new_execset, nfds);
@@ -290,7 +279,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
 	 * we dropped the lock
 	 */
 	cur_fdt = files_fdtable(files);
-	if (nr >= cur_fdt->max_fds || nr >= cur_fdt->max_fdset) {
+	if (nr >= cur_fdt->max_fds) {
 		/* Continue as planned */
 		copy_fdtable(new_fdt, cur_fdt);
 		rcu_assign_pointer(files->fdt, new_fdt);
@@ -316,11 +305,10 @@ int expand_files(struct files_struct *files, int nr)
 
 	fdt = files_fdtable(files);
 	/* Do we need to expand? */
-	if (nr < fdt->max_fdset && nr < fdt->max_fds)
+	if (nr < fdt->max_fds)
 		return 0;
 	/* Can we expand? */
-	if (fdt->max_fdset >= NR_OPEN || fdt->max_fds >= NR_OPEN ||
-	    nr >= NR_OPEN)
+	if (nr >= NR_OPEN)
 		return -EMFILE;
 
 	/* All good, so we try */
diff --git a/fs/open.c b/fs/open.c
index 0d94319e868..c989fb4cf7b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -864,8 +864,7 @@ int get_unused_fd(void)
 
 repeat:
 	fdt = files_fdtable(files);
- 	fd = find_next_zero_bit(fdt->open_fds->fds_bits,
-				fdt->max_fdset,
+	fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
 				files->next_fd);
 
 	/*
diff --git a/fs/select.c b/fs/select.c
index dcbc1112b7e..fe0893afd93 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -311,7 +311,7 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 {
 	fd_set_bits fds;
 	void *bits;
-	int ret, max_fdset;
+	int ret, max_fds;
 	unsigned int size;
 	struct fdtable *fdt;
 	/* Allocate small arguments on the stack to save memory and be faster */
@@ -321,13 +321,13 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 	if (n < 0)
 		goto out_nofds;
 
-	/* max_fdset can increase, so grab it once to avoid race */
+	/* max_fds can increase, so grab it once to avoid race */
 	rcu_read_lock();
 	fdt = files_fdtable(current->files);
-	max_fdset = fdt->max_fdset;
+	max_fds = fdt->max_fds;
 	rcu_read_unlock();
-	if (n > max_fdset)
-		n = max_fdset;
+	if (n > max_fds)
+		n = max_fds;
 
 	/*
 	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
-- 
cgit v1.2.3


From 4fd45812cbe875a620c86a096a5d46c742694b7e Mon Sep 17 00:00:00 2001
From: Vadim Lobanov <vlobanov@speakeasy.net>
Date: Sun, 10 Dec 2006 02:21:17 -0800
Subject: [PATCH] fdtable: Remove the free_files field

An fdtable can either be embedded inside a files_struct or standalone (after
being expanded).  When an fdtable is being discarded after all RCU references
to it have expired, we must either free it directly, in the standalone case,
or free the files_struct it is contained within, in the embedded case.

Currently the free_files field controls this behavior, but we can get rid of
it entirely, as all the necessary information is already recorded.  We can
distinguish embedded and standalone fdtables using max_fds, and if it is
embedded we can divine the relevant files_struct using container_of().

Signed-off-by: Vadim Lobanov <vlobanov@speakeasy.net>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/file.c | 27 ++++++++-------------------
 1 file changed, 8 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index fb3d2038dc2..17e6a55521e 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -91,7 +91,7 @@ static void free_fdtable_work(struct work_struct *work)
 	}
 }
 
-static void free_fdtable_rcu(struct rcu_head *rcu)
+void free_fdtable_rcu(struct rcu_head *rcu)
 {
 	struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
 	int fdset_size, fdarray_size;
@@ -101,20 +101,15 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 	fdset_size = fdt->max_fds / 8;
 	fdarray_size = fdt->max_fds * sizeof(struct file *);
 
-	if (fdt->free_files) {
+	if (fdt->max_fds <= NR_OPEN_DEFAULT) {
 		/*
-		 * The this fdtable was embedded in the files structure
-		 * and the files structure itself was getting destroyed.
-		 * It is now safe to free the files structure.
+		 * This fdtable is embedded in the files structure and that
+		 * structure itself is getting destroyed.
 		 */
-		kmem_cache_free(files_cachep, fdt->free_files);
+		kmem_cache_free(files_cachep,
+				container_of(fdt, struct files_struct, fdtab));
 		return;
 	}
-	if (fdt->max_fds <= NR_OPEN_DEFAULT)
-		/*
-		 * The fdtable was embedded
-		 */
-		return;
 	if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
 		kfree(fdt->open_fds);
 		kfree(fdt->close_on_exec);
@@ -132,12 +127,6 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 	}
 }
 
-void free_fdtable(struct fdtable *fdt)
-{
-	if (fdt->free_files || fdt->max_fds > NR_OPEN_DEFAULT)
-		call_rcu(&fdt->rcu, free_fdtable_rcu);
-}
-
 /*
  * Expand the fdset in the files_struct.  Called with the files spinlock
  * held for write.
@@ -247,7 +236,6 @@ static struct fdtable *alloc_fdtable(int nr)
 		goto out;
 	fdt->fd = new_fds;
 	fdt->max_fds = nfds;
-	fdt->free_files = NULL;
 	return fdt;
 out:
 	free_fdset(new_openset, nfds);
@@ -283,7 +271,8 @@ static int expand_fdtable(struct files_struct *files, int nr)
 		/* Continue as planned */
 		copy_fdtable(new_fdt, cur_fdt);
 		rcu_assign_pointer(files->fdt, new_fdt);
-		free_fdtable(cur_fdt);
+		if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
+			call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
 	} else {
 		/* Somebody else expanded, so undo our attempt */
 		__free_fdtable(new_fdt);
-- 
cgit v1.2.3


From 5466b456ed6748e0bfe02831e570004d4c04c1d7 Mon Sep 17 00:00:00 2001
From: Vadim Lobanov <vlobanov@speakeasy.net>
Date: Sun, 10 Dec 2006 02:21:22 -0800
Subject: [PATCH] fdtable: Implement new pagesize-based fdtable allocator

This patch provides an improved fdtable allocation scheme, useful for
expanding fdtable file descriptor entries.  The main focus is on the fdarray,
as its memory usage grows 128 times faster than that of an fdset.

The allocation algorithm sizes the fdarray in such a way that its memory usage
increases in easy page-sized chunks. The overall algorithm expands the allowed
size in powers of two, in order to amortize the cost of invoking vmalloc() for
larger allocation sizes. Namely, the following sizes for the fdarray are
considered, and the smallest that accommodates the requested fd count is
chosen:

    pagesize / 4
    pagesize / 2
    pagesize      <- memory allocator switch point
    pagesize * 2
    pagesize * 4
    ...etc...

Unlike the current implementation, this allocation scheme does not require a
loop to compute the optimal fdarray size, and can be done in efficient
straightline code.

Furthermore, since the fdarray overflows the pagesize boundary long before any
of the fdsets do, it makes sense to optimize run-time by allocating both
fdsets in a single swoop.  Even together, they will still be, by far, smaller
than the fdarray.  The fdtable->open_fds is now used as the anchor for the
fdset memory allocation.

Signed-off-by: Vadim Lobanov <vlobanov@speakeasy.net>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/file.c | 208 ++++++++++++++++++++++----------------------------------------
 1 file changed, 72 insertions(+), 136 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 17e6a55521e..857fa49e984 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -32,46 +32,28 @@ struct fdtable_defer {
  */
 static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
 
-
-/*
- * Allocate an fd array, using kmalloc or vmalloc.
- * Note: the array isn't cleared at allocation time.
- */
-struct file ** alloc_fd_array(int num)
+static inline void * alloc_fdmem(unsigned int size)
 {
-	struct file **new_fds;
-	int size = num * sizeof(struct file *);
-
 	if (size <= PAGE_SIZE)
-		new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
-	else 
-		new_fds = (struct file **) vmalloc(size);
-	return new_fds;
+		return kmalloc(size, GFP_KERNEL);
+	else
+		return vmalloc(size);
 }
 
-void free_fd_array(struct file **array, int num)
+static inline void free_fdarr(struct fdtable *fdt)
 {
-	int size = num * sizeof(struct file *);
-
-	if (!array) {
-		printk (KERN_ERR "free_fd_array: array = 0 (num = %d)\n", num);
-		return;
-	}
-
-	if (num <= NR_OPEN_DEFAULT) /* Don't free the embedded fd array! */
-		return;
-	else if (size <= PAGE_SIZE)
-		kfree(array);
+	if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *)))
+		kfree(fdt->fd);
 	else
-		vfree(array);
+		vfree(fdt->fd);
 }
 
-static void __free_fdtable(struct fdtable *fdt)
+static inline void free_fdset(struct fdtable *fdt)
 {
-	free_fdset(fdt->open_fds, fdt->max_fds);
-	free_fdset(fdt->close_on_exec, fdt->max_fds);
-	free_fd_array(fdt->fd, fdt->max_fds);
-	kfree(fdt);
+	if (fdt->max_fds <= (PAGE_SIZE * BITS_PER_BYTE / 2))
+		kfree(fdt->open_fds);
+	else
+		vfree(fdt->open_fds);
 }
 
 static void free_fdtable_work(struct work_struct *work)
@@ -86,7 +68,9 @@ static void free_fdtable_work(struct work_struct *work)
 	spin_unlock_bh(&f->lock);
 	while(fdt) {
 		struct fdtable *next = fdt->next;
-		__free_fdtable(fdt);
+		vfree(fdt->fd);
+		free_fdset(fdt);
+		kfree(fdt);
 		fdt = next;
 	}
 }
@@ -94,12 +78,9 @@ static void free_fdtable_work(struct work_struct *work)
 void free_fdtable_rcu(struct rcu_head *rcu)
 {
 	struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
-	int fdset_size, fdarray_size;
 	struct fdtable_defer *fddef;
 
 	BUG_ON(!fdt);
-	fdset_size = fdt->max_fds / 8;
-	fdarray_size = fdt->max_fds * sizeof(struct file *);
 
 	if (fdt->max_fds <= NR_OPEN_DEFAULT) {
 		/*
@@ -110,10 +91,9 @@ void free_fdtable_rcu(struct rcu_head *rcu)
 				container_of(fdt, struct files_struct, fdtab));
 		return;
 	}
-	if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
-		kfree(fdt->open_fds);
-		kfree(fdt->close_on_exec);
+	if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) {
 		kfree(fdt->fd);
+		kfree(fdt->open_fds);
 		kfree(fdt);
 	} else {
 		fddef = &get_cpu_var(fdtable_defer_list);
@@ -131,116 +111,70 @@ void free_fdtable_rcu(struct rcu_head *rcu)
  * Expand the fdset in the files_struct.  Called with the files spinlock
  * held for write.
  */
-static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
+static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
 {
-	int i;
-	int count;
-
-	BUG_ON(nfdt->max_fds < fdt->max_fds);
-	/* Copy the existing tables and install the new pointers */
-
-	i = fdt->max_fds / (sizeof(unsigned long) * 8);
-	count = (nfdt->max_fds - fdt->max_fds) / 8;
-
-	/*
-	 * Don't copy the entire array if the current fdset is
-	 * not yet initialised.
-	 */
-	if (i) {
-		memcpy (nfdt->open_fds, fdt->open_fds,
-						fdt->max_fds/8);
-		memcpy (nfdt->close_on_exec, fdt->close_on_exec,
-						fdt->max_fds/8);
-		memset (&nfdt->open_fds->fds_bits[i], 0, count);
-		memset (&nfdt->close_on_exec->fds_bits[i], 0, count);
-	}
+	unsigned int cpy, set;
 
-	/* Don't copy/clear the array if we are creating a new
-	   fd array for fork() */
-	if (fdt->max_fds) {
-		memcpy(nfdt->fd, fdt->fd,
-			fdt->max_fds * sizeof(struct file *));
-		/* clear the remainder of the array */
-		memset(&nfdt->fd[fdt->max_fds], 0,
-		       (nfdt->max_fds - fdt->max_fds) *
-					sizeof(struct file *));
-	}
-}
-
-/*
- * Allocate an fdset array, using kmalloc or vmalloc.
- * Note: the array isn't cleared at allocation time.
- */
-fd_set * alloc_fdset(int num)
-{
-	fd_set *new_fdset;
-	int size = num / 8;
-
-	if (size <= PAGE_SIZE)
-		new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL);
-	else
-		new_fdset = (fd_set *) vmalloc(size);
-	return new_fdset;
-}
-
-void free_fdset(fd_set *array, int num)
-{
-	if (num <= NR_OPEN_DEFAULT) /* Don't free an embedded fdset */
+	BUG_ON(nfdt->max_fds < ofdt->max_fds);
+	if (ofdt->max_fds == 0)
 		return;
-	else if (num <= 8 * PAGE_SIZE)
-		kfree(array);
-	else
-		vfree(array);
+
+	cpy = ofdt->max_fds * sizeof(struct file *);
+	set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
+	memcpy(nfdt->fd, ofdt->fd, cpy);
+	memset((char *)(nfdt->fd) + cpy, 0, set);
+
+	cpy = ofdt->max_fds / BITS_PER_BYTE;
+	set = (nfdt->max_fds - ofdt->max_fds) / BITS_PER_BYTE;
+	memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
+	memset((char *)(nfdt->open_fds) + cpy, 0, set);
+	memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
+	memset((char *)(nfdt->close_on_exec) + cpy, 0, set);
 }
 
-static struct fdtable *alloc_fdtable(int nr)
+static struct fdtable * alloc_fdtable(unsigned int nr)
 {
-	struct fdtable *fdt = NULL;
-	int nfds = 0;
-  	fd_set *new_openset = NULL, *new_execset = NULL;
-	struct file **new_fds;
-
-	fdt = kzalloc(sizeof(*fdt), GFP_KERNEL);
-	if (!fdt)
-  		goto out;
+	struct fdtable *fdt;
+	char *data;
 
-	nfds = NR_OPEN_DEFAULT;
 	/*
-	 * Expand to the max in easy steps, and keep expanding it until
-	 * we have enough for the requested fd array size.
+	 * Figure out how many fds we actually want to support in this fdtable.
+	 * Allocation steps are keyed to the size of the fdarray, since it
+	 * grows far faster than any of the other dynamic data. We try to fit
+	 * the fdarray into comfortable page-tuned chunks: starting at 1024B
+	 * and growing in powers of two from there on.
 	 */
-	do {
-#if NR_OPEN_DEFAULT < 256
-		if (nfds < 256)
-			nfds = 256;
-		else
-#endif
-		if (nfds < (PAGE_SIZE / sizeof(struct file *)))
-			nfds = PAGE_SIZE / sizeof(struct file *);
-		else {
-			nfds = nfds * 2;
-			if (nfds > NR_OPEN)
-				nfds = NR_OPEN;
-  		}
-	} while (nfds <= nr);
-
-  	new_openset = alloc_fdset(nfds);
-  	new_execset = alloc_fdset(nfds);
-  	if (!new_openset || !new_execset)
-  		goto out;
-	fdt->open_fds = new_openset;
-	fdt->close_on_exec = new_execset;
+	nr /= (1024 / sizeof(struct file *));
+	nr = roundup_pow_of_two(nr + 1);
+	nr *= (1024 / sizeof(struct file *));
+	if (nr > NR_OPEN)
+		nr = NR_OPEN;
 
-	new_fds = alloc_fd_array(nfds);
-	if (!new_fds)
+	fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
+	if (!fdt)
 		goto out;
-	fdt->fd = new_fds;
-	fdt->max_fds = nfds;
+	fdt->max_fds = nr;
+	data = alloc_fdmem(nr * sizeof(struct file *));
+	if (!data)
+		goto out_fdt;
+	fdt->fd = (struct file **)data;
+	data = alloc_fdmem(max_t(unsigned int,
+				 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
+	if (!data)
+		goto out_arr;
+	fdt->open_fds = (fd_set *)data;
+	data += nr / BITS_PER_BYTE;
+	fdt->close_on_exec = (fd_set *)data;
+	INIT_RCU_HEAD(&fdt->rcu);
+	fdt->next = NULL;
+
 	return fdt;
-out:
-	free_fdset(new_openset, nfds);
-	free_fdset(new_execset, nfds);
+
+out_arr:
+	free_fdarr(fdt);
+out_fdt:
 	kfree(fdt);
+out:
 	return NULL;
 }
 
@@ -275,7 +209,9 @@ static int expand_fdtable(struct files_struct *files, int nr)
 			call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
 	} else {
 		/* Somebody else expanded, so undo our attempt */
-		__free_fdtable(new_fdt);
+		free_fdarr(new_fdt);
+		free_fdset(new_fdt);
+		kfree(new_fdt);
 	}
 	return 1;
 }
-- 
cgit v1.2.3


From 44d306e1508fef6fa7a6eb15a1aba86ef68389a6 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 10 Dec 2006 02:21:26 -0800
Subject: [PATCH] user of the jiffies rounding code: JBD

This patch introduces a user: of the round_jiffies() function; the "5 second"
ext3/jbd wakeup.

While "every 5 seconds" doesn't sound as a problem, there can be many of these
(and these timers do add up over all the kernel).  The "5 second" wakeup isn't
really timing sensitive; in addition even with rounding it'll still happen
every 5 seconds (with the exception of the very first time, which is likely to
be rounded up to somewhere closer to 6 seconds)

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/transaction.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index d38e0d575e4..cceaf57e377 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -55,7 +55,7 @@ get_transaction(journal_t *journal, transaction_t *transaction)
 	spin_lock_init(&transaction->t_handle_lock);
 
 	/* Set up the commit timer for the new transaction. */
-	journal->j_commit_timer.expires = transaction->t_expires;
+	journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
 	add_timer(&journal->j_commit_timer);
 
 	J_ASSERT(journal->j_running_transaction == NULL);
-- 
cgit v1.2.3


From 8993780a6e44fb4e7ed34e33458506a775356c6e Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@woody.osdl.org>
Date: Mon, 11 Dec 2006 09:28:46 -0800
Subject: Make SLES9 "get_kernel_version" work on the kernel binary again

As reported by Andy Whitcroft, at least the SLES9 initrd build process
depends on getting the kernel version from the kernel binary.  It does
that by simply trawling the binary and looking for the signature of the
"linux_banner" string (the string "Linux version " to be exact. Which
is really broken in itself, but whatever..)

That got broken when the string was changed to allow /proc/version to
change the UTS release information dynamically, and "get_kernel_version"
thus returned "%s" (see commit a2ee8649ba6d71416712e798276bf7c40b64e6e5:
"[PATCH] Fix linux banner utsname information").

This just restores "linux_banner" as a static string, which should fix
the version finding.  And /proc/version simply uses a different string.

To avoid wasting even that miniscule amount of memory, the early boot
string should really be marked __initdata, but that just causes the same
bug in SLES9 to re-appear, since it will then find other occurrences of
"Linux version " first.

Cc: Andy Whitcroft <apw@shadowen.org>
Acked-by: Herbert Poetzl <herbert@13thfloor.at>
Cc: Andi Kleen <ak@suse.de>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Steve Fox <drfickle@us.ibm.com>
Acked-by: Olaf Hering <olaf@aepfle.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/proc_misc.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index dc3e580d1dc..92ea7743fe8 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -47,6 +47,7 @@
 #include <linux/vmalloc.h>
 #include <linux/crash_dump.h>
 #include <linux/pid_namespace.h>
+#include <linux/compile.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -253,8 +254,15 @@ static int version_read_proc(char *page, char **start, off_t off,
 {
 	int len;
 
-	len = sprintf(page, linux_banner,
-		utsname()->release, utsname()->version);
+	/* FIXED STRING! Don't touch! */
+	len = snprintf(page, PAGE_SIZE,
+		"%s version %s"
+		" (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ")"
+		" (" LINUX_COMPILER ")"
+		" %s\n",
+		utsname()->sysname,
+		utsname()->release,
+		utsname()->version);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
-- 
cgit v1.2.3


From 828ae6afbef03bfe107a4a8cc38798419d6a2765 Mon Sep 17 00:00:00 2001
From: Andrew Beekhof <abeekhof@suse.de>
Date: Mon, 4 Dec 2006 14:04:55 +0100
Subject: [patch 3/3] OCFS2 Configurable timeouts - Protocol changes

Modify the OCFS2 handshake to ensure essential timeouts are configured
identically on all nodes.

Only allow changes when there are no connected peers

Improves the logic in o2net_advance_rx() which broke now that
sizeof(struct o2net_handshake) is greater than sizeof(struct o2net_msg)

Included is the field for userspace-heartbeat timeout to avoid the need for
further protocol changes.

Uses a global spinlock to ensure the decisions to update configfs entries
are made on the correct value.  The region covered by the spinlock when
incrementing the counter is much larger as this is the more critical case.

Small cleanup contributed by Adrian Bunk <bunk@stusta.de>

Signed-off-by: Andrew Beekhof <abeekhof@suse.de>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/nodemanager.c  | 30 +++++++++++---
 fs/ocfs2/cluster/tcp.c          | 92 +++++++++++++++++++++++++++++++++++++----
 fs/ocfs2/cluster/tcp.h          |  1 +
 fs/ocfs2/cluster/tcp_internal.h |  9 +++-
 4 files changed, 116 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 234f83f2897..357f1d55177 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -573,12 +573,21 @@ static ssize_t o2nm_cluster_attr_idle_timeout_ms_write(
 	ret =  o2nm_cluster_attr_write(page, count, &val);
 
 	if (ret > 0) {
-		if (val <= cluster->cl_keepalive_delay_ms) {
+		if (cluster->cl_idle_timeout_ms != val
+			&& o2net_num_connected_peers()) {
+			mlog(ML_NOTICE,
+			     "o2net: cannot change idle timeout after "
+			     "the first peer has agreed to it."
+			     "  %d connected peers\n",
+			     o2net_num_connected_peers());
+			ret = -EINVAL;
+		} else if (val <= cluster->cl_keepalive_delay_ms) {
 			mlog(ML_NOTICE, "o2net: idle timeout must be larger "
 			     "than keepalive delay\n");
-			return -EINVAL;
+			ret = -EINVAL;
+		} else {
+			cluster->cl_idle_timeout_ms = val;
 		}
-		cluster->cl_idle_timeout_ms = val;
 	}
 
 	return ret;
@@ -599,12 +608,21 @@ static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write(
 	ret =  o2nm_cluster_attr_write(page, count, &val);
 
 	if (ret > 0) {
-		if (val >= cluster->cl_idle_timeout_ms) {
+		if (cluster->cl_keepalive_delay_ms != val
+		    && o2net_num_connected_peers()) {
+			mlog(ML_NOTICE,
+			     "o2net: cannot change keepalive delay after"
+			     " the first peer has agreed to it."
+			     "  %d connected peers\n",
+			     o2net_num_connected_peers());
+			ret = -EINVAL;
+		} else if (val >= cluster->cl_idle_timeout_ms) {
 			mlog(ML_NOTICE, "o2net: keepalive delay must be "
 			     "smaller than idle timeout\n");
-			return -EINVAL;
+			ret = -EINVAL;
+		} else {
+			cluster->cl_keepalive_delay_ms = val;
 		}
-		cluster->cl_keepalive_delay_ms = val;
 	}
 
 	return ret;
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index ebbaee664c6..457753df1ae 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -380,6 +380,13 @@ static void o2net_sc_cancel_delayed_work(struct o2net_sock_container *sc,
 		sc_put(sc);
 }
 
+static atomic_t o2net_connected_peers = ATOMIC_INIT(0);
+
+int o2net_num_connected_peers(void)
+{
+	return atomic_read(&o2net_connected_peers);
+}
+
 static void o2net_set_nn_state(struct o2net_node *nn,
 			       struct o2net_sock_container *sc,
 			       unsigned valid, int err)
@@ -390,6 +397,11 @@ static void o2net_set_nn_state(struct o2net_node *nn,
 
 	assert_spin_locked(&nn->nn_lock);
 
+	if (old_sc && !sc)
+		atomic_dec(&o2net_connected_peers);
+	else if (!old_sc && sc)
+		atomic_inc(&o2net_connected_peers);
+
 	/* the node num comparison and single connect/accept path should stop
 	 * an non-null sc from being overwritten with another */
 	BUG_ON(sc && nn->nn_sc && nn->nn_sc != sc);
@@ -1123,6 +1135,44 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
 		return -1;
 	}
 
+	/*
+	 * Ensure timeouts are consistent with other nodes, otherwise
+	 * we can end up with one node thinking that the other must be down,
+	 * but isn't. This can ultimately cause corruption.
+	 */
+	if (be32_to_cpu(hand->o2net_idle_timeout_ms) !=
+				o2net_idle_timeout(sc->sc_node)) {
+		mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of "
+		     "%u ms, but we use %u ms locally.  disconnecting\n",
+		     SC_NODEF_ARGS(sc),
+		     be32_to_cpu(hand->o2net_idle_timeout_ms),
+		     o2net_idle_timeout(sc->sc_node));
+		o2net_ensure_shutdown(nn, sc, -ENOTCONN);
+		return -1;
+	}
+
+	if (be32_to_cpu(hand->o2net_keepalive_delay_ms) !=
+			o2net_keepalive_delay(sc->sc_node)) {
+		mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of "
+		     "%u ms, but we use %u ms locally.  disconnecting\n",
+		     SC_NODEF_ARGS(sc),
+		     be32_to_cpu(hand->o2net_keepalive_delay_ms),
+		     o2net_keepalive_delay(sc->sc_node));
+		o2net_ensure_shutdown(nn, sc, -ENOTCONN);
+		return -1;
+	}
+
+	if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) !=
+			O2HB_MAX_WRITE_TIMEOUT_MS) {
+		mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of "
+		     "%u ms, but we use %u ms locally.  disconnecting\n",
+		     SC_NODEF_ARGS(sc),
+		     be32_to_cpu(hand->o2hb_heartbeat_timeout_ms),
+		     O2HB_MAX_WRITE_TIMEOUT_MS);
+		o2net_ensure_shutdown(nn, sc, -ENOTCONN);
+		return -1;
+	}
+
 	sc->sc_handshake_ok = 1;
 
 	spin_lock(&nn->nn_lock);
@@ -1155,6 +1205,23 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
 	sclog(sc, "receiving\n");
 	do_gettimeofday(&sc->sc_tv_advance_start);
 
+	if (unlikely(sc->sc_handshake_ok == 0)) {
+		if(sc->sc_page_off < sizeof(struct o2net_handshake)) {
+			data = page_address(sc->sc_page) + sc->sc_page_off;
+			datalen = sizeof(struct o2net_handshake) - sc->sc_page_off;
+			ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen);
+			if (ret > 0)
+				sc->sc_page_off += ret;
+		}
+
+		if (sc->sc_page_off == sizeof(struct o2net_handshake)) {
+			o2net_check_handshake(sc);
+			if (unlikely(sc->sc_handshake_ok == 0))
+				ret = -EPROTO;
+		}
+		goto out;
+	}
+
 	/* do we need more header? */
 	if (sc->sc_page_off < sizeof(struct o2net_msg)) {
 		data = page_address(sc->sc_page) + sc->sc_page_off;
@@ -1162,15 +1229,6 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
 		ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen);
 		if (ret > 0) {
 			sc->sc_page_off += ret;
-
-			/* this working relies on the handshake being
-			 * smaller than the normal message header */
-			if (sc->sc_page_off >= sizeof(struct o2net_handshake)&&
-			    !sc->sc_handshake_ok && o2net_check_handshake(sc)) {
-				ret = -EPROTO;
-				goto out;
-			}
-
 			/* only swab incoming here.. we can
 			 * only get here once as we cross from
 			 * being under to over */
@@ -1272,6 +1330,18 @@ static int o2net_set_nodelay(struct socket *sock)
 	return ret;
 }
 
+static void o2net_initialize_handshake(void)
+{
+	o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
+		O2HB_MAX_WRITE_TIMEOUT_MS);
+	o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(
+		o2net_idle_timeout(NULL));
+	o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32(
+		o2net_keepalive_delay(NULL));
+	o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32(
+		o2net_reconnect_delay(NULL));
+}
+
 /* ------------------------------------------------------------ */
 
 /* called when a connect completes and after a sock is accepted.  the
@@ -1286,6 +1356,7 @@ static void o2net_sc_connect_completed(struct work_struct *work)
               (unsigned long long)O2NET_PROTOCOL_VERSION,
 	      (unsigned long long)be64_to_cpu(o2net_hand->connector_id));
 
+	o2net_initialize_handshake();
 	o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand));
 	sc_put(sc);
 }
@@ -1514,6 +1585,8 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num,
 
 	if (node_num != o2nm_this_node())
 		o2net_disconnect_node(node);
+
+	BUG_ON(atomic_read(&o2net_connected_peers) < 0);
 }
 
 static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
@@ -1677,6 +1750,7 @@ static int o2net_accept_one(struct socket *sock)
 	o2net_register_callbacks(sc->sc_sock->sk, sc);
 	o2net_sc_queue_work(sc, &sc->sc_rx_work);
 
+	o2net_initialize_handshake();
 	o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand));
 
 out:
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index 2e08976050f..21a4e43df83 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -108,6 +108,7 @@ void o2net_unregister_hb_callbacks(void);
 int o2net_start_listening(struct o2nm_node *node);
 void o2net_stop_listening(struct o2nm_node *node);
 void o2net_disconnect_node(struct o2nm_node *node);
+int o2net_num_connected_peers(void);
 
 int o2net_init(void);
 void o2net_exit(void);
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 56f7ee1d254..b700dc9624d 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -38,6 +38,9 @@
  * locking semantics of the file system using the protocol.  It should 
  * be somewhere else, I'm sure, but right now it isn't.
  *
+ * New in version 5:
+ * 	- Network timeout checking protocol
+ *
  * New in version 4:
  * 	- Remove i_generation from lock names for better stat performance.
  *
@@ -48,10 +51,14 @@
  * 	- full 64 bit i_size in the metadata lock lvbs
  * 	- introduction of "rw" lock and pushing meta/data locking down
  */
-#define O2NET_PROTOCOL_VERSION 4ULL
+#define O2NET_PROTOCOL_VERSION 5ULL
 struct o2net_handshake {
 	__be64	protocol_version;
 	__be64	connector_id;
+	__be32  o2hb_heartbeat_timeout_ms;
+	__be32  o2net_idle_timeout_ms;
+	__be32  o2net_keepalive_delay_ms;
+	__be32  o2net_reconnect_delay_ms;
 };
 
 struct o2net_node {
-- 
cgit v1.2.3


From d23edbd3d596553db4ffac71a6e3745e85aee7cc Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@gmx.de>
Date: Tue, 12 Dec 2006 19:07:45 +0100
Subject: EXT{2,3,4}_FS: remove outdated part of the help text

Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Acked-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/Kconfig | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index b3b5aa0edff..0f8dfd5daca 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -12,9 +12,7 @@ config EXT2_FS
 	  Ext2 is a standard Linux file system for hard disks.
 
 	  To compile this file system support as a module, choose M here: the
-	  module will be called ext2.  Be aware however that the file system
-	  of your root partition (the one containing the directory /) cannot
-	  be compiled as a module, and so this could be dangerous.
+	  module will be called ext2.
 
 	  If unsure, say Y.
 
@@ -98,9 +96,7 @@ config EXT3_FS
 	  (available at <http://sourceforge.net/projects/e2fsprogs/>).
 
 	  To compile this file system support as a module, choose M here: the
-	  module will be called ext3.  Be aware however that the file system
-	  of your root partition (the one containing the directory /) cannot
-	  be compiled as a module, and so this may be dangerous.
+	  module will be called ext3.
 
 config EXT3_FS_XATTR
 	bool "Ext3 extended attributes"
@@ -163,9 +159,7 @@ config EXT4DEV_FS
 	  features will be added to ext4dev gradually.
 
 	  To compile this file system support as a module, choose M here. The
-	  module will be called ext4dev.  Be aware, however, that the filesystem
-	  of your root partition (the one containing the directory /) cannot
-	  be compiled as a module, and so this could be dangerous.
+	  module will be called ext4dev.
 
 	  If unsure, say N.
 
-- 
cgit v1.2.3


From bef1f40261c8bc5ad2ca70a5a1760b0eb79b6812 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Tue, 12 Dec 2006 20:04:19 +0100
Subject: kconfig: Standardize "depends" -> "depends on" in Kconfig files

Standardize the miniscule percentage of occurrences of "depends" in
Kconfig files to "depends on", and update kconfig-language.txt to
reflect that.

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 0f8dfd5daca..276ff3baaaf 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1002,7 +1002,7 @@ config TMPFS_POSIX_ACL
 
 config HUGETLBFS
 	bool "HugeTLB file system support"
-	depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
+	depends on X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
 	help
 	  hugetlbfs is a filesystem backing for HugeTLB pages, based on
 	  ramfs. For architectures that support it, say Y here and read
-- 
cgit v1.2.3


From b87576d59b5e8bd235e8579fd33b5d5af528b210 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Tue, 12 Dec 2006 20:07:35 +0100
Subject: fs: Convert kmalloc() + memset() to kzalloc() in fs/.

Convert the single available instance of kmalloc() + memset() to
kzalloc() in the fs/ directory.

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 fs/binfmt_elf_fdpic.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 76f06f6bc2f..6e6d4568d54 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -706,12 +706,11 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
 		return -ELIBBAD;
 
 	size = sizeof(*loadmap) + nloads * sizeof(*seg);
-	loadmap = kmalloc(size, GFP_KERNEL);
+	loadmap = kzalloc(size, GFP_KERNEL);
 	if (!loadmap)
 		return -ENOMEM;
 
 	params->loadmap = loadmap;
-	memset(loadmap, 0, size);
 
 	loadmap->version = ELF32_FDPIC_LOADMAP_VERSION;
 	loadmap->nsegs = nloads;
-- 
cgit v1.2.3


From d4c3cca941b64a938eaa9734585a93547c6be323 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 13 Dec 2006 00:34:04 -0800
Subject: [PATCH] constify pipe_buf_operations

- pipe/splice should use const pipe_buf_operations and file_operations

- struct pipe_inode_info has an unused field "start" : get rid of it.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/pipe.c   | 12 ++++++------
 fs/splice.c |  8 ++++----
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index f8b6bdcb879..9a06e8e48e8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -207,7 +207,7 @@ int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
 	return 0;
 }
 
-static struct pipe_buf_operations anon_pipe_buf_ops = {
+static const struct pipe_buf_operations anon_pipe_buf_ops = {
 	.can_merge = 1,
 	.map = generic_pipe_buf_map,
 	.unmap = generic_pipe_buf_unmap,
@@ -243,7 +243,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
 		if (bufs) {
 			int curbuf = pipe->curbuf;
 			struct pipe_buffer *buf = pipe->bufs + curbuf;
-			struct pipe_buf_operations *ops = buf->ops;
+			const struct pipe_buf_operations *ops = buf->ops;
 			void *addr;
 			size_t chars = buf->len;
 			int error, atomic;
@@ -365,7 +365,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 		int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
 							(PIPE_BUFFERS-1);
 		struct pipe_buffer *buf = pipe->bufs + lastbuf;
-		struct pipe_buf_operations *ops = buf->ops;
+		const struct pipe_buf_operations *ops = buf->ops;
 		int offset = buf->offset + buf->len;
 
 		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
@@ -756,7 +756,7 @@ const struct file_operations rdwr_fifo_fops = {
 	.fasync		= pipe_rdwr_fasync,
 };
 
-static struct file_operations read_pipe_fops = {
+static const struct file_operations read_pipe_fops = {
 	.llseek		= no_llseek,
 	.read		= do_sync_read,
 	.aio_read	= pipe_read,
@@ -768,7 +768,7 @@ static struct file_operations read_pipe_fops = {
 	.fasync		= pipe_read_fasync,
 };
 
-static struct file_operations write_pipe_fops = {
+static const struct file_operations write_pipe_fops = {
 	.llseek		= no_llseek,
 	.read		= bad_pipe_r,
 	.write		= do_sync_write,
@@ -780,7 +780,7 @@ static struct file_operations write_pipe_fops = {
 	.fasync		= pipe_write_fasync,
 };
 
-static struct file_operations rdwr_pipe_fops = {
+static const struct file_operations rdwr_pipe_fops = {
 	.llseek		= no_llseek,
 	.read		= do_sync_read,
 	.aio_read	= pipe_read,
diff --git a/fs/splice.c b/fs/splice.c
index bbd0aeb3f68..2fca6ebf4cc 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -42,7 +42,7 @@ struct splice_pipe_desc {
 	struct partial_page *partial;	/* pages[] may not be contig */
 	int nr_pages;			/* number of pages in map */
 	unsigned int flags;		/* splice flags */
-	struct pipe_buf_operations *ops;/* ops associated with output pipe */
+	const struct pipe_buf_operations *ops;/* ops associated with output pipe */
 };
 
 /*
@@ -139,7 +139,7 @@ error:
 	return err;
 }
 
-static struct pipe_buf_operations page_cache_pipe_buf_ops = {
+static const struct pipe_buf_operations page_cache_pipe_buf_ops = {
 	.can_merge = 0,
 	.map = generic_pipe_buf_map,
 	.unmap = generic_pipe_buf_unmap,
@@ -159,7 +159,7 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
 	return generic_pipe_buf_steal(pipe, buf);
 }
 
-static struct pipe_buf_operations user_page_pipe_buf_ops = {
+static const struct pipe_buf_operations user_page_pipe_buf_ops = {
 	.can_merge = 0,
 	.map = generic_pipe_buf_map,
 	.unmap = generic_pipe_buf_unmap,
@@ -724,7 +724,7 @@ static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
 	for (;;) {
 		if (pipe->nrbufs) {
 			struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
-			struct pipe_buf_operations *ops = buf->ops;
+			const struct pipe_buf_operations *ops = buf->ops;
 
 			sd.len = buf->len;
 			if (sd.len > sd.total_len)
-- 
cgit v1.2.3


From b227613841d4d211a10c5860acc73e133b613bc0 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 13 Dec 2006 00:34:33 -0800
Subject: [PATCH] touch_atime() cleanup

Simplify touch_atime() layout.

Cc: Valerie Henson <val_henson@linux.intel.com>
Cc: Mark Fasheh <mark.fasheh@oracle.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inode.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index d00de182ecb..04536ebc5ac 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1144,7 +1144,6 @@ sector_t bmap(struct inode * inode, sector_t block)
 		res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
 	return res;
 }
-
 EXPORT_SYMBOL(bmap);
 
 /**
@@ -1163,19 +1162,22 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
 
 	if (IS_RDONLY(inode))
 		return;
-
-	if ((inode->i_flags & S_NOATIME) ||
-	    (inode->i_sb->s_flags & MS_NOATIME) ||
-	    ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
+	if (inode->i_flags & S_NOATIME)
+		return;
+	if (inode->i_sb->s_flags & MS_NOATIME)
+		return;
+	if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
 		return;
 
 	/*
 	 * We may have a NULL vfsmount when coming from NFSD
 	 */
-	if (mnt &&
-	    ((mnt->mnt_flags & MNT_NOATIME) ||
-	     ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))))
-		return;
+	if (mnt) {
+		if (mnt->mnt_flags & MNT_NOATIME)
+			return;
+		if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
+			return;
+	}
 
 	now = current_fs_time(inode->i_sb);
 	if (!timespec_equal(&inode->i_atime, &now)) {
@@ -1183,7 +1185,6 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
 		mark_inode_dirty_sync(inode);
 	}
 }
-
 EXPORT_SYMBOL(touch_atime);
 
 /**
-- 
cgit v1.2.3


From 47ae32d6a54955a041cdc30b06d0bb16e75f68d5 Mon Sep 17 00:00:00 2001
From: Valerie Henson <val_henson@linux.intel.com>
Date: Wed, 13 Dec 2006 00:34:34 -0800
Subject: [PATCH] relative atime

Add "relatime" (relative atime) support.  Relative atime only updates the
atime if the previous atime is older than the mtime or ctime.  Like
noatime, but useful for applications like mutt that need to know when a
file has been read since it was last modified.

A corresponding patch against mount(8) is available at
http://userweb.kernel.org/~akpm/mount-relative-atime.txt

Signed-off-by: Valerie Henson <val_henson@linux.intel.com>
Cc: Mark Fasheh <mark.fasheh@oracle.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Karel Zak <kzak@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inode.c     | 22 ++++++++++++++++++----
 fs/namespace.c |  5 ++++-
 2 files changed, 22 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 04536ebc5ac..bf21dc6d0db 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1177,13 +1177,27 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
 			return;
 		if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
 			return;
+
+		if (mnt->mnt_flags & MNT_RELATIME) {
+			/*
+			 * With relative atime, only update atime if the
+			 * previous atime is earlier than either the ctime or
+			 * mtime.
+			 */
+			if (timespec_compare(&inode->i_mtime,
+						&inode->i_atime) < 0 &&
+			    timespec_compare(&inode->i_ctime,
+						&inode->i_atime) < 0)
+				return;
+		}
 	}
 
 	now = current_fs_time(inode->i_sb);
-	if (!timespec_equal(&inode->i_atime, &now)) {
-		inode->i_atime = now;
-		mark_inode_dirty_sync(inode);
-	}
+	if (timespec_equal(&inode->i_atime, &now))
+		return;
+
+	inode->i_atime = now;
+	mark_inode_dirty_sync(inode);
 }
 EXPORT_SYMBOL(touch_atime);
 
diff --git a/fs/namespace.c b/fs/namespace.c
index fde8553faa7..5ef336c1103 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -368,6 +368,7 @@ static int show_vfsmnt(struct seq_file *m, void *v)
 		{ MNT_NOEXEC, ",noexec" },
 		{ MNT_NOATIME, ",noatime" },
 		{ MNT_NODIRATIME, ",nodiratime" },
+		{ MNT_RELATIME, ",relatime" },
 		{ 0, NULL }
 	};
 	struct proc_fs_info *fs_infop;
@@ -1405,9 +1406,11 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
 		mnt_flags |= MNT_NOATIME;
 	if (flags & MS_NODIRATIME)
 		mnt_flags |= MNT_NODIRATIME;
+	if (flags & MS_RELATIME)
+		mnt_flags |= MNT_RELATIME;
 
 	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
-		   MS_NOATIME | MS_NODIRATIME);
+		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME);
 
 	/* ... and get the mountpoint */
 	retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
-- 
cgit v1.2.3


From 7e913c53609d5e8374f55d6f29c0bcd6650a2362 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 13 Dec 2006 00:34:35 -0800
Subject: [PATCH] ocfs2: relative atime support

Update ocfs2_should_update_atime() to understand the MNT_RELATIME flag and
to test against mtime / ctime accordingly.

[akpm@osdl.org: cleanups]
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Cc: Valerie Henson <val_henson@linux.intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ocfs2/file.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e9a82ad95c1..9fd590b9bde 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -153,6 +153,14 @@ int ocfs2_should_update_atime(struct inode *inode,
 	    ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
 		return 0;
 
+	if (vfsmnt->mnt_flags & MNT_RELATIME) {
+		if ((timespec_compare(&inode->i_atime, &inode->i_mtime) <= 0) ||
+		    (timespec_compare(&inode->i_atime, &inode->i_ctime) <= 0))
+			return 1;
+
+		return 0;
+	}
+
 	now = CURRENT_TIME;
 	if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum))
 		return 0;
-- 
cgit v1.2.3


From e61c90188b9956edae1105eef361d8981a352fcd Mon Sep 17 00:00:00 2001
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Date: Wed, 13 Dec 2006 00:34:36 -0800
Subject: [PATCH] optimize o_direct on block devices

Implement block device specific .direct_IO method instead of going through
generic direct_io_worker for block device.

direct_io_worker() is fairly complex because it needs to handle O_DIRECT on
file system, where it needs to perform block allocation, hole detection,
extents file on write, and tons of other corner cases.  The end result is
that it takes tons of CPU time to submit an I/O.

For block device, the block allocation is much simpler and a tight triple
loop can be written to iterate each iovec and each page within the iovec in
order to construct/prepare bio structure and then subsequently submit it to
the block layer.  This significantly speeds up O_D on block device.

[akpm@osdl.org: small speedup]
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/bio.c       |   2 +-
 fs/block_dev.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 176 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index 7ec737eda72..7618bcb1836 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -916,7 +916,7 @@ void bio_set_pages_dirty(struct bio *bio)
 	}
 }
 
-static void bio_release_pages(struct bio *bio)
+void bio_release_pages(struct bio *bio)
 {
 	struct bio_vec *bvec = bio->bi_io_vec;
 	int i;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 197f9392184..1715d6b5f41 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -129,43 +129,191 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
 	return 0;
 }
 
-static int
-blkdev_get_blocks(struct inode *inode, sector_t iblock,
-		struct buffer_head *bh, int create)
+static int blk_end_aio(struct bio *bio, unsigned int bytes_done, int error)
 {
-	sector_t end_block = max_block(I_BDEV(inode));
-	unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
+	struct kiocb *iocb = bio->bi_private;
+	atomic_t *bio_count = &iocb->ki_bio_count;
 
-	if ((iblock + max_blocks) > end_block) {
-		max_blocks = end_block - iblock;
-		if ((long)max_blocks <= 0) {
-			if (create)
-				return -EIO;	/* write fully beyond EOF */
-			/*
-			 * It is a read which is fully beyond EOF.  We return
-			 * a !buffer_mapped buffer
-			 */
-			max_blocks = 0;
-		}
+	if (bio_data_dir(bio) == READ)
+		bio_check_pages_dirty(bio);
+	else {
+		bio_release_pages(bio);
+		bio_put(bio);
+	}
+
+	/* iocb->ki_nbytes stores error code from LLDD */
+	if (error)
+		iocb->ki_nbytes = -EIO;
+
+	if (atomic_dec_and_test(bio_count)) {
+		if (iocb->ki_nbytes < 0)
+			aio_complete(iocb, iocb->ki_nbytes, 0);
+		else
+			aio_complete(iocb, iocb->ki_left, 0);
 	}
 
-	bh->b_bdev = I_BDEV(inode);
-	bh->b_blocknr = iblock;
-	bh->b_size = max_blocks << inode->i_blkbits;
-	if (max_blocks)
-		set_buffer_mapped(bh);
 	return 0;
 }
 
+#define VEC_SIZE	16
+struct pvec {
+	unsigned short nr;
+	unsigned short idx;
+	struct page *page[VEC_SIZE];
+};
+
+#define PAGES_SPANNED(addr, len)	\
+	(DIV_ROUND_UP((addr) + (len), PAGE_SIZE) - (addr) / PAGE_SIZE);
+
+/*
+ * get page pointer for user addr, we internally cache struct page array for
+ * (addr, count) range in pvec to avoid frequent call to get_user_pages.  If
+ * internal page list is exhausted, a batch count of up to VEC_SIZE is used
+ * to get next set of page struct.
+ */
+static struct page *blk_get_page(unsigned long addr, size_t count, int rw,
+				 struct pvec *pvec)
+{
+	int ret, nr_pages;
+	if (pvec->idx == pvec->nr) {
+		nr_pages = PAGES_SPANNED(addr, count);
+		nr_pages = min(nr_pages, VEC_SIZE);
+		down_read(&current->mm->mmap_sem);
+		ret = get_user_pages(current, current->mm, addr, nr_pages,
+				     rw == READ, 0, pvec->page, NULL);
+		up_read(&current->mm->mmap_sem);
+		if (ret < 0)
+			return ERR_PTR(ret);
+		pvec->nr = ret;
+		pvec->idx = 0;
+	}
+	return pvec->page[pvec->idx++];
+}
+
 static ssize_t
 blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs)
-{
-	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_mapping->host;
+		 loff_t pos, unsigned long nr_segs)
+{
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
+	unsigned blkbits = blksize_bits(bdev_hardsect_size(I_BDEV(inode)));
+	unsigned blocksize_mask = (1 << blkbits) - 1;
+	unsigned long seg = 0;	/* iov segment iterator */
+	unsigned long nvec;	/* number of bio vec needed */
+	unsigned long cur_off;	/* offset into current page */
+	unsigned long cur_len;	/* I/O len of current page, up to PAGE_SIZE */
+
+	unsigned long addr;	/* user iovec address */
+	size_t count;		/* user iovec len */
+	size_t nbytes = iocb->ki_nbytes = iocb->ki_left; /* total xfer size */
+	loff_t size;		/* size of block device */
+	struct bio *bio;
+	atomic_t *bio_count = &iocb->ki_bio_count;
+	struct page *page;
+	struct pvec pvec;
+
+	pvec.nr = 0;
+	pvec.idx = 0;
+
+	if (pos & blocksize_mask)
+		return -EINVAL;
+
+	size = i_size_read(inode);
+	if (pos + nbytes > size) {
+		nbytes = size - pos;
+		iocb->ki_left = nbytes;
+	}
+
+	/*
+	 * check first non-zero iov alignment, the remaining
+	 * iov alignment is checked inside bio loop below.
+	 */
+	do {
+		addr = (unsigned long) iov[seg].iov_base;
+		count = min(iov[seg].iov_len, nbytes);
+		if (addr & blocksize_mask || count & blocksize_mask)
+			return -EINVAL;
+	} while (!count && ++seg < nr_segs);
+	atomic_set(bio_count, 1);
+
+	while (nbytes) {
+		/* roughly estimate number of bio vec needed */
+		nvec = (nbytes + PAGE_SIZE - 1) / PAGE_SIZE;
+		nvec = max(nvec, nr_segs - seg);
+		nvec = min(nvec, (unsigned long) BIO_MAX_PAGES);
+
+		/* bio_alloc should not fail with GFP_KERNEL flag */
+		bio = bio_alloc(GFP_KERNEL, nvec);
+		bio->bi_bdev = I_BDEV(inode);
+		bio->bi_end_io = blk_end_aio;
+		bio->bi_private = iocb;
+		bio->bi_sector = pos >> blkbits;
+same_bio:
+		cur_off = addr & ~PAGE_MASK;
+		cur_len = PAGE_SIZE - cur_off;
+		if (count < cur_len)
+			cur_len = count;
+
+		page = blk_get_page(addr, count, rw, &pvec);
+		if (unlikely(IS_ERR(page)))
+			goto backout;
+
+		if (bio_add_page(bio, page, cur_len, cur_off)) {
+			pos += cur_len;
+			addr += cur_len;
+			count -= cur_len;
+			nbytes -= cur_len;
+
+			if (count)
+				goto same_bio;
+			while (++seg < nr_segs) {
+				addr = (unsigned long) iov[seg].iov_base;
+				count = iov[seg].iov_len;
+				if (!count)
+					continue;
+				if (unlikely(addr & blocksize_mask ||
+					     count & blocksize_mask)) {
+					page = ERR_PTR(-EINVAL);
+					goto backout;
+				}
+				count = min(count, nbytes);
+				goto same_bio;
+			}
+		}
 
-	return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode),
-				iov, offset, nr_segs, blkdev_get_blocks, NULL);
+		/* bio is ready, submit it */
+		if (rw == READ)
+			bio_set_pages_dirty(bio);
+		atomic_inc(bio_count);
+		submit_bio(rw, bio);
+	}
+
+completion:
+	iocb->ki_left -= nbytes;
+	nbytes = iocb->ki_left;
+	iocb->ki_pos += nbytes;
+
+	blk_run_address_space(inode->i_mapping);
+	if (atomic_dec_and_test(bio_count))
+		aio_complete(iocb, nbytes, 0);
+
+	return -EIOCBQUEUED;
+
+backout:
+	/*
+	 * back out nbytes count constructed so far for this bio,
+	 * we will throw away current bio.
+	 */
+	nbytes += bio->bi_size;
+	bio_release_pages(bio);
+	bio_put(bio);
+
+	/*
+	 * if no bio was submmitted, return the error code.
+	 * otherwise, proceed with pending I/O completion.
+	 */
+	if (atomic_read(bio_count) == 1)
+		return PTR_ERR(page);
+	goto completion;
 }
 
 static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
-- 
cgit v1.2.3


From 90aef12e6dd609e1ad7fb70044eedc78ca55ee5e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 13 Dec 2006 00:34:49 -0800
Subject: [PATCH] Use activate_mm() in fs/aio.c:use_mm()

activate_mm() is not the right thing to be using in use_mm().  It should be
switch_mm().

On normal x86, they're synonymous, but for the Xen patches I'm adding a
hook which assumes that activate_mm is only used the first time a new mm
is used after creation (I have another hook for dealing with dup_mm).  I
think this use of activate_mm() is the only place where it could be used
a second time on an mm.

>From a quick look at the other architectures I think this is OK (most
simply implement one in terms of the other), but some are doing some
subtly different stuff between the two.

Acked-by: David Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index d3a6ec2c962..5f577a63bdf 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -586,7 +586,7 @@ static void use_mm(struct mm_struct *mm)
 	 * Note that on UML this *requires* PF_BORROWED_MM to be set, otherwise
 	 * it won't work. Update it accordingly if you change it here
 	 */
-	activate_mm(active_mm, mm);
+	switch_mm(active_mm, mm, tsk);
 	task_unlock(tsk);
 
 	mmdrop(active_mm);
-- 
cgit v1.2.3


From cd86128088554d64fea1679191509f00e6353c5b Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Wed, 13 Dec 2006 00:34:52 -0800
Subject: [PATCH] Fix numerous kcalloc() calls, convert to kzalloc()

All kcalloc() calls of the form "kcalloc(1,...)" are converted to the
equivalent kzalloc() calls, and a few kcalloc() calls with the incorrect
ordering of the first two arguments are fixed.

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Cc: Jeff Garzik <jeff@garzik.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: James Bottomley <James.Bottomley@steeleye.com>
Cc: Greg KH <greg@kroah.com>
Cc: Mark Fasheh <mark.fasheh@oracle.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ocfs2/alloc.c               |  2 +-
 fs/ocfs2/cluster/heartbeat.c   |  4 ++--
 fs/ocfs2/cluster/nodemanager.c |  6 +++---
 fs/ocfs2/cluster/tcp.c         | 10 +++++-----
 fs/ocfs2/dlm/dlmdomain.c       |  4 ++--
 fs/ocfs2/dlm/dlmlock.c         |  4 ++--
 fs/ocfs2/dlm/dlmmaster.c       |  2 +-
 fs/ocfs2/dlm/dlmrecovery.c     |  6 +++---
 fs/ocfs2/localalloc.c          |  2 +-
 fs/ocfs2/slot_map.c            |  2 +-
 fs/ocfs2/suballoc.c            |  6 +++---
 fs/ocfs2/super.c               |  6 +++---
 fs/ocfs2/vote.c                |  4 ++--
 13 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index edc91ca3792..f27e5378caf 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1959,7 +1959,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	*tc = kcalloc(1, sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
+	*tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
 	if (!(*tc)) {
 		status = -ENOMEM;
 		mlog_errno(status);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 4cd9a958045..a25ef5a5038 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1553,7 +1553,7 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
 	struct o2hb_region *reg = NULL;
 	struct config_item *ret = NULL;
 
-	reg = kcalloc(1, sizeof(struct o2hb_region), GFP_KERNEL);
+	reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL);
 	if (reg == NULL)
 		goto out; /* ENOMEM */
 
@@ -1679,7 +1679,7 @@ struct config_group *o2hb_alloc_hb_set(void)
 	struct o2hb_heartbeat_group *hs = NULL;
 	struct config_group *ret = NULL;
 
-	hs = kcalloc(1, sizeof(struct o2hb_heartbeat_group), GFP_KERNEL);
+	hs = kzalloc(sizeof(struct o2hb_heartbeat_group), GFP_KERNEL);
 	if (hs == NULL)
 		goto out;
 
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 357f1d55177..b17333a0606 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -714,7 +714,7 @@ static struct config_item *o2nm_node_group_make_item(struct config_group *group,
 	if (strlen(name) > O2NM_MAX_NAME_LEN)
 		goto out; /* ENAMETOOLONG */
 
-	node = kcalloc(1, sizeof(struct o2nm_node), GFP_KERNEL);
+	node = kzalloc(sizeof(struct o2nm_node), GFP_KERNEL);
 	if (node == NULL)
 		goto out; /* ENOMEM */
 
@@ -825,8 +825,8 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
 	if (o2nm_single_cluster)
 		goto out; /* ENOSPC */
 
-	cluster = kcalloc(1, sizeof(struct o2nm_cluster), GFP_KERNEL);
-	ns = kcalloc(1, sizeof(struct o2nm_node_group), GFP_KERNEL);
+	cluster = kzalloc(sizeof(struct o2nm_cluster), GFP_KERNEL);
+	ns = kzalloc(sizeof(struct o2nm_node_group), GFP_KERNEL);
 	defs = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL);
 	o2hb_group = o2hb_alloc_hb_set();
 	if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 457753df1ae..ae4ff4a6636 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -324,7 +324,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
 	struct page *page = NULL;
 
 	page = alloc_page(GFP_NOFS);
-	sc = kcalloc(1, sizeof(*sc), GFP_NOFS);
+	sc = kzalloc(sizeof(*sc), GFP_NOFS);
 	if (sc == NULL || page == NULL)
 		goto out;
 
@@ -714,7 +714,7 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len,
 		goto out;
 	}
 
-       	nmh = kcalloc(1, sizeof(struct o2net_msg_handler), GFP_NOFS);
+       	nmh = kzalloc(sizeof(struct o2net_msg_handler), GFP_NOFS);
 	if (nmh == NULL) {
 		ret = -ENOMEM;
 		goto out;
@@ -1918,9 +1918,9 @@ int o2net_init(void)
 
 	o2quo_init();
 
-	o2net_hand = kcalloc(1, sizeof(struct o2net_handshake), GFP_KERNEL);
-	o2net_keep_req = kcalloc(1, sizeof(struct o2net_msg), GFP_KERNEL);
-	o2net_keep_resp = kcalloc(1, sizeof(struct o2net_msg), GFP_KERNEL);
+	o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL);
+	o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL);
+	o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL);
 	if (!o2net_hand || !o2net_keep_req || !o2net_keep_resp) {
 		kfree(o2net_hand);
 		kfree(o2net_keep_req);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 420a375a394..f0b25f2dd20 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -920,7 +920,7 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
 
 	mlog_entry("%p", dlm);
 
-	ctxt = kcalloc(1, sizeof(*ctxt), GFP_KERNEL);
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
 	if (!ctxt) {
 		status = -ENOMEM;
 		mlog_errno(status);
@@ -1223,7 +1223,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 	int i;
 	struct dlm_ctxt *dlm = NULL;
 
-	dlm = kcalloc(1, sizeof(*dlm), GFP_KERNEL);
+	dlm = kzalloc(sizeof(*dlm), GFP_KERNEL);
 	if (!dlm) {
 		mlog_errno(-ENOMEM);
 		goto leave;
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 42a1b91979b..e5ca3db197f 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -408,13 +408,13 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
 	struct dlm_lock *lock;
 	int kernel_allocated = 0;
 
-	lock = kcalloc(1, sizeof(*lock), GFP_NOFS);
+	lock = kzalloc(sizeof(*lock), GFP_NOFS);
 	if (!lock)
 		return NULL;
 
 	if (!lksb) {
 		/* zero memory only if kernel-allocated */
-		lksb = kcalloc(1, sizeof(*lksb), GFP_NOFS);
+		lksb = kzalloc(sizeof(*lksb), GFP_NOFS);
 		if (!lksb) {
 			kfree(lock);
 			return NULL;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 856012b4fa4..0ad872055cb 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1939,7 +1939,7 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
 			       int ignore_higher, u8 request_from, u32 flags)
 {
 	struct dlm_work_item *item;
-	item = kcalloc(1, sizeof(*item), GFP_NOFS);
+	item = kzalloc(sizeof(*item), GFP_NOFS);
 	if (!item)
 		return -ENOMEM;
 
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index fb3e2b0817f..367a11e9e2e 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -757,7 +757,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
 		}
 		BUG_ON(num == dead_node);
 
-		ndata = kcalloc(1, sizeof(*ndata), GFP_NOFS);
+		ndata = kzalloc(sizeof(*ndata), GFP_NOFS);
 		if (!ndata) {
 			dlm_destroy_recovery_area(dlm, dead_node);
 			return -ENOMEM;
@@ -842,7 +842,7 @@ int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data)
 	}
 	BUG_ON(lr->dead_node != dlm->reco.dead_node);
 
-	item = kcalloc(1, sizeof(*item), GFP_NOFS);
+	item = kzalloc(sizeof(*item), GFP_NOFS);
 	if (!item) {
 		dlm_put(dlm);
 		return -ENOMEM;
@@ -1323,7 +1323,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
 
 	ret = -ENOMEM;
 	buf = kmalloc(be16_to_cpu(msg->data_len), GFP_NOFS);
-	item = kcalloc(1, sizeof(*item), GFP_NOFS);
+	item = kzalloc(sizeof(*item), GFP_NOFS);
 	if (!buf || !item)
 		goto leave;
 
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 698d79a74ef..4dedd978910 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -776,7 +776,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
 {
 	int status;
 
-	*ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
+	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	if (!(*ac)) {
 		status = -ENOMEM;
 		mlog_errno(status);
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index aa6f5aadedc..2d3ac32cb74 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -175,7 +175,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
 	struct buffer_head *bh = NULL;
 	struct ocfs2_slot_info *si;
 
-	si = kcalloc(1, sizeof(struct ocfs2_slot_info), GFP_KERNEL);
+	si = kzalloc(sizeof(struct ocfs2_slot_info), GFP_KERNEL);
 	if (!si) {
 		status = -ENOMEM;
 		mlog_errno(status);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 000d71cca6c..6dbb1176275 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -488,7 +488,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
 	int status;
 	u32 slot;
 
-	*ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
+	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	if (!(*ac)) {
 		status = -ENOMEM;
 		mlog_errno(status);
@@ -530,7 +530,7 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
 {
 	int status;
 
-	*ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
+	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	if (!(*ac)) {
 		status = -ENOMEM;
 		mlog_errno(status);
@@ -595,7 +595,7 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
 
 	mlog_entry_void();
 
-	*ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
+	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	if (!(*ac)) {
 		status = -ENOMEM;
 		mlog_errno(status);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index a6d2f8cc165..6e300a88a47 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1231,7 +1231,7 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu
 
 	BUG_ON(uuid_bytes != OCFS2_VOL_UUID_LEN);
 
-	osb->uuid_str = kcalloc(1, OCFS2_VOL_UUID_LEN * 2 + 1, GFP_KERNEL);
+	osb->uuid_str = kzalloc(OCFS2_VOL_UUID_LEN * 2 + 1, GFP_KERNEL);
 	if (osb->uuid_str == NULL)
 		return -ENOMEM;
 
@@ -1262,7 +1262,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 
 	mlog_entry_void();
 
-	osb = kcalloc(1, sizeof(struct ocfs2_super), GFP_KERNEL);
+	osb = kzalloc(sizeof(struct ocfs2_super), GFP_KERNEL);
 	if (!osb) {
 		status = -ENOMEM;
 		mlog_errno(status);
@@ -1387,7 +1387,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	 */
 	/* initialize our journal structure */
 
-	journal = kcalloc(1, sizeof(struct ocfs2_journal), GFP_KERNEL);
+	journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
 	if (!journal) {
 		mlog(ML_ERROR, "unable to alloc journal\n");
 		status = -ENOMEM;
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 0315a8b61ed..0afd8b9af70 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -479,7 +479,7 @@ static struct ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(unsigned int response
 {
 	struct ocfs2_net_wait_ctxt *w;
 
-	w = kcalloc(1, sizeof(*w), GFP_NOFS);
+	w = kzalloc(sizeof(*w), GFP_NOFS);
 	if (!w) {
 		mlog_errno(-ENOMEM);
 		goto bail;
@@ -642,7 +642,7 @@ static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb,
 
 	BUG_ON(!ocfs2_is_valid_vote_request(type));
 
-	request = kcalloc(1, sizeof(*request), GFP_NOFS);
+	request = kzalloc(sizeof(*request), GFP_NOFS);
 	if (!request) {
 		mlog_errno(-ENOMEM);
 	} else {
-- 
cgit v1.2.3


From e8c5c045d790e7018c56f1a998a2d240b732ea3c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Wed, 13 Dec 2006 00:35:03 -0800
Subject: [PATCH] lockd endianness annotations

Annotated, all places switched to keeping status net-endian.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/lockd/clntlock.c | 10 +++++-----
 fs/lockd/clntproc.c | 39 ++++++++++++++++++++-------------------
 fs/lockd/svclock.c  |  4 ++--
 fs/lockd/xdr.c      |  8 ++++----
 fs/lockd/xdr4.c     |  8 ++++----
 fs/nfsd/lockd.c     |  2 +-
 6 files changed, 36 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 92681c9e9b2..06270774516 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -36,7 +36,7 @@ struct nlm_wait {
 	struct nlm_host *	b_host;
 	struct file_lock *	b_lock;		/* local file lock */
 	unsigned short		b_reclaim;	/* got to reclaim lock */
-	u32			b_status;	/* grant callback status */
+	__be32			b_status;	/* grant callback status */
 };
 
 static LIST_HEAD(nlm_blocked);
@@ -53,7 +53,7 @@ struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *
 		block->b_host = host;
 		block->b_lock = fl;
 		init_waitqueue_head(&block->b_wait);
-		block->b_status = NLM_LCK_BLOCKED;
+		block->b_status = nlm_lck_blocked;
 		list_add(&block->b_list, &nlm_blocked);
 	}
 	return block;
@@ -89,7 +89,7 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
 	 * nlmclnt_lock for an explanation.
 	 */
 	ret = wait_event_interruptible_timeout(block->b_wait,
-			block->b_status != NLM_LCK_BLOCKED,
+			block->b_status != nlm_lck_blocked,
 			timeout);
 	if (ret < 0)
 		return -ERESTARTSYS;
@@ -131,7 +131,7 @@ __be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock
 		/* Alright, we found a lock. Set the return status
 		 * and wake up the caller
 		 */
-		block->b_status = NLM_LCK_GRANTED;
+		block->b_status = nlm_granted;
 		wake_up(&block->b_wait);
 		res = nlm_granted;
 	}
@@ -211,7 +211,7 @@ restart:
 	/* Now, wake up all processes that sleep on a blocked lock */
 	list_for_each_entry(block, &nlm_blocked, b_list) {
 		if (block->b_host == host) {
-			block->b_status = NLM_LCK_DENIED_GRACE_PERIOD;
+			block->b_status = nlm_lck_denied_grace_period;
 			wake_up(&block->b_wait);
 		}
 	}
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 80a1a6dccc8..0b4acc1c5e7 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -27,7 +27,7 @@
 static int	nlmclnt_test(struct nlm_rqst *, struct file_lock *);
 static int	nlmclnt_lock(struct nlm_rqst *, struct file_lock *);
 static int	nlmclnt_unlock(struct nlm_rqst *, struct file_lock *);
-static int	nlm_stat_to_errno(u32 stat);
+static int	nlm_stat_to_errno(__be32 stat);
 static void	nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host);
 static int	nlmclnt_cancel(struct nlm_host *, int , struct file_lock *);
 
@@ -325,7 +325,7 @@ nlmclnt_call(struct nlm_rqst *req, u32 proc)
 			}
 			break;
 		} else
-		if (resp->status == NLM_LCK_DENIED_GRACE_PERIOD) {
+		if (resp->status == nlm_lck_denied_grace_period) {
 			dprintk("lockd: server in grace period\n");
 			if (argp->reclaim) {
 				printk(KERN_WARNING
@@ -411,10 +411,10 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
 		goto out;
 
 	switch (req->a_res.status) {
-		case NLM_LCK_GRANTED:
+		case nlm_granted:
 			fl->fl_type = F_UNLCK;
 			break;
-		case NLM_LCK_DENIED:
+		case nlm_lck_denied:
 			/*
 			 * Report the conflicting lock back to the application.
 			 */
@@ -524,9 +524,9 @@ again:
 		if (!req->a_args.block)
 			break;
 		/* Did a reclaimer thread notify us of a server reboot? */
-		if (resp->status ==  NLM_LCK_DENIED_GRACE_PERIOD)
+		if (resp->status ==  nlm_lck_denied_grace_period)
 			continue;
-		if (resp->status != NLM_LCK_BLOCKED)
+		if (resp->status != nlm_lck_blocked)
 			break;
 		/* Wait on an NLM blocking lock */
 		status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
@@ -535,11 +535,11 @@ again:
 		 */
 		if (status < 0)
 			goto out_unblock;
-		if (resp->status != NLM_LCK_BLOCKED)
+		if (resp->status != nlm_lck_blocked)
 			break;
 	}
 
-	if (resp->status == NLM_LCK_GRANTED) {
+	if (resp->status == nlm_granted) {
 		down_read(&host->h_rwsem);
 		/* Check whether or not the server has rebooted */
 		if (fl->fl_u.nfs_fl.state != host->h_state) {
@@ -556,7 +556,7 @@ again:
 out_unblock:
 	nlmclnt_finish_block(block);
 	/* Cancel the blocked request if it is still pending */
-	if (resp->status == NLM_LCK_BLOCKED)
+	if (resp->status == nlm_lck_blocked)
 		nlmclnt_cancel(host, req->a_args.block, fl);
 out:
 	nlm_release_call(req);
@@ -585,12 +585,12 @@ nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl)
 	req->a_args.reclaim = 1;
 
 	if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0
-	 && req->a_res.status == NLM_LCK_GRANTED)
+	 && req->a_res.status == nlm_granted)
 		return 0;
 
 	printk(KERN_WARNING "lockd: failed to reclaim lock for pid %d "
 				"(errno %d, status %d)\n", fl->fl_pid,
-				status, req->a_res.status);
+				status, ntohl(req->a_res.status));
 
 	/*
 	 * FIXME: This is a serious failure. We can
@@ -637,10 +637,10 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 	if (status < 0)
 		goto out;
 
-	if (resp->status == NLM_LCK_GRANTED)
+	if (resp->status == nlm_granted)
 		goto out;
 
-	if (resp->status != NLM_LCK_DENIED_NOLOCKS)
+	if (resp->status != nlm_lck_denied_nolocks)
 		printk("lockd: unexpected unlock status: %d\n", resp->status);
 	/* What to do now? I'm out of my depth... */
 	status = -ENOLCK;
@@ -652,7 +652,7 @@ out:
 static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
 {
 	struct nlm_rqst	*req = data;
-	int		status = req->a_res.status;
+	u32 status = ntohl(req->a_res.status);
 
 	if (RPC_ASSASSINATED(task))
 		goto die;
@@ -720,6 +720,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
 static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
 {
 	struct nlm_rqst	*req = data;
+	u32 status = ntohl(req->a_res.status);
 
 	if (RPC_ASSASSINATED(task))
 		goto die;
@@ -731,9 +732,9 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
 	}
 
 	dprintk("lockd: cancel status %u (task %u)\n",
-			req->a_res.status, task->tk_pid);
+			status, task->tk_pid);
 
-	switch (req->a_res.status) {
+	switch (status) {
 	case NLM_LCK_GRANTED:
 	case NLM_LCK_DENIED_GRACE_PERIOD:
 	case NLM_LCK_DENIED:
@@ -744,7 +745,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
 		goto retry_cancel;
 	default:
 		printk(KERN_NOTICE "lockd: weird return %d for CANCEL call\n",
-			req->a_res.status);
+			status);
 	}
 
 die:
@@ -768,9 +769,9 @@ static const struct rpc_call_ops nlmclnt_cancel_ops = {
  * Convert an NLM status code to a generic kernel errno
  */
 static int
-nlm_stat_to_errno(u32 status)
+nlm_stat_to_errno(__be32 status)
 {
-	switch(status) {
+	switch(ntohl(status)) {
 	case NLM_LCK_GRANTED:
 		return 0;
 	case NLM_LCK_DENIED:
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 5c054b20fd5..c7db0a5bccd 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -645,7 +645,7 @@ static const struct rpc_call_ops nlmsvc_grant_ops = {
  * block.
  */
 void
-nlmsvc_grant_reply(struct nlm_cookie *cookie, u32 status)
+nlmsvc_grant_reply(struct nlm_cookie *cookie, __be32 status)
 {
 	struct nlm_block	*block;
 
@@ -655,7 +655,7 @@ nlmsvc_grant_reply(struct nlm_cookie *cookie, u32 status)
 		return;
 
 	if (block) {
-		if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
+		if (status == nlm_lck_denied_grace_period) {
 			/* Try again in a couple of seconds */
 			nlmsvc_insert_block(block, 10 * HZ);
 		} else {
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index b7c949256e5..34dae5d7073 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -361,7 +361,7 @@ nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
 		return 0;
-	resp->status = ntohl(*p++);
+	resp->status = *p++;
 	return xdr_argsize_check(rqstp, p);
 }
 
@@ -407,8 +407,8 @@ nlmclt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
 		return -EIO;
-	resp->status = ntohl(*p++);
-	if (resp->status == NLM_LCK_DENIED) {
+	resp->status = *p++;
+	if (resp->status == nlm_lck_denied) {
 		struct file_lock	*fl = &resp->lock.fl;
 		u32			excl;
 		s32			start, len, end;
@@ -506,7 +506,7 @@ nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
 		return -EIO;
-	resp->status = ntohl(*p++);
+	resp->status = *p++;
 	return 0;
 }
 
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index f4c0b2b9f75..a7824055121 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -367,7 +367,7 @@ nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
 		return 0;
-	resp->status = ntohl(*p++);
+	resp->status = *p++;
 	return xdr_argsize_check(rqstp, p);
 }
 
@@ -413,8 +413,8 @@ nlm4clt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
 		return -EIO;
-	resp->status = ntohl(*p++);
-	if (resp->status == NLM_LCK_DENIED) {
+	resp->status = *p++;
+	if (resp->status == nlm_lck_denied) {
 		struct file_lock	*fl = &resp->lock.fl;
 		u32			excl;
 		s64			start, end, len;
@@ -512,7 +512,7 @@ nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
 {
 	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
 		return -EIO;
-	resp->status = ntohl(*p++);
+	resp->status = *p++;
 	return 0;
 }
 
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 11fdaf7721b..221acd1f11f 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -22,7 +22,7 @@
 /*
  * Note: we hold the dentry use count while the file is open.
  */
-static u32
+static __be32
 nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
 {
 	__be32		nfserr;
-- 
cgit v1.2.3


From a71113da44063b587b5a4c2fc94c948a14f2bb43 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 13 Dec 2006 00:35:10 -0800
Subject: [PATCH] smbfs: Make conn_pid a struct pid

smbfs keeps track of the user space server process in conn_pid.  This converts
that track to use a struct pid instead of pid_t.  This keeps us safe from pid
wrap around issues and prepares the way for the pid namespace.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/smbfs/inode.c  | 5 +++--
 fs/smbfs/proc.c   | 6 +++---
 fs/smbfs/smbiod.c | 5 +++--
 3 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 4af4cd729a5..84dfe3f3482 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -482,12 +482,13 @@ smb_put_super(struct super_block *sb)
 	smb_close_socket(server);
 
 	if (server->conn_pid)
-		kill_proc(server->conn_pid, SIGTERM, 1);
+		kill_pid(server->conn_pid, SIGTERM, 1);
 
 	kfree(server->ops);
 	smb_unload_nls(server);
 	sb->s_fs_info = NULL;
 	smb_unlock_server(server);
+	put_pid(server->conn_pid);
 	kfree(server);
 }
 
@@ -530,7 +531,7 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
 	INIT_LIST_HEAD(&server->xmitq);
 	INIT_LIST_HEAD(&server->recvq);
 	server->conn_error = 0;
-	server->conn_pid = 0;
+	server->conn_pid = NULL;
 	server->state = CONN_INVALID; /* no connection yet */
 	server->generation = 0;
 
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index a5ced9e0c6c..feac4605061 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -877,7 +877,7 @@ smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt)
 		goto out_putf;
 
 	server->sock_file = filp;
-	server->conn_pid = current->pid;
+	server->conn_pid = get_pid(task_pid(current));
 	server->opt = *opt;
 	server->generation += 1;
 	server->state = CONN_VALID;
@@ -971,8 +971,8 @@ smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt)
 	}
 
 	VERBOSE("protocol=%d, max_xmit=%d, pid=%d capabilities=0x%x\n",
-		server->opt.protocol, server->opt.max_xmit, server->conn_pid,
-		server->opt.capabilities);
+		server->opt.protocol, server->opt.max_xmit,
+		pid_nr(server->conn_pid), server->opt.capabilities);
 
 	/* FIXME: this really should be done by smbmount. */
 	if (server->opt.max_xmit > SMB_MAX_PACKET_SIZE) {
diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c
index e6754044128..89eaf31f1d4 100644
--- a/fs/smbfs/smbiod.c
+++ b/fs/smbfs/smbiod.c
@@ -152,7 +152,7 @@ int smbiod_retry(struct smb_sb_info *server)
 {
 	struct list_head *head;
 	struct smb_request *req;
-	pid_t pid = server->conn_pid;
+	struct pid *pid = get_pid(server->conn_pid);
 	int result = 0;
 
 	VERBOSE("state: %d\n", server->state);
@@ -222,7 +222,7 @@ int smbiod_retry(struct smb_sb_info *server)
 	/*
 	 * Note: use the "priv" flag, as a user process may need to reconnect.
 	 */
-	result = kill_proc(pid, SIGUSR1, 1);
+	result = kill_pid(pid, SIGUSR1, 1);
 	if (result) {
 		/* FIXME: this is most likely fatal, umount? */
 		printk(KERN_ERR "smb_retry: signal failed [%d]\n", result);
@@ -233,6 +233,7 @@ int smbiod_retry(struct smb_sb_info *server)
 	/* FIXME: The retried requests should perhaps get a "time boost". */
 
 out:
+	put_pid(pid);
 	return result;
 }
 
-- 
cgit v1.2.3


From 2154227a2c6cf04e28576b59c684123eb0e81958 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 13 Dec 2006 00:35:11 -0800
Subject: [PATCH] ncpfs: Use struct pid to track the userspace watchdog process

This patch converts the tracking of the user space watchdog process from using
a pid_t to use struct pid.  This makes us safe from pid wrap around issues and
prepares the way for the pid namespace.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Petr Vandrovec <VANDROVE@vc.cvut.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ncpfs/inode.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 47462ac9447..861d950ac99 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -331,7 +331,7 @@ static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options)
 	data->flags = 0;
 	data->int_flags = 0;
 	data->mounted_uid = 0;
-	data->wdog_pid = -1;
+	data->wdog_pid = NULL;
 	data->ncp_fd = ~0;
 	data->time_out = 10;
 	data->retry_count = 20;
@@ -371,7 +371,7 @@ static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options)
 				data->flags = optint;
 				break;
 			case 'w':
-				data->wdog_pid = optint;
+				data->wdog_pid = find_get_pid(optint);
 				break;
 			case 'n':
 				data->ncp_fd = optint;
@@ -425,7 +425,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 				data.flags = md->flags;
 				data.int_flags = NCP_IMOUNT_LOGGEDIN_POSSIBLE;
 				data.mounted_uid = md->mounted_uid;
-				data.wdog_pid = md->wdog_pid;
+				data.wdog_pid = find_get_pid(md->wdog_pid);
 				data.ncp_fd = md->ncp_fd;
 				data.time_out = md->time_out;
 				data.retry_count = md->retry_count;
@@ -445,7 +445,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 				data.flags = md->flags;
 				data.int_flags = 0;
 				data.mounted_uid = md->mounted_uid;
-				data.wdog_pid = md->wdog_pid;
+				data.wdog_pid = find_get_pid(md->wdog_pid);
 				data.ncp_fd = md->ncp_fd;
 				data.time_out = md->time_out;
 				data.retry_count = md->retry_count;
@@ -711,7 +711,8 @@ static void ncp_put_super(struct super_block *sb)
 	if (server->info_filp)
 		fput(server->info_filp);
 	fput(server->ncp_filp);
-	kill_proc(server->m.wdog_pid, SIGTERM, 1);
+	kill_pid(server->m.wdog_pid, SIGTERM, 1);
+	put_pid(server->m.wdog_pid);
 
 	kfree(server->priv.data);
 	kfree(server->auth.object_name);
-- 
cgit v1.2.3


From 1de241268d7b6bea05d64f9269bf9aa90be49ff1 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 13 Dec 2006 00:35:13 -0800
Subject: [PATCH] ncpfs: ensure we free wdog_pid on parse_option or fill_inode
 failure

This took a little refactoring but now errors are handled cleanly.  When
this code used pid_t values this wasn't necessary because you can't
leak a pid_t.

Thanks to Peter Vandrovec for spotting this.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Peter Vandrovec <vandrove@vc.cvut.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ncpfs/inode.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 861d950ac99..67a90bf795d 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -327,6 +327,7 @@ static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options)
 	char *optarg;
 	unsigned long optint;
 	int version = 0;
+	int ret;
 
 	data->flags = 0;
 	data->int_flags = 0;
@@ -343,8 +344,9 @@ static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options)
 	data->mounted_vol[0] = 0;
 	
 	while ((optval = ncp_getopt("ncpfs", &options, ncp_opts, NULL, &optarg, &optint)) != 0) {
-		if (optval < 0)
-			return optval;
+		ret = optval;
+		if (ret < 0)
+			goto err;
 		switch (optval) {
 			case 'u':
 				data->uid = optint;
@@ -380,18 +382,21 @@ static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options)
 				data->info_fd = optint;
 				break;
 			case 'v':
-				if (optint < NCP_MOUNT_VERSION_V4) {
-					return -ECHRNG;
-				}
-				if (optint > NCP_MOUNT_VERSION_V5) {
-					return -ECHRNG;
-				}
+				ret = -ECHRNG;
+				if (optint < NCP_MOUNT_VERSION_V4)
+					goto err;
+				if (optint > NCP_MOUNT_VERSION_V5)
+					goto err;
 				version = optint;
 				break;
 			
 		}
 	}
 	return 0;
+err:
+	put_pid(data->wdog_pid);
+	data->wdog_pid = NULL;
+	return ret;
 }
 
 static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
@@ -409,6 +414,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 #endif
 	struct ncp_entry_info finfo;
 
+	data.wdog_pid = NULL;
 	server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL);
 	if (!server)
 		return -ENOMEM;
@@ -679,6 +685,7 @@ out_fput:
 	 */
 	fput(ncp_filp);
 out:
+	put_pid(data.wdog_pid);
 	sb->s_fs_info = NULL;
 	kfree(server);
 	return error;
-- 
cgit v1.2.3


From 69688262fb94e92a32f188b79c02dc32016d4927 Mon Sep 17 00:00:00 2001
From: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Date: Wed, 13 Dec 2006 00:35:14 -0800
Subject: [PATCH] update Tigran's email addresses

As Adrian pointed out recently, there were still a couple of places where
I should have fixed my email address.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/bfs/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index eac175ed9f4..134c99941a6 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -1,7 +1,7 @@
 /*
  *	fs/bfs/inode.c
  *	BFS superblock and inode operations.
- *	Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
+ *	Copyright (C) 1999-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
  *	From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds.
  *
  *      Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
@@ -18,7 +18,7 @@
 #include <asm/uaccess.h>
 #include "bfs.h"
 
-MODULE_AUTHOR("Tigran A. Aivazian <tigran@veritas.com>");
+MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
 MODULE_DESCRIPTION("SCO UnixWare BFS filesystem for Linux");
 MODULE_LICENSE("GPL");
 
-- 
cgit v1.2.3


From 029530f810dd5147f7e59b939eb22cfbe0beea12 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 13 Dec 2006 00:35:16 -0800
Subject: [PATCH] one more EXPORT_UNUSED_SYMBOL removal

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/read_write.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/read_write.c b/fs/read_write.c
index 1d3dda4fa70..707ac21700d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -450,8 +450,6 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 	return seg;
 }
 
-EXPORT_UNUSED_SYMBOL(iov_shorten);  /*  June 2006  */
-
 ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 		unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
 {
-- 
cgit v1.2.3


From 451c11a161168fbdbba17573d4b062fdd2a4c2c4 Mon Sep 17 00:00:00 2001
From: "J.Bruce Fields" <bfields@fieldses.org>
Date: Wed, 13 Dec 2006 00:35:18 -0800
Subject: [PATCH] knfsd: nfsd4: remove a dprink from nfsd4_lock

This dprintk is printing the wrong error now, but it's probably an unnecessary
dprintk anyway; just remove it.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b7179bd45a1..393624340d1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2759,7 +2759,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	conflock.fl_ops = NULL;
 	conflock.fl_lmops = NULL;
 	err = posix_lock_file_conf(filp, &file_lock, &conflock);
-	dprintk("NFSD: nfsd4_lock: posix_lock_file_conf status %d\n",status);
 	switch (-err) {
 	case 0: /* success! */
 		update_stateid(&lock_stp->st_stateid);
-- 
cgit v1.2.3


From e57101991156aaba97c630f38e880f0d4012edcd Mon Sep 17 00:00:00 2001
From: "J.Bruce Fields" <bfields@fieldses.org>
Date: Wed, 13 Dec 2006 00:35:20 -0800
Subject: [PATCH] knfsd: nfsd4: clarify units of COMPOUND_SLACK_SPACE

A comment here incorrectly states that "slack_space" is measured in words, not
bytes.  Remove the comment, and adjust a variable name and a few comments to
clarify the situation.

This is pure cleanup; there should be no change in functionality.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 50bc94243ca..793a0b0a6cd 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -740,7 +740,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 	struct svc_fh	*current_fh = NULL;
 	struct svc_fh	*save_fh = NULL;
 	struct nfs4_stateowner *replay_owner = NULL;
-	int		slack_space;    /* in words, not bytes! */
+	int		slack_bytes;
 	__be32		status;
 
 	status = nfserr_resource;
@@ -790,10 +790,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 		 * failed response to the next operation.  If we don't
 		 * have enough room, fail with ERR_RESOURCE.
 		 */
-/* FIXME - is slack_space *really* words, or bytes??? - neilb */
-		slack_space = (char *)resp->end - (char *)resp->p;
-		if (slack_space < COMPOUND_SLACK_SPACE + COMPOUND_ERR_SLACK_SPACE) {
-			BUG_ON(slack_space < COMPOUND_ERR_SLACK_SPACE);
+		slack_bytes = (char *)resp->end - (char *)resp->p;
+		if (slack_bytes < COMPOUND_SLACK_SPACE
+				+ COMPOUND_ERR_SLACK_SPACE) {
+			BUG_ON(slack_bytes < COMPOUND_ERR_SLACK_SPACE);
 			op->status = nfserr_resource;
 			goto encode_op;
 		}
-- 
cgit v1.2.3


From 4b41bd85d5f93af37c2730df172aeb54280be7c3 Mon Sep 17 00:00:00 2001
From: "J.Bruce Fields" <bfields@fieldses.org>
Date: Wed, 13 Dec 2006 00:35:21 -0800
Subject: [PATCH] knfsd: nfsd: make exp_rootfh handle exp_parent errors

Since exp_parent can fail by returning an error (-EAGAIN) in addition to by
returning NULL, we should check for that case in exp_rootfh.

(TODO: we should check that userland handles these errors too.)

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/export.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index f37df46d2ea..0747bb5ffcf 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1104,6 +1104,10 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize)
 		 path, nd.dentry, clp->name,
 		 inode->i_sb->s_id, inode->i_ino);
 	exp = exp_parent(clp, nd.mnt, nd.dentry, NULL);
+	if (IS_ERR(exp)) {
+		err = PTR_ERR(exp);
+		goto out;
+	}
 	if (!exp) {
 		dprintk("nfsd: exp_rootfh export not found.\n");
 		goto out;
-- 
cgit v1.2.3


From 6899320c2cefe5ae6b606f820ba8b762ba21f34a Mon Sep 17 00:00:00 2001
From: "J.Bruce Fields" <bfields@fieldses.org>
Date: Wed, 13 Dec 2006 00:35:23 -0800
Subject: [PATCH] knfsd: nfsd: simplify exp_pseudoroot

Note there's no need for special handling of -EAGAIN here; nfserrno() does
what we want already.  So this is a pure cleanup with no change in
functionality.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/export.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 0747bb5ffcf..b0591cd172e 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1163,12 +1163,10 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
 	mk_fsid_v1(fsidv, 0);
 
 	exp = exp_find(clp, 1, fsidv, creq);
-	if (IS_ERR(exp) && PTR_ERR(exp) == -EAGAIN)
-		return nfserr_dropit;
+	if (IS_ERR(exp))
+		return nfserrno(PTR_ERR(exp));
 	if (exp == NULL)
 		return nfserr_perm;
-	else if (IS_ERR(exp))
-		return nfserrno(PTR_ERR(exp));
 	rv = fh_compose(fhp, exp, exp->ex_dentry, NULL);
 	exp_put(exp);
 	return rv;
-- 
cgit v1.2.3


From 021d3a72459191a76e8e482ee4937ba6bc9fd712 Mon Sep 17 00:00:00 2001
From: "J.Bruce Fields" <bfields@fieldses.org>
Date: Wed, 13 Dec 2006 00:35:24 -0800
Subject: [PATCH] knfsd: nfsd4: handling more nfsd_cross_mnt errors in nfsd4
 readdir

This patch on its own causes no change in behavior, since nfsd_cross_mnt()
only returns -EAGAIN; but in the future I'd like it to also be able to return
-ETIMEDOUT, so we may as well handle any possible error here.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4xdr.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index f3f239db04b..fea46368afb 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1845,15 +1845,11 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
 
 	exp_get(exp);
 	if (d_mountpoint(dentry)) {
-		if (nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp)) {
-		/*
-		 * -EAGAIN is the only error returned from
-		 * nfsd_cross_mnt() and it indicates that an
-		 * up-call has  been initiated to fill in the export
-		 * options on exp.  When the answer comes back,
-		 * this call will be retried.
-		 */
-			nfserr = nfserr_dropit;
+		int err;
+
+		err = nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp);
+		if (err) {
+			nfserr = nfserrno(err);
 			goto out_put;
 		}
 
-- 
cgit v1.2.3


From e0bb89ef031f76dcb9c9d920d18b13948f1418da Mon Sep 17 00:00:00 2001
From: "J.Bruce Fields" <bfields@fieldses.org>
Date: Wed, 13 Dec 2006 00:35:25 -0800
Subject: [PATCH] knfsd: nfsd: don't drop silently on upcall deferral

To avoid tying up server threads when nfsd makes an upcall (to mountd, to get
export options, to idmapd, for nfsv4 name<->id mapping, etc.), we temporarily
"drop" the request and save enough information so that we can revisit it
later.

Certain failures during the deferral process can cause us to really drop the
request and never revisit it.

This is often less than ideal, and is unacceptable in the NFSv4 case--rfc 3530
forbids the server from dropping a request without also closing the
connection.

As a first step, we modify the deferral code to return -ETIMEDOUT (which is
translated to nfserr_jukebox in the v3 and v4 cases, and remains a drop in the
v2 case).

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/export.c | 11 ++++++++---
 fs/nfsd/nfsfh.c  |  6 ++++--
 fs/nfsd/vfs.c    |  2 +-
 3 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index b0591cd172e..1137d09c597 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -787,15 +787,20 @@ exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry,
 	key.ex_dentry = dentry;
 
 	exp = svc_export_lookup(&key);
-	if (exp != NULL) 
-		switch (cache_check(&svc_export_cache, &exp->h, reqp)) {
+	if (exp != NULL)  {
+		int err;
+
+		err = cache_check(&svc_export_cache, &exp->h, reqp);
+		switch (err) {
 		case 0: break;
 		case -EAGAIN:
-			exp = ERR_PTR(-EAGAIN);
+		case -ETIMEDOUT:
+			exp = ERR_PTR(err);
 			break;
 		default:
 			exp = NULL;
 		}
+	}
 
 	return exp;
 }
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 727ab3bd450..b06bf9f70ef 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -169,9 +169,11 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
 			exp = exp_find(rqstp->rq_client, 0, tfh, &rqstp->rq_chandle);
 		}
 
-		error = nfserr_dropit;
-		if (IS_ERR(exp) && PTR_ERR(exp) == -EAGAIN)
+		if (IS_ERR(exp) && (PTR_ERR(exp) == -EAGAIN
+				|| PTR_ERR(exp) == -ETIMEDOUT)) {
+			error = nfserrno(PTR_ERR(exp));
 			goto out;
+		}
 
 		error = nfserr_stale; 
 		if (!exp || IS_ERR(exp))
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 4883d758622..7a79c23aa6d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -99,7 +99,7 @@ static struct raparm_hbucket	raparm_hash[RAPARM_HASH_SIZE];
 /* 
  * Called from nfsd_lookup and encode_dirent. Check if we have crossed 
  * a mount point.
- * Returns -EAGAIN leaving *dpp and *expp unchanged, 
+ * Returns -EAGAIN or -ETIMEDOUT leaving *dpp and *expp unchanged,
  *  or nfs_ok having possibly changed *dpp and *expp
  */
 int
-- 
cgit v1.2.3


From ca3643171bc6e08b7c4d1f9a2ce659541a01a7fe Mon Sep 17 00:00:00 2001
From: "J.Bruce Fields" <bfields@fieldses.org>
Date: Wed, 13 Dec 2006 00:35:27 -0800
Subject: [PATCH] knfsd: nfsd4: pass saved and current fh together into nfsd4
 operations

Pass the saved and current filehandles together into all the nfsd4 compound
operations.

I want a unified interface to these operations so we can just call them by
pointer and throw out the huge switch statement.

Also I'll eventually want a structure like this--that holds the state used
during compound processing--for deferral.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c  | 348 ++++++++++++++++++++++++++++++----------------------
 fs/nfsd/nfs4state.c |  68 ++++++----
 2 files changed, 244 insertions(+), 172 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 793a0b0a6cd..1b6756aa013 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -162,7 +162,8 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
 
 
 static inline __be32
-nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, struct nfs4_stateowner **replay_owner)
+nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	   struct nfsd4_open *open, struct nfs4_stateowner **replay_owner)
 {
 	__be32 status;
 	dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n",
@@ -179,11 +180,11 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
 	status = nfsd4_process_open1(open);
 	if (status == nfserr_replay_me) {
 		struct nfs4_replay *rp = &open->op_stateowner->so_replay;
-		fh_put(current_fh);
-		current_fh->fh_handle.fh_size = rp->rp_openfh_len;
-		memcpy(&current_fh->fh_handle.fh_base, rp->rp_openfh,
+		fh_put(&cstate->current_fh);
+		cstate->current_fh.fh_handle.fh_size = rp->rp_openfh_len;
+		memcpy(&cstate->current_fh.fh_handle.fh_base, rp->rp_openfh,
 				rp->rp_openfh_len);
-		status = fh_verify(rqstp, current_fh, 0, MAY_NOP);
+		status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
 		if (status)
 			dprintk("nfsd4_open: replay failed"
 				" restoring previous filehandle\n");
@@ -215,7 +216,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
 			 * (3) set open->op_truncate if the file is to be
 			 * truncated after opening, (4) do permission checking.
 			 */
-			status = do_open_lookup(rqstp, current_fh, open);
+			status = do_open_lookup(rqstp, &cstate->current_fh,
+						open);
 			if (status)
 				goto out;
 			break;
@@ -227,7 +229,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
 			 * open->op_truncate if the file is to be truncated
 			 * after opening, (3) do permission checking.
 			*/
-			status = do_open_fhandle(rqstp, current_fh, open);
+			status = do_open_fhandle(rqstp, &cstate->current_fh,
+						 open);
 			if (status)
 				goto out;
 			break;
@@ -248,7 +251,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
 	 * successful, it (1) truncates the file if open->op_truncate was
 	 * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
 	 */
-	status = nfsd4_process_open2(rqstp, current_fh, open);
+	status = nfsd4_process_open2(rqstp, &cstate->current_fh, open);
 out:
 	if (open->op_stateowner) {
 		nfs4_get_stateowner(open->op_stateowner);
@@ -262,52 +265,54 @@ out:
  * filehandle-manipulating ops.
  */
 static inline __be32
-nfsd4_getfh(struct svc_fh *current_fh, struct svc_fh **getfh)
+nfsd4_getfh(struct nfsd4_compound_state *cstate, struct svc_fh **getfh)
 {
-	if (!current_fh->fh_dentry)
+	if (!cstate->current_fh.fh_dentry)
 		return nfserr_nofilehandle;
 
-	*getfh = current_fh;
+	*getfh = &cstate->current_fh;
 	return nfs_ok;
 }
 
 static inline __be32
-nfsd4_putfh(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_putfh *putfh)
+nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	    struct nfsd4_putfh *putfh)
 {
-	fh_put(current_fh);
-	current_fh->fh_handle.fh_size = putfh->pf_fhlen;
-	memcpy(&current_fh->fh_handle.fh_base, putfh->pf_fhval, putfh->pf_fhlen);
-	return fh_verify(rqstp, current_fh, 0, MAY_NOP);
+	fh_put(&cstate->current_fh);
+	cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
+	memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
+	       putfh->pf_fhlen);
+	return fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
 }
 
 static inline __be32
-nfsd4_putrootfh(struct svc_rqst *rqstp, struct svc_fh *current_fh)
+nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate)
 {
 	__be32 status;
 
-	fh_put(current_fh);
-	status = exp_pseudoroot(rqstp->rq_client, current_fh,
+	fh_put(&cstate->current_fh);
+	status = exp_pseudoroot(rqstp->rq_client, &cstate->current_fh,
 			      &rqstp->rq_chandle);
 	return status;
 }
 
 static inline __be32
-nfsd4_restorefh(struct svc_fh *current_fh, struct svc_fh *save_fh)
+nfsd4_restorefh(struct nfsd4_compound_state *cstate)
 {
-	if (!save_fh->fh_dentry)
+	if (!cstate->save_fh.fh_dentry)
 		return nfserr_restorefh;
 
-	fh_dup2(current_fh, save_fh);
+	fh_dup2(&cstate->current_fh, &cstate->save_fh);
 	return nfs_ok;
 }
 
 static inline __be32
-nfsd4_savefh(struct svc_fh *current_fh, struct svc_fh *save_fh)
+nfsd4_savefh(struct nfsd4_compound_state *cstate)
 {
-	if (!current_fh->fh_dentry)
+	if (!cstate->current_fh.fh_dentry)
 		return nfserr_nofilehandle;
 
-	fh_dup2(save_fh, current_fh);
+	fh_dup2(&cstate->save_fh, &cstate->current_fh);
 	return nfs_ok;
 }
 
@@ -315,17 +320,20 @@ nfsd4_savefh(struct svc_fh *current_fh, struct svc_fh *save_fh)
  * misc nfsv4 ops
  */
 static inline __be32
-nfsd4_access(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_access *access)
+nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	     struct nfsd4_access *access)
 {
 	if (access->ac_req_access & ~NFS3_ACCESS_FULL)
 		return nfserr_inval;
 
 	access->ac_resp_access = access->ac_req_access;
-	return nfsd_access(rqstp, current_fh, &access->ac_resp_access, &access->ac_supported);
+	return nfsd_access(rqstp, &cstate->current_fh, &access->ac_resp_access,
+			   &access->ac_supported);
 }
 
 static inline __be32
-nfsd4_commit(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_commit *commit)
+nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	     struct nfsd4_commit *commit)
 {
 	__be32 status;
 
@@ -333,14 +341,16 @@ nfsd4_commit(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_com
 	*p++ = nfssvc_boot.tv_sec;
 	*p++ = nfssvc_boot.tv_usec;
 
-	status = nfsd_commit(rqstp, current_fh, commit->co_offset, commit->co_count);
+	status = nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
+			     commit->co_count);
 	if (status == nfserr_symlink)
 		status = nfserr_inval;
 	return status;
 }
 
 static __be32
-nfsd4_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_create *create)
+nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	     struct nfsd4_create *create)
 {
 	struct svc_fh resfh;
 	__be32 status;
@@ -348,7 +358,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_cre
 
 	fh_init(&resfh, NFS4_FHSIZE);
 
-	status = fh_verify(rqstp, current_fh, S_IFDIR, MAY_CREATE);
+	status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, MAY_CREATE);
 	if (status == nfserr_symlink)
 		status = nfserr_notdir;
 	if (status)
@@ -365,9 +375,10 @@ nfsd4_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_cre
 		 */
 		create->cr_linkname[create->cr_linklen] = 0;
 
-		status = nfsd_symlink(rqstp, current_fh, create->cr_name,
-				      create->cr_namelen, create->cr_linkname,
-				      create->cr_linklen, &resfh, &create->cr_iattr);
+		status = nfsd_symlink(rqstp, &cstate->current_fh,
+				      create->cr_name, create->cr_namelen,
+				      create->cr_linkname, create->cr_linklen,
+				      &resfh, &create->cr_iattr);
 		break;
 
 	case NF4BLK:
@@ -375,9 +386,9 @@ nfsd4_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_cre
 		if (MAJOR(rdev) != create->cr_specdata1 ||
 		    MINOR(rdev) != create->cr_specdata2)
 			return nfserr_inval;
-		status = nfsd_create(rqstp, current_fh, create->cr_name,
-				     create->cr_namelen, &create->cr_iattr,
-				     S_IFBLK, rdev, &resfh);
+		status = nfsd_create(rqstp, &cstate->current_fh,
+				     create->cr_name, create->cr_namelen,
+				     &create->cr_iattr, S_IFBLK, rdev, &resfh);
 		break;
 
 	case NF4CHR:
@@ -385,28 +396,28 @@ nfsd4_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_cre
 		if (MAJOR(rdev) != create->cr_specdata1 ||
 		    MINOR(rdev) != create->cr_specdata2)
 			return nfserr_inval;
-		status = nfsd_create(rqstp, current_fh, create->cr_name,
-				     create->cr_namelen, &create->cr_iattr,
-				     S_IFCHR, rdev, &resfh);
+		status = nfsd_create(rqstp, &cstate->current_fh,
+				     create->cr_name, create->cr_namelen,
+				     &create->cr_iattr,S_IFCHR, rdev, &resfh);
 		break;
 
 	case NF4SOCK:
-		status = nfsd_create(rqstp, current_fh, create->cr_name,
-				     create->cr_namelen, &create->cr_iattr,
-				     S_IFSOCK, 0, &resfh);
+		status = nfsd_create(rqstp, &cstate->current_fh,
+				     create->cr_name, create->cr_namelen,
+				     &create->cr_iattr, S_IFSOCK, 0, &resfh);
 		break;
 
 	case NF4FIFO:
-		status = nfsd_create(rqstp, current_fh, create->cr_name,
-				     create->cr_namelen, &create->cr_iattr,
-				     S_IFIFO, 0, &resfh);
+		status = nfsd_create(rqstp, &cstate->current_fh,
+				     create->cr_name, create->cr_namelen,
+				     &create->cr_iattr, S_IFIFO, 0, &resfh);
 		break;
 
 	case NF4DIR:
 		create->cr_iattr.ia_valid &= ~ATTR_SIZE;
-		status = nfsd_create(rqstp, current_fh, create->cr_name,
-				     create->cr_namelen, &create->cr_iattr,
-				     S_IFDIR, 0, &resfh);
+		status = nfsd_create(rqstp, &cstate->current_fh,
+				     create->cr_name, create->cr_namelen,
+				     &create->cr_iattr, S_IFDIR, 0, &resfh);
 		break;
 
 	default:
@@ -414,9 +425,9 @@ nfsd4_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_cre
 	}
 
 	if (!status) {
-		fh_unlock(current_fh);
-		set_change_info(&create->cr_cinfo, current_fh);
-		fh_dup2(current_fh, &resfh);
+		fh_unlock(&cstate->current_fh);
+		set_change_info(&create->cr_cinfo, &cstate->current_fh);
+		fh_dup2(&cstate->current_fh, &resfh);
 	}
 
 	fh_put(&resfh);
@@ -424,11 +435,12 @@ nfsd4_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_cre
 }
 
 static inline __be32
-nfsd4_getattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_getattr *getattr)
+nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	      struct nfsd4_getattr *getattr)
 {
 	__be32 status;
 
-	status = fh_verify(rqstp, current_fh, 0, MAY_NOP);
+	status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
 	if (status)
 		return status;
 
@@ -438,26 +450,27 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ge
 	getattr->ga_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0;
 	getattr->ga_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1;
 
-	getattr->ga_fhp = current_fh;
+	getattr->ga_fhp = &cstate->current_fh;
 	return nfs_ok;
 }
 
 static inline __be32
-nfsd4_link(struct svc_rqst *rqstp, struct svc_fh *current_fh,
-	   struct svc_fh *save_fh, struct nfsd4_link *link)
+nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	   struct nfsd4_link *link)
 {
 	__be32 status = nfserr_nofilehandle;
 
-	if (!save_fh->fh_dentry)
+	if (!cstate->save_fh.fh_dentry)
 		return status;
-	status = nfsd_link(rqstp, current_fh, link->li_name, link->li_namelen, save_fh);
+	status = nfsd_link(rqstp, &cstate->current_fh,
+			   link->li_name, link->li_namelen, &cstate->save_fh);
 	if (!status)
-		set_change_info(&link->li_cinfo, current_fh);
+		set_change_info(&link->li_cinfo, &cstate->current_fh);
 	return status;
 }
 
 static __be32
-nfsd4_lookupp(struct svc_rqst *rqstp, struct svc_fh *current_fh)
+nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate)
 {
 	struct svc_fh tmp_fh;
 	__be32 ret;
@@ -466,22 +479,27 @@ nfsd4_lookupp(struct svc_rqst *rqstp, struct svc_fh *current_fh)
 	if((ret = exp_pseudoroot(rqstp->rq_client, &tmp_fh,
 			      &rqstp->rq_chandle)) != 0)
 		return ret;
-	if (tmp_fh.fh_dentry == current_fh->fh_dentry) {
+	if (tmp_fh.fh_dentry == cstate->current_fh.fh_dentry) {
 		fh_put(&tmp_fh);
 		return nfserr_noent;
 	}
 	fh_put(&tmp_fh);
-	return nfsd_lookup(rqstp, current_fh, "..", 2, current_fh);
+	return nfsd_lookup(rqstp, &cstate->current_fh,
+			   "..", 2, &cstate->current_fh);
 }
 
 static inline __be32
-nfsd4_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lookup *lookup)
+nfsd4_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	     struct nfsd4_lookup *lookup)
 {
-	return nfsd_lookup(rqstp, current_fh, lookup->lo_name, lookup->lo_len, current_fh);
+	return nfsd_lookup(rqstp, &cstate->current_fh,
+			   lookup->lo_name, lookup->lo_len,
+			   &cstate->current_fh);
 }
 
 static inline __be32
-nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read)
+nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	   struct nfsd4_read *read)
 {
 	__be32 status;
 
@@ -493,7 +511,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read
 
 	nfs4_lock_state();
 	/* check stateid */
-	if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid,
+	if ((status = nfs4_preprocess_stateid_op(&cstate->current_fh,
+				&read->rd_stateid,
 				CHECK_FH | RD_STATE, &read->rd_filp))) {
 		dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
 		goto out;
@@ -504,12 +523,13 @@ nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read
 out:
 	nfs4_unlock_state();
 	read->rd_rqstp = rqstp;
-	read->rd_fhp = current_fh;
+	read->rd_fhp = &cstate->current_fh;
 	return status;
 }
 
 static inline __be32
-nfsd4_readdir(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readdir *readdir)
+nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	      struct nfsd4_readdir *readdir)
 {
 	u64 cookie = readdir->rd_cookie;
 	static const nfs4_verifier zeroverf;
@@ -527,48 +547,51 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_re
 		return nfserr_bad_cookie;
 
 	readdir->rd_rqstp = rqstp;
-	readdir->rd_fhp = current_fh;
+	readdir->rd_fhp = &cstate->current_fh;
 	return nfs_ok;
 }
 
 static inline __be32
-nfsd4_readlink(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readlink *readlink)
+nfsd4_readlink(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	       struct nfsd4_readlink *readlink)
 {
 	readlink->rl_rqstp = rqstp;
-	readlink->rl_fhp = current_fh;
+	readlink->rl_fhp = &cstate->current_fh;
 	return nfs_ok;
 }
 
 static inline __be32
-nfsd4_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_remove *remove)
+nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	     struct nfsd4_remove *remove)
 {
 	__be32 status;
 
 	if (nfs4_in_grace())
 		return nfserr_grace;
-	status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen);
+	status = nfsd_unlink(rqstp, &cstate->current_fh, 0,
+			     remove->rm_name, remove->rm_namelen);
 	if (status == nfserr_symlink)
 		return nfserr_notdir;
 	if (!status) {
-		fh_unlock(current_fh);
-		set_change_info(&remove->rm_cinfo, current_fh);
+		fh_unlock(&cstate->current_fh);
+		set_change_info(&remove->rm_cinfo, &cstate->current_fh);
 	}
 	return status;
 }
 
 static inline __be32
-nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh,
-	     struct svc_fh *save_fh, struct nfsd4_rename *rename)
+nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	     struct nfsd4_rename *rename)
 {
 	__be32 status = nfserr_nofilehandle;
 
-	if (!save_fh->fh_dentry)
+	if (!cstate->save_fh.fh_dentry)
 		return status;
-	if (nfs4_in_grace() && !(save_fh->fh_export->ex_flags
+	if (nfs4_in_grace() && !(cstate->save_fh.fh_export->ex_flags
 					& NFSEXP_NOSUBTREECHECK))
 		return nfserr_grace;
-	status = nfsd_rename(rqstp, save_fh, rename->rn_sname,
-			     rename->rn_snamelen, current_fh,
+	status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname,
+			     rename->rn_snamelen, &cstate->current_fh,
 			     rename->rn_tname, rename->rn_tnamelen);
 
 	/* the underlying filesystem returns different error's than required
@@ -576,27 +599,28 @@ nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh,
 	if (status == nfserr_isdir)
 		status = nfserr_exist;
 	else if ((status == nfserr_notdir) &&
-                  (S_ISDIR(save_fh->fh_dentry->d_inode->i_mode) &&
-                   S_ISDIR(current_fh->fh_dentry->d_inode->i_mode)))
+                  (S_ISDIR(cstate->save_fh.fh_dentry->d_inode->i_mode) &&
+                   S_ISDIR(cstate->current_fh.fh_dentry->d_inode->i_mode)))
 		status = nfserr_exist;
 	else if (status == nfserr_symlink)
 		status = nfserr_notdir;
 
 	if (!status) {
-		set_change_info(&rename->rn_sinfo, current_fh);
-		set_change_info(&rename->rn_tinfo, save_fh);
+		set_change_info(&rename->rn_sinfo, &cstate->current_fh);
+		set_change_info(&rename->rn_tinfo, &cstate->save_fh);
 	}
 	return status;
 }
 
 static inline __be32
-nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_setattr *setattr)
+nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	      struct nfsd4_setattr *setattr)
 {
 	__be32 status = nfs_ok;
 
 	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
 		nfs4_lock_state();
-		status = nfs4_preprocess_stateid_op(current_fh,
+		status = nfs4_preprocess_stateid_op(&cstate->current_fh,
 			&setattr->sa_stateid, CHECK_FH | WR_STATE, NULL);
 		nfs4_unlock_state();
 		if (status) {
@@ -606,16 +630,18 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_se
 	}
 	status = nfs_ok;
 	if (setattr->sa_acl != NULL)
-		status = nfsd4_set_nfs4_acl(rqstp, current_fh, setattr->sa_acl);
+		status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh,
+					    setattr->sa_acl);
 	if (status)
 		return status;
-	status = nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr,
+	status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
 				0, (time_t)0);
 	return status;
 }
 
 static inline __be32
-nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_write *write)
+nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	    struct nfsd4_write *write)
 {
 	stateid_t *stateid = &write->wr_stateid;
 	struct file *filp = NULL;
@@ -628,7 +654,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
 		return nfserr_inval;
 
 	nfs4_lock_state();
-	status = nfs4_preprocess_stateid_op(current_fh, stateid,
+	status = nfs4_preprocess_stateid_op(&cstate->current_fh, stateid,
 					CHECK_FH | WR_STATE, &filp);
 	if (filp)
 		get_file(filp);
@@ -645,9 +671,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
 	*p++ = nfssvc_boot.tv_sec;
 	*p++ = nfssvc_boot.tv_usec;
 
-	status =  nfsd_write(rqstp, current_fh, filp, write->wr_offset,
-			rqstp->rq_vec, write->wr_vlen, write->wr_buflen,
-			&write->wr_how_written);
+	status =  nfsd_write(rqstp, &cstate->current_fh, filp,
+			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
+			     write->wr_buflen, &write->wr_how_written);
 	if (filp)
 		fput(filp);
 
@@ -662,13 +688,14 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
  * to NFS_OK after the call; NVERIFY by mapping NFSERR_NOT_SAME to NFS_OK.
  */
 static __be32
-nfsd4_verify(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_verify *verify)
+nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	     struct nfsd4_verify *verify)
 {
 	__be32 *buf, *p;
 	int count;
 	__be32 status;
 
-	status = fh_verify(rqstp, current_fh, 0, MAY_NOP);
+	status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
 	if (status)
 		return status;
 
@@ -689,8 +716,9 @@ nfsd4_verify(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ver
 	if (!buf)
 		return nfserr_resource;
 
-	status = nfsd4_encode_fattr(current_fh, current_fh->fh_export,
-				    current_fh->fh_dentry, buf,
+	status = nfsd4_encode_fattr(&cstate->current_fh,
+				    cstate->current_fh.fh_export,
+				    cstate->current_fh.fh_dentry, buf,
 				    &count, verify->ve_bmval,
 				    rqstp);
 
@@ -727,6 +755,26 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
 		nfsdstats.nfs4_opcount[opnum]++;
 }
 
+static void cstate_free(struct nfsd4_compound_state *cstate)
+{
+	if (cstate == NULL)
+		return;
+	fh_put(&cstate->current_fh);
+	fh_put(&cstate->save_fh);
+	kfree(cstate);
+}
+
+static struct nfsd4_compound_state *cstate_alloc(void)
+{
+	struct nfsd4_compound_state *cstate;
+
+	cstate = kmalloc(sizeof(struct nfsd4_compound_state), GFP_KERNEL);
+	if (cstate == NULL)
+		return NULL;
+	fh_init(&cstate->current_fh, NFS4_FHSIZE);
+	fh_init(&cstate->save_fh, NFS4_FHSIZE);
+	return cstate;
+}
 
 /*
  * COMPOUND call.
@@ -737,21 +785,15 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 		    struct nfsd4_compoundres *resp)
 {
 	struct nfsd4_op	*op;
-	struct svc_fh	*current_fh = NULL;
-	struct svc_fh	*save_fh = NULL;
+	struct nfsd4_compound_state *cstate = NULL;
 	struct nfs4_stateowner *replay_owner = NULL;
 	int		slack_bytes;
 	__be32		status;
 
 	status = nfserr_resource;
-	current_fh = kmalloc(sizeof(*current_fh), GFP_KERNEL);
-	if (current_fh == NULL)
-		goto out;
-	fh_init(current_fh, NFS4_FHSIZE);
-	save_fh = kmalloc(sizeof(*save_fh), GFP_KERNEL);
-	if (save_fh == NULL)
+	cstate = cstate_alloc();
+	if (cstate == NULL)
 		goto out;
-	fh_init(save_fh, NFS4_FHSIZE);
 
 	resp->xbuf = &rqstp->rq_res;
 	resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len;
@@ -802,7 +844,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 		* SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH
 		* require a valid current filehandle
 		*/
-		if (!current_fh->fh_dentry) {
+		if (!cstate->current_fh.fh_dentry) {
 			if (!((op->opnum == OP_PUTFH) ||
 			      (op->opnum == OP_PUTROOTFH) ||
 			      (op->opnum == OP_SETCLIENTID) ||
@@ -817,7 +859,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 		/* Check must be done at start of each operation, except
 		 * for GETATTR and ops not listed as returning NFS4ERR_MOVED
 		 */
-		else if (current_fh->fh_export->ex_fslocs.migrated &&
+		else if (cstate->current_fh.fh_export->ex_fslocs.migrated &&
 			 !((op->opnum == OP_GETATTR) ||
 			   (op->opnum == OP_PUTROOTFH) ||
 			   (op->opnum == OP_PUTPUBFH) ||
@@ -829,90 +871,110 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 		}
 		switch (op->opnum) {
 		case OP_ACCESS:
-			op->status = nfsd4_access(rqstp, current_fh, &op->u.access);
+			op->status = nfsd4_access(rqstp, cstate,
+						  &op->u.access);
 			break;
 		case OP_CLOSE:
-			op->status = nfsd4_close(rqstp, current_fh, &op->u.close, &replay_owner);
+			op->status = nfsd4_close(rqstp, cstate,
+						 &op->u.close, &replay_owner);
 			break;
 		case OP_COMMIT:
-			op->status = nfsd4_commit(rqstp, current_fh, &op->u.commit);
+			op->status = nfsd4_commit(rqstp, cstate,
+						  &op->u.commit);
 			break;
 		case OP_CREATE:
-			op->status = nfsd4_create(rqstp, current_fh, &op->u.create);
+			op->status = nfsd4_create(rqstp, cstate,
+						  &op->u.create);
 			break;
 		case OP_DELEGRETURN:
-			op->status = nfsd4_delegreturn(rqstp, current_fh, &op->u.delegreturn);
+			op->status = nfsd4_delegreturn(rqstp, cstate,
+						       &op->u.delegreturn);
 			break;
 		case OP_GETATTR:
-			op->status = nfsd4_getattr(rqstp, current_fh, &op->u.getattr);
+			op->status = nfsd4_getattr(rqstp, cstate,
+						   &op->u.getattr);
 			break;
 		case OP_GETFH:
-			op->status = nfsd4_getfh(current_fh, &op->u.getfh);
+			op->status = nfsd4_getfh(cstate, &op->u.getfh);
 			break;
 		case OP_LINK:
-			op->status = nfsd4_link(rqstp, current_fh, save_fh, &op->u.link);
+			op->status = nfsd4_link(rqstp, cstate, &op->u.link);
 			break;
 		case OP_LOCK:
-			op->status = nfsd4_lock(rqstp, current_fh, &op->u.lock, &replay_owner);
+			op->status = nfsd4_lock(rqstp, cstate, &op->u.lock,
+						&replay_owner);
 			break;
 		case OP_LOCKT:
-			op->status = nfsd4_lockt(rqstp, current_fh, &op->u.lockt);
+			op->status = nfsd4_lockt(rqstp, cstate, &op->u.lockt);
 			break;
 		case OP_LOCKU:
-			op->status = nfsd4_locku(rqstp, current_fh, &op->u.locku, &replay_owner);
+			op->status = nfsd4_locku(rqstp, cstate, &op->u.locku,
+						 &replay_owner);
 			break;
 		case OP_LOOKUP:
-			op->status = nfsd4_lookup(rqstp, current_fh, &op->u.lookup);
+			op->status = nfsd4_lookup(rqstp, cstate,
+						  &op->u.lookup);
 			break;
 		case OP_LOOKUPP:
-			op->status = nfsd4_lookupp(rqstp, current_fh);
+			op->status = nfsd4_lookupp(rqstp, cstate);
 			break;
 		case OP_NVERIFY:
-			op->status = nfsd4_verify(rqstp, current_fh, &op->u.nverify);
+			op->status = nfsd4_verify(rqstp, cstate,
+						  &op->u.nverify);
 			if (op->status == nfserr_not_same)
 				op->status = nfs_ok;
 			break;
 		case OP_OPEN:
-			op->status = nfsd4_open(rqstp, current_fh, &op->u.open, &replay_owner);
+			op->status = nfsd4_open(rqstp, cstate,
+						&op->u.open, &replay_owner);
 			break;
 		case OP_OPEN_CONFIRM:
-			op->status = nfsd4_open_confirm(rqstp, current_fh, &op->u.open_confirm, &replay_owner);
+			op->status = nfsd4_open_confirm(rqstp, cstate,
+							&op->u.open_confirm,
+							&replay_owner);
 			break;
 		case OP_OPEN_DOWNGRADE:
-			op->status = nfsd4_open_downgrade(rqstp, current_fh, &op->u.open_downgrade, &replay_owner);
+			op->status = nfsd4_open_downgrade(rqstp, cstate,
+							 &op->u.open_downgrade,
+							 &replay_owner);
 			break;
 		case OP_PUTFH:
-			op->status = nfsd4_putfh(rqstp, current_fh, &op->u.putfh);
+			op->status = nfsd4_putfh(rqstp, cstate, &op->u.putfh);
 			break;
 		case OP_PUTROOTFH:
-			op->status = nfsd4_putrootfh(rqstp, current_fh);
+			op->status = nfsd4_putrootfh(rqstp, cstate);
 			break;
 		case OP_READ:
-			op->status = nfsd4_read(rqstp, current_fh, &op->u.read);
+			op->status = nfsd4_read(rqstp, cstate, &op->u.read);
 			break;
 		case OP_READDIR:
-			op->status = nfsd4_readdir(rqstp, current_fh, &op->u.readdir);
+			op->status = nfsd4_readdir(rqstp, cstate,
+						   &op->u.readdir);
 			break;
 		case OP_READLINK:
-			op->status = nfsd4_readlink(rqstp, current_fh, &op->u.readlink);
+			op->status = nfsd4_readlink(rqstp, cstate,
+						    &op->u.readlink);
 			break;
 		case OP_REMOVE:
-			op->status = nfsd4_remove(rqstp, current_fh, &op->u.remove);
+			op->status = nfsd4_remove(rqstp, cstate,
+						  &op->u.remove);
 			break;
 		case OP_RENAME:
-			op->status = nfsd4_rename(rqstp, current_fh, save_fh, &op->u.rename);
+			op->status = nfsd4_rename(rqstp, cstate,
+						  &op->u.rename);
 			break;
 		case OP_RENEW:
 			op->status = nfsd4_renew(&op->u.renew);
 			break;
 		case OP_RESTOREFH:
-			op->status = nfsd4_restorefh(current_fh, save_fh);
+			op->status = nfsd4_restorefh(cstate);
 			break;
 		case OP_SAVEFH:
-			op->status = nfsd4_savefh(current_fh, save_fh);
+			op->status = nfsd4_savefh(cstate);
 			break;
 		case OP_SETATTR:
-			op->status = nfsd4_setattr(rqstp, current_fh, &op->u.setattr);
+			op->status = nfsd4_setattr(rqstp, cstate,
+						   &op->u.setattr);
 			break;
 		case OP_SETCLIENTID:
 			op->status = nfsd4_setclientid(rqstp, &op->u.setclientid);
@@ -921,12 +983,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 			op->status = nfsd4_setclientid_confirm(rqstp, &op->u.setclientid_confirm);
 			break;
 		case OP_VERIFY:
-			op->status = nfsd4_verify(rqstp, current_fh, &op->u.verify);
+			op->status = nfsd4_verify(rqstp, cstate,
+						  &op->u.verify);
 			if (op->status == nfserr_same)
 				op->status = nfs_ok;
 			break;
 		case OP_WRITE:
-			op->status = nfsd4_write(rqstp, current_fh, &op->u.write);
+			op->status = nfsd4_write(rqstp, cstate, &op->u.write);
 			break;
 		case OP_RELEASE_LOCKOWNER:
 			op->status = nfsd4_release_lockowner(rqstp, &op->u.release_lockowner);
@@ -958,12 +1021,7 @@ encode_op:
 
 out:
 	nfsd4_release_compoundargs(args);
-	if (current_fh)
-		fh_put(current_fh);
-	kfree(current_fh);
-	if (save_fh)
-		fh_put(save_fh);
-	kfree(save_fh);
+	cstate_free(cstate);
 	return status;
 }
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 393624340d1..f0a994e3b2c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2241,24 +2241,26 @@ check_replay:
 }
 
 __be32
-nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc, struct nfs4_stateowner **replay_owner)
+nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		   struct nfsd4_open_confirm *oc,
+		   struct nfs4_stateowner **replay_owner)
 {
 	__be32 status;
 	struct nfs4_stateowner *sop;
 	struct nfs4_stateid *stp;
 
 	dprintk("NFSD: nfsd4_open_confirm on file %.*s\n",
-			(int)current_fh->fh_dentry->d_name.len,
-			current_fh->fh_dentry->d_name.name);
+			(int)cstate->current_fh.fh_dentry->d_name.len,
+			cstate->current_fh.fh_dentry->d_name.name);
 
-	status = fh_verify(rqstp, current_fh, S_IFREG, 0);
+	status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
 	if (status)
 		return status;
 
 	nfs4_lock_state();
 
-	if ((status = nfs4_preprocess_seqid_op(current_fh, oc->oc_seqid,
-					&oc->oc_req_stateid,
+	if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh,
+					oc->oc_seqid, &oc->oc_req_stateid,
 					CHECK_FH | CONFIRM | OPEN_STATE,
 					&oc->oc_stateowner, &stp, NULL)))
 		goto out; 
@@ -2310,22 +2312,26 @@ reset_union_bmap_deny(unsigned long deny, unsigned long *bmap)
 }
 
 __be32
-nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od, struct nfs4_stateowner **replay_owner)
+nfsd4_open_downgrade(struct svc_rqst *rqstp,
+		     struct nfsd4_compound_state *cstate,
+		     struct nfsd4_open_downgrade *od,
+		     struct nfs4_stateowner **replay_owner)
 {
 	__be32 status;
 	struct nfs4_stateid *stp;
 	unsigned int share_access;
 
 	dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", 
-			(int)current_fh->fh_dentry->d_name.len,
-			current_fh->fh_dentry->d_name.name);
+			(int)cstate->current_fh.fh_dentry->d_name.len,
+			cstate->current_fh.fh_dentry->d_name.name);
 
 	if (!access_valid(od->od_share_access)
 			|| !deny_valid(od->od_share_deny))
 		return nfserr_inval;
 
 	nfs4_lock_state();
-	if ((status = nfs4_preprocess_seqid_op(current_fh, od->od_seqid, 
+	if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh,
+					od->od_seqid,
 					&od->od_stateid, 
 					CHECK_FH | OPEN_STATE, 
 					&od->od_stateowner, &stp, NULL)))
@@ -2365,18 +2371,20 @@ out:
  * nfs4_unlock_state() called after encode
  */
 __be32
-nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_close *close, struct nfs4_stateowner **replay_owner)
+nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	    struct nfsd4_close *close, struct nfs4_stateowner **replay_owner)
 {
 	__be32 status;
 	struct nfs4_stateid *stp;
 
 	dprintk("NFSD: nfsd4_close on file %.*s\n", 
-			(int)current_fh->fh_dentry->d_name.len,
-			current_fh->fh_dentry->d_name.name);
+			(int)cstate->current_fh.fh_dentry->d_name.len,
+			cstate->current_fh.fh_dentry->d_name.name);
 
 	nfs4_lock_state();
 	/* check close_lru for replay */
-	if ((status = nfs4_preprocess_seqid_op(current_fh, close->cl_seqid, 
+	if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh,
+					close->cl_seqid,
 					&close->cl_stateid, 
 					CHECK_FH | OPEN_STATE | CLOSE_STATE,
 					&close->cl_stateowner, &stp, NULL)))
@@ -2404,15 +2412,17 @@ out:
 }
 
 __be32
-nfsd4_delegreturn(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_delegreturn *dr)
+nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		  struct nfsd4_delegreturn *dr)
 {
 	__be32 status;
 
-	if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0)))
+	if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
 		goto out;
 
 	nfs4_lock_state();
-	status = nfs4_preprocess_stateid_op(current_fh, &dr->dr_stateid, DELEG_RET, NULL);
+	status = nfs4_preprocess_stateid_op(&cstate->current_fh,
+					    &dr->dr_stateid, DELEG_RET, NULL);
 	nfs4_unlock_state();
 out:
 	return status;
@@ -2635,7 +2645,8 @@ check_lock_length(u64 offset, u64 length)
  *  LOCK operation 
  */
 __be32
-nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock, struct nfs4_stateowner **replay_owner)
+nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	   struct nfsd4_lock *lock, struct nfs4_stateowner **replay_owner)
 {
 	struct nfs4_stateowner *open_sop = NULL;
 	struct nfs4_stateowner *lock_sop = NULL;
@@ -2654,7 +2665,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	if (check_lock_length(lock->lk_offset, lock->lk_length))
 		 return nfserr_inval;
 
-	if ((status = fh_verify(rqstp, current_fh, S_IFREG, MAY_LOCK))) {
+	if ((status = fh_verify(rqstp, &cstate->current_fh,
+				S_IFREG, MAY_LOCK))) {
 		dprintk("NFSD: nfsd4_lock: permission denied!\n");
 		return status;
 	}
@@ -2675,7 +2687,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 			goto out;
 
 		/* validate and update open stateid and open seqid */
-		status = nfs4_preprocess_seqid_op(current_fh, 
+		status = nfs4_preprocess_seqid_op(&cstate->current_fh,
 				        lock->lk_new_open_seqid,
 		                        &lock->lk_new_open_stateid,
 		                        CHECK_FH | OPEN_STATE,
@@ -2702,7 +2714,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 			goto out;
 	} else {
 		/* lock (lock owner + lock stateid) already exists */
-		status = nfs4_preprocess_seqid_op(current_fh,
+		status = nfs4_preprocess_seqid_op(&cstate->current_fh,
 				       lock->lk_old_lock_seqid, 
 				       &lock->lk_old_lock_stateid, 
 				       CHECK_FH | LOCK_STATE, 
@@ -2794,7 +2806,8 @@ out:
  * LOCKT operation
  */
 __be32
-nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lockt *lockt)
+nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	    struct nfsd4_lockt *lockt)
 {
 	struct inode *inode;
 	struct file file;
@@ -2815,14 +2828,14 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	if (STALE_CLIENTID(&lockt->lt_clientid))
 		goto out;
 
-	if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) {
+	if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) {
 		dprintk("NFSD: nfsd4_lockt: fh_verify() failed!\n");
 		if (status == nfserr_symlink)
 			status = nfserr_inval;
 		goto out;
 	}
 
-	inode = current_fh->fh_dentry->d_inode;
+	inode = cstate->current_fh.fh_dentry->d_inode;
 	locks_init_lock(&file_lock);
 	switch (lockt->lt_type) {
 		case NFS4_READ_LT:
@@ -2861,7 +2874,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 	 * only the dentry:inode set.
 	 */
 	memset(&file, 0, sizeof (struct file));
-	file.f_path.dentry = current_fh->fh_dentry;
+	file.f_path.dentry = cstate->current_fh.fh_dentry;
 
 	status = nfs_ok;
 	if (posix_test_lock(&file, &file_lock, &conflock)) {
@@ -2874,7 +2887,8 @@ out:
 }
 
 __be32
-nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_locku *locku, struct nfs4_stateowner **replay_owner)
+nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	    struct nfsd4_locku *locku, struct nfs4_stateowner **replay_owner)
 {
 	struct nfs4_stateid *stp;
 	struct file *filp = NULL;
@@ -2891,7 +2905,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 
 	nfs4_lock_state();
 									        
-	if ((status = nfs4_preprocess_seqid_op(current_fh, 
+	if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh,
 					locku->lu_seqid, 
 					&locku->lu_stateid, 
 					CHECK_FH | LOCK_STATE, 
-- 
cgit v1.2.3


From d9e626f1e23358487595c2d3901126d00f9de7e0 Mon Sep 17 00:00:00 2001
From: "J.Bruce Fields" <bfields@fieldses.org>
Date: Wed, 13 Dec 2006 00:35:28 -0800
Subject: [PATCH] knfsd: nfsd4: remove spurious replay_owner check

OK, this is embarassing--I've even looked back at the history, and cannot for
the life of me figure out why I added this check.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 1b6756aa013..0166b49fb23 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1008,7 +1008,7 @@ encode_op:
 			nfsd4_encode_operation(resp, op);
 			status = op->status;
 		}
-		if (replay_owner && (replay_owner != (void *)(-1))) {
+		if (replay_owner) {
 			nfs4_put_stateowner(replay_owner);
 			replay_owner = NULL;
 		}
-- 
cgit v1.2.3


From a4f1706a9bd94d0e33e853a7e9f40b2650d54fbf Mon Sep 17 00:00:00 2001
From: "J.Bruce Fields" <bfields@fieldses.org>
Date: Wed, 13 Dec 2006 00:35:28 -0800
Subject: [PATCH] knfsd: nfsd4: move replay_owner to cstate

Tuck away the replay_owner in the cstate while we're at it.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c  | 31 ++++++++++++++-----------------
 fs/nfsd/nfs4state.c | 22 ++++++++++------------
 2 files changed, 24 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0166b49fb23..0972cb33d7d 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -163,7 +163,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
 
 static inline __be32
 nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	   struct nfsd4_open *open, struct nfs4_stateowner **replay_owner)
+	   struct nfsd4_open *open)
 {
 	__be32 status;
 	dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n",
@@ -255,7 +255,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 out:
 	if (open->op_stateowner) {
 		nfs4_get_stateowner(open->op_stateowner);
-		*replay_owner = open->op_stateowner;
+		cstate->replay_owner = open->op_stateowner;
 	}
 	nfs4_unlock_state();
 	return status;
@@ -761,6 +761,7 @@ static void cstate_free(struct nfsd4_compound_state *cstate)
 		return;
 	fh_put(&cstate->current_fh);
 	fh_put(&cstate->save_fh);
+	BUG_ON(cstate->replay_owner);
 	kfree(cstate);
 }
 
@@ -773,6 +774,7 @@ static struct nfsd4_compound_state *cstate_alloc(void)
 		return NULL;
 	fh_init(&cstate->current_fh, NFS4_FHSIZE);
 	fh_init(&cstate->save_fh, NFS4_FHSIZE);
+	cstate->replay_owner = NULL;
 	return cstate;
 }
 
@@ -786,7 +788,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 {
 	struct nfsd4_op	*op;
 	struct nfsd4_compound_state *cstate = NULL;
-	struct nfs4_stateowner *replay_owner = NULL;
 	int		slack_bytes;
 	__be32		status;
 
@@ -876,7 +877,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 			break;
 		case OP_CLOSE:
 			op->status = nfsd4_close(rqstp, cstate,
-						 &op->u.close, &replay_owner);
+						 &op->u.close);
 			break;
 		case OP_COMMIT:
 			op->status = nfsd4_commit(rqstp, cstate,
@@ -901,15 +902,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 			op->status = nfsd4_link(rqstp, cstate, &op->u.link);
 			break;
 		case OP_LOCK:
-			op->status = nfsd4_lock(rqstp, cstate, &op->u.lock,
-						&replay_owner);
+			op->status = nfsd4_lock(rqstp, cstate, &op->u.lock);
 			break;
 		case OP_LOCKT:
 			op->status = nfsd4_lockt(rqstp, cstate, &op->u.lockt);
 			break;
 		case OP_LOCKU:
-			op->status = nfsd4_locku(rqstp, cstate, &op->u.locku,
-						 &replay_owner);
+			op->status = nfsd4_locku(rqstp, cstate, &op->u.locku);
 			break;
 		case OP_LOOKUP:
 			op->status = nfsd4_lookup(rqstp, cstate,
@@ -926,17 +925,15 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 			break;
 		case OP_OPEN:
 			op->status = nfsd4_open(rqstp, cstate,
-						&op->u.open, &replay_owner);
+						&op->u.open);
 			break;
 		case OP_OPEN_CONFIRM:
 			op->status = nfsd4_open_confirm(rqstp, cstate,
-							&op->u.open_confirm,
-							&replay_owner);
+							&op->u.open_confirm);
 			break;
 		case OP_OPEN_DOWNGRADE:
 			op->status = nfsd4_open_downgrade(rqstp, cstate,
-							 &op->u.open_downgrade,
-							 &replay_owner);
+							&op->u.open_downgrade);
 			break;
 		case OP_PUTFH:
 			op->status = nfsd4_putfh(rqstp, cstate, &op->u.putfh);
@@ -1001,16 +998,16 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 
 encode_op:
 		if (op->status == nfserr_replay_me) {
-			op->replay = &replay_owner->so_replay;
+			op->replay = &cstate->replay_owner->so_replay;
 			nfsd4_encode_replay(resp, op);
 			status = op->status = op->replay->rp_status;
 		} else {
 			nfsd4_encode_operation(resp, op);
 			status = op->status;
 		}
-		if (replay_owner) {
-			nfs4_put_stateowner(replay_owner);
-			replay_owner = NULL;
+		if (cstate->replay_owner) {
+			nfs4_put_stateowner(cstate->replay_owner);
+			cstate->replay_owner = NULL;
 		}
 		/* XXX Ugh, we need to get rid of this kind of special case: */
 		if (op->opnum == OP_READ && op->u.read.rd_filp)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f0a994e3b2c..db701c47a34 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2242,8 +2242,7 @@ check_replay:
 
 __be32
 nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		   struct nfsd4_open_confirm *oc,
-		   struct nfs4_stateowner **replay_owner)
+		   struct nfsd4_open_confirm *oc)
 {
 	__be32 status;
 	struct nfs4_stateowner *sop;
@@ -2280,7 +2279,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 out:
 	if (oc->oc_stateowner) {
 		nfs4_get_stateowner(oc->oc_stateowner);
-		*replay_owner = oc->oc_stateowner;
+		cstate->replay_owner = oc->oc_stateowner;
 	}
 	nfs4_unlock_state();
 	return status;
@@ -2314,8 +2313,7 @@ reset_union_bmap_deny(unsigned long deny, unsigned long *bmap)
 __be32
 nfsd4_open_downgrade(struct svc_rqst *rqstp,
 		     struct nfsd4_compound_state *cstate,
-		     struct nfsd4_open_downgrade *od,
-		     struct nfs4_stateowner **replay_owner)
+		     struct nfsd4_open_downgrade *od)
 {
 	__be32 status;
 	struct nfs4_stateid *stp;
@@ -2361,7 +2359,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
 out:
 	if (od->od_stateowner) {
 		nfs4_get_stateowner(od->od_stateowner);
-		*replay_owner = od->od_stateowner;
+		cstate->replay_owner = od->od_stateowner;
 	}
 	nfs4_unlock_state();
 	return status;
@@ -2372,7 +2370,7 @@ out:
  */
 __be32
 nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_close *close, struct nfs4_stateowner **replay_owner)
+	    struct nfsd4_close *close)
 {
 	__be32 status;
 	struct nfs4_stateid *stp;
@@ -2405,7 +2403,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 out:
 	if (close->cl_stateowner) {
 		nfs4_get_stateowner(close->cl_stateowner);
-		*replay_owner = close->cl_stateowner;
+		cstate->replay_owner = close->cl_stateowner;
 	}
 	nfs4_unlock_state();
 	return status;
@@ -2646,7 +2644,7 @@ check_lock_length(u64 offset, u64 length)
  */
 __be32
 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	   struct nfsd4_lock *lock, struct nfs4_stateowner **replay_owner)
+	   struct nfsd4_lock *lock)
 {
 	struct nfs4_stateowner *open_sop = NULL;
 	struct nfs4_stateowner *lock_sop = NULL;
@@ -2796,7 +2794,7 @@ out:
 		release_stateowner(lock_sop);
 	if (lock->lk_replay_owner) {
 		nfs4_get_stateowner(lock->lk_replay_owner);
-		*replay_owner = lock->lk_replay_owner;
+		cstate->replay_owner = lock->lk_replay_owner;
 	}
 	nfs4_unlock_state();
 	return status;
@@ -2888,7 +2886,7 @@ out:
 
 __be32
 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_locku *locku, struct nfs4_stateowner **replay_owner)
+	    struct nfsd4_locku *locku)
 {
 	struct nfs4_stateid *stp;
 	struct file *filp = NULL;
@@ -2946,7 +2944,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 out:
 	if (locku->lu_stateowner) {
 		nfs4_get_stateowner(locku->lu_stateowner);
-		*replay_owner = locku->lu_stateowner;
+		cstate->replay_owner = locku->lu_stateowner;
 	}
 	nfs4_unlock_state();
 	return status;
-- 
cgit v1.2.3


From 7191155bd3abbbf62368a0528f7610741d007a58 Mon Sep 17 00:00:00 2001
From: "J.Bruce Fields" <bfields@fieldses.org>
Date: Wed, 13 Dec 2006 00:35:29 -0800
Subject: [PATCH] knfsd: nfsd4: don't inline nfsd4 compound op functions

The inlining contributes to bloating the stack of nfsd4_compound, and I want
to change the compound op functions to function pointers anyway.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c | 43 ++++++++++++++++++-------------------------
 1 file changed, 18 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0972cb33d7d..9fcf46a7d95 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -33,13 +33,6 @@
  *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Note: some routines in this file are just trivial wrappers
- * (e.g. nfsd4_lookup()) defined solely for the sake of consistent
- * naming.  Since all such routines have been declared "inline",
- * there shouldn't be any associated overhead.  At some point in
- * the future, I might inline these "by hand" to clean up a
- * little.
  */
 
 #include <linux/param.h>
@@ -161,7 +154,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
 }
 
 
-static inline __be32
+static __be32
 nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	   struct nfsd4_open *open)
 {
@@ -264,7 +257,7 @@ out:
 /*
  * filehandle-manipulating ops.
  */
-static inline __be32
+static __be32
 nfsd4_getfh(struct nfsd4_compound_state *cstate, struct svc_fh **getfh)
 {
 	if (!cstate->current_fh.fh_dentry)
@@ -274,7 +267,7 @@ nfsd4_getfh(struct nfsd4_compound_state *cstate, struct svc_fh **getfh)
 	return nfs_ok;
 }
 
-static inline __be32
+static __be32
 nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	    struct nfsd4_putfh *putfh)
 {
@@ -285,7 +278,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	return fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
 }
 
-static inline __be32
+static __be32
 nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate)
 {
 	__be32 status;
@@ -296,7 +289,7 @@ nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate)
 	return status;
 }
 
-static inline __be32
+static __be32
 nfsd4_restorefh(struct nfsd4_compound_state *cstate)
 {
 	if (!cstate->save_fh.fh_dentry)
@@ -306,7 +299,7 @@ nfsd4_restorefh(struct nfsd4_compound_state *cstate)
 	return nfs_ok;
 }
 
-static inline __be32
+static __be32
 nfsd4_savefh(struct nfsd4_compound_state *cstate)
 {
 	if (!cstate->current_fh.fh_dentry)
@@ -319,7 +312,7 @@ nfsd4_savefh(struct nfsd4_compound_state *cstate)
 /*
  * misc nfsv4 ops
  */
-static inline __be32
+static __be32
 nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     struct nfsd4_access *access)
 {
@@ -331,7 +324,7 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			   &access->ac_supported);
 }
 
-static inline __be32
+static __be32
 nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     struct nfsd4_commit *commit)
 {
@@ -434,7 +427,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	return status;
 }
 
-static inline __be32
+static __be32
 nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	      struct nfsd4_getattr *getattr)
 {
@@ -454,7 +447,7 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	return nfs_ok;
 }
 
-static inline __be32
+static __be32
 nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	   struct nfsd4_link *link)
 {
@@ -488,7 +481,7 @@ nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate)
 			   "..", 2, &cstate->current_fh);
 }
 
-static inline __be32
+static __be32
 nfsd4_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     struct nfsd4_lookup *lookup)
 {
@@ -497,7 +490,7 @@ nfsd4_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			   &cstate->current_fh);
 }
 
-static inline __be32
+static __be32
 nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	   struct nfsd4_read *read)
 {
@@ -527,7 +520,7 @@ out:
 	return status;
 }
 
-static inline __be32
+static __be32
 nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	      struct nfsd4_readdir *readdir)
 {
@@ -551,7 +544,7 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	return nfs_ok;
 }
 
-static inline __be32
+static __be32
 nfsd4_readlink(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	       struct nfsd4_readlink *readlink)
 {
@@ -560,7 +553,7 @@ nfsd4_readlink(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	return nfs_ok;
 }
 
-static inline __be32
+static __be32
 nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     struct nfsd4_remove *remove)
 {
@@ -579,7 +572,7 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	return status;
 }
 
-static inline __be32
+static __be32
 nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     struct nfsd4_rename *rename)
 {
@@ -612,7 +605,7 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	return status;
 }
 
-static inline __be32
+static __be32
 nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	      struct nfsd4_setattr *setattr)
 {
@@ -639,7 +632,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	return status;
 }
 
-static inline __be32
+static __be32
 nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	    struct nfsd4_write *write)
 {
-- 
cgit v1.2.3


From c954e2a5d1c9662991a41282297ddebcadee0578 Mon Sep 17 00:00:00 2001
From: "J.Bruce Fields" <bfields@fieldses.org>
Date: Wed, 13 Dec 2006 00:35:31 -0800
Subject: [PATCH] knfsd: nfsd4: make verify and nverify wrappers

Make wrappers for verify and nverify, for consistency with other ops.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 9fcf46a7d95..8fc2cab9d76 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -681,7 +681,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
  * to NFS_OK after the call; NVERIFY by mapping NFSERR_NOT_SAME to NFS_OK.
  */
 static __be32
-nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+_nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     struct nfsd4_verify *verify)
 {
 	__be32 *buf, *p;
@@ -733,6 +733,26 @@ out_kfree:
 	return status;
 }
 
+static __be32
+nfsd4_nverify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	      struct nfsd4_verify *verify)
+{
+	__be32 status;
+
+	status = _nfsd4_verify(rqstp, cstate, verify);
+	return status == nfserr_not_same ? nfs_ok : status;
+}
+
+static __be32
+nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	     struct nfsd4_verify *verify)
+{
+	__be32 status;
+
+	status = _nfsd4_verify(rqstp, cstate, verify);
+	return status == nfserr_same ? nfs_ok : status;
+}
+
 /*
  * NULL call.
  */
@@ -911,10 +931,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 			op->status = nfsd4_lookupp(rqstp, cstate);
 			break;
 		case OP_NVERIFY:
-			op->status = nfsd4_verify(rqstp, cstate,
+			op->status = nfsd4_nverify(rqstp, cstate,
 						  &op->u.nverify);
-			if (op->status == nfserr_not_same)
-				op->status = nfs_ok;
 			break;
 		case OP_OPEN:
 			op->status = nfsd4_open(rqstp, cstate,
@@ -975,8 +993,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 		case OP_VERIFY:
 			op->status = nfsd4_verify(rqstp, cstate,
 						  &op->u.verify);
-			if (op->status == nfserr_same)
-				op->status = nfs_ok;
 			break;
 		case OP_WRITE:
 			op->status = nfsd4_write(rqstp, cstate, &op->u.write);
-- 
cgit v1.2.3


From b591480bbe1a7f0e90533bce8ea86efecc84648e Mon Sep 17 00:00:00 2001
From: "J.Bruce Fields" <bfields@fieldses.org>
Date: Wed, 13 Dec 2006 00:35:38 -0800
Subject: [PATCH] knfsd: nfsd4: reorganize compound ops

Define an op descriptor struct, use it to simplify nfsd4_proc_compound().

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c  | 254 ++++++++++++++++++++++++++--------------------------
 fs/nfsd/nfs4state.c |  14 ++-
 2 files changed, 139 insertions(+), 129 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 8fc2cab9d76..e1c463bad12 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -258,7 +258,8 @@ out:
  * filehandle-manipulating ops.
  */
 static __be32
-nfsd4_getfh(struct nfsd4_compound_state *cstate, struct svc_fh **getfh)
+nfsd4_getfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	    struct svc_fh **getfh)
 {
 	if (!cstate->current_fh.fh_dentry)
 		return nfserr_nofilehandle;
@@ -279,7 +280,8 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 }
 
 static __be32
-nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate)
+nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		void *arg)
 {
 	__be32 status;
 
@@ -290,7 +292,8 @@ nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate)
 }
 
 static __be32
-nfsd4_restorefh(struct nfsd4_compound_state *cstate)
+nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		void *arg)
 {
 	if (!cstate->save_fh.fh_dentry)
 		return nfserr_restorefh;
@@ -300,7 +303,8 @@ nfsd4_restorefh(struct nfsd4_compound_state *cstate)
 }
 
 static __be32
-nfsd4_savefh(struct nfsd4_compound_state *cstate)
+nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	     void *arg)
 {
 	if (!cstate->current_fh.fh_dentry)
 		return nfserr_nofilehandle;
@@ -463,7 +467,8 @@ nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 }
 
 static __be32
-nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate)
+nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	      void *arg)
 {
 	struct svc_fh tmp_fh;
 	__be32 ret;
@@ -791,6 +796,16 @@ static struct nfsd4_compound_state *cstate_alloc(void)
 	return cstate;
 }
 
+typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
+			      void *);
+
+struct nfsd4_operation {
+	nfsd4op_func op_func;
+	u32 op_flags;
+};
+
+static struct nfsd4_operation nfsd4_ops[];
+
 /*
  * COMPOUND call.
  */
@@ -800,6 +815,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 		    struct nfsd4_compoundres *resp)
 {
 	struct nfsd4_op	*op;
+	struct nfsd4_operation *opdesc;
 	struct nfsd4_compound_state *cstate = NULL;
 	int		slack_bytes;
 	__be32		status;
@@ -854,6 +870,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 			goto encode_op;
 		}
 
+		opdesc = &nfsd4_ops[op->opnum];
+
 		/* All operations except RENEW, SETCLIENTID, RESTOREFH
 		* SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH
 		* require a valid current filehandle
@@ -883,127 +901,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 			op->status = nfserr_moved;
 			goto encode_op;
 		}
-		switch (op->opnum) {
-		case OP_ACCESS:
-			op->status = nfsd4_access(rqstp, cstate,
-						  &op->u.access);
-			break;
-		case OP_CLOSE:
-			op->status = nfsd4_close(rqstp, cstate,
-						 &op->u.close);
-			break;
-		case OP_COMMIT:
-			op->status = nfsd4_commit(rqstp, cstate,
-						  &op->u.commit);
-			break;
-		case OP_CREATE:
-			op->status = nfsd4_create(rqstp, cstate,
-						  &op->u.create);
-			break;
-		case OP_DELEGRETURN:
-			op->status = nfsd4_delegreturn(rqstp, cstate,
-						       &op->u.delegreturn);
-			break;
-		case OP_GETATTR:
-			op->status = nfsd4_getattr(rqstp, cstate,
-						   &op->u.getattr);
-			break;
-		case OP_GETFH:
-			op->status = nfsd4_getfh(cstate, &op->u.getfh);
-			break;
-		case OP_LINK:
-			op->status = nfsd4_link(rqstp, cstate, &op->u.link);
-			break;
-		case OP_LOCK:
-			op->status = nfsd4_lock(rqstp, cstate, &op->u.lock);
-			break;
-		case OP_LOCKT:
-			op->status = nfsd4_lockt(rqstp, cstate, &op->u.lockt);
-			break;
-		case OP_LOCKU:
-			op->status = nfsd4_locku(rqstp, cstate, &op->u.locku);
-			break;
-		case OP_LOOKUP:
-			op->status = nfsd4_lookup(rqstp, cstate,
-						  &op->u.lookup);
-			break;
-		case OP_LOOKUPP:
-			op->status = nfsd4_lookupp(rqstp, cstate);
-			break;
-		case OP_NVERIFY:
-			op->status = nfsd4_nverify(rqstp, cstate,
-						  &op->u.nverify);
-			break;
-		case OP_OPEN:
-			op->status = nfsd4_open(rqstp, cstate,
-						&op->u.open);
-			break;
-		case OP_OPEN_CONFIRM:
-			op->status = nfsd4_open_confirm(rqstp, cstate,
-							&op->u.open_confirm);
-			break;
-		case OP_OPEN_DOWNGRADE:
-			op->status = nfsd4_open_downgrade(rqstp, cstate,
-							&op->u.open_downgrade);
-			break;
-		case OP_PUTFH:
-			op->status = nfsd4_putfh(rqstp, cstate, &op->u.putfh);
-			break;
-		case OP_PUTROOTFH:
-			op->status = nfsd4_putrootfh(rqstp, cstate);
-			break;
-		case OP_READ:
-			op->status = nfsd4_read(rqstp, cstate, &op->u.read);
-			break;
-		case OP_READDIR:
-			op->status = nfsd4_readdir(rqstp, cstate,
-						   &op->u.readdir);
-			break;
-		case OP_READLINK:
-			op->status = nfsd4_readlink(rqstp, cstate,
-						    &op->u.readlink);
-			break;
-		case OP_REMOVE:
-			op->status = nfsd4_remove(rqstp, cstate,
-						  &op->u.remove);
-			break;
-		case OP_RENAME:
-			op->status = nfsd4_rename(rqstp, cstate,
-						  &op->u.rename);
-			break;
-		case OP_RENEW:
-			op->status = nfsd4_renew(&op->u.renew);
-			break;
-		case OP_RESTOREFH:
-			op->status = nfsd4_restorefh(cstate);
-			break;
-		case OP_SAVEFH:
-			op->status = nfsd4_savefh(cstate);
-			break;
-		case OP_SETATTR:
-			op->status = nfsd4_setattr(rqstp, cstate,
-						   &op->u.setattr);
-			break;
-		case OP_SETCLIENTID:
-			op->status = nfsd4_setclientid(rqstp, &op->u.setclientid);
-			break;
-		case OP_SETCLIENTID_CONFIRM:
-			op->status = nfsd4_setclientid_confirm(rqstp, &op->u.setclientid_confirm);
-			break;
-		case OP_VERIFY:
-			op->status = nfsd4_verify(rqstp, cstate,
-						  &op->u.verify);
-			break;
-		case OP_WRITE:
-			op->status = nfsd4_write(rqstp, cstate, &op->u.write);
-			break;
-		case OP_RELEASE_LOCKOWNER:
-			op->status = nfsd4_release_lockowner(rqstp, &op->u.release_lockowner);
-			break;
-		default:
+
+		if (opdesc->op_func)
+			op->status = opdesc->op_func(rqstp, cstate, &op->u);
+		else
 			BUG_ON(op->status == nfs_ok);
-			break;
-		}
 
 encode_op:
 		if (op->status == nfserr_replay_me) {
@@ -1031,6 +933,108 @@ out:
 	return status;
 }
 
+static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
+	[OP_ACCESS] = {
+		.op_func = (nfsd4op_func)nfsd4_access,
+	},
+	[OP_CLOSE] = {
+		.op_func = (nfsd4op_func)nfsd4_close,
+	},
+	[OP_COMMIT] = {
+		.op_func = (nfsd4op_func)nfsd4_commit,
+	},
+	[OP_CREATE] = {
+		.op_func = (nfsd4op_func)nfsd4_create,
+	},
+	[OP_DELEGRETURN] = {
+		.op_func = (nfsd4op_func)nfsd4_delegreturn,
+	},
+	[OP_GETATTR] = {
+		.op_func = (nfsd4op_func)nfsd4_getattr,
+	},
+	[OP_GETFH] = {
+		.op_func = (nfsd4op_func)nfsd4_getfh,
+	},
+	[OP_LINK] = {
+		.op_func = (nfsd4op_func)nfsd4_link,
+	},
+	[OP_LOCK] = {
+		.op_func = (nfsd4op_func)nfsd4_lock,
+	},
+	[OP_LOCKT] = {
+		.op_func = (nfsd4op_func)nfsd4_lockt,
+	},
+	[OP_LOCKU] = {
+		.op_func = (nfsd4op_func)nfsd4_locku,
+	},
+	[OP_LOOKUP] = {
+		.op_func = (nfsd4op_func)nfsd4_lookup,
+	},
+	[OP_LOOKUPP] = {
+		.op_func = (nfsd4op_func)nfsd4_lookupp,
+	},
+	[OP_NVERIFY] = {
+		.op_func = (nfsd4op_func)nfsd4_nverify,
+	},
+	[OP_OPEN] = {
+		.op_func = (nfsd4op_func)nfsd4_open,
+	},
+	[OP_OPEN_CONFIRM] = {
+		.op_func = (nfsd4op_func)nfsd4_open_confirm,
+	},
+	[OP_OPEN_DOWNGRADE] = {
+		.op_func = (nfsd4op_func)nfsd4_open_downgrade,
+	},
+	[OP_PUTFH] = {
+		.op_func = (nfsd4op_func)nfsd4_putfh,
+	},
+	[OP_PUTROOTFH] = {
+		.op_func = (nfsd4op_func)nfsd4_putrootfh,
+	},
+	[OP_READ] = {
+		.op_func = (nfsd4op_func)nfsd4_read,
+	},
+	[OP_READDIR] = {
+		.op_func = (nfsd4op_func)nfsd4_readdir,
+	},
+	[OP_READLINK] = {
+		.op_func = (nfsd4op_func)nfsd4_readlink,
+	},
+	[OP_REMOVE] = {
+		.op_func = (nfsd4op_func)nfsd4_remove,
+	},
+	[OP_RENAME] = {
+		.op_func = (nfsd4op_func)nfsd4_rename,
+	},
+	[OP_RENEW] = {
+		.op_func = (nfsd4op_func)nfsd4_renew,
+	},
+	[OP_RESTOREFH] = {
+		.op_func = (nfsd4op_func)nfsd4_restorefh,
+	},
+	[OP_SAVEFH] = {
+		.op_func = (nfsd4op_func)nfsd4_savefh,
+	},
+	[OP_SETATTR] = {
+		.op_func = (nfsd4op_func)nfsd4_setattr,
+	},
+	[OP_SETCLIENTID] = {
+		.op_func = (nfsd4op_func)nfsd4_setclientid,
+	},
+	[OP_SETCLIENTID_CONFIRM] = {
+		.op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
+	},
+	[OP_VERIFY] = {
+		.op_func = (nfsd4op_func)nfsd4_verify,
+	},
+	[OP_WRITE] = {
+		.op_func = (nfsd4op_func)nfsd4_write,
+	},
+	[OP_RELEASE_LOCKOWNER] = {
+		.op_func = (nfsd4op_func)nfsd4_release_lockowner,
+	},
+};
+
 #define nfs4svc_decode_voidargs		NULL
 #define nfs4svc_release_void		NULL
 #define nfsd4_voidres			nfsd4_voidargs
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index db701c47a34..9de89df961f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -711,7 +711,8 @@ out_err:
  *
  */
 __be32
-nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
+nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		  struct nfsd4_setclientid *setclid)
 {
 	__be32 			ip_addr = rqstp->rq_addr.sin_addr.s_addr;
 	struct xdr_netobj 	clname = { 
@@ -876,7 +877,9 @@ out:
  * NOTE: callback information will be processed here in a future patch
  */
 __be32
-nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm)
+nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
+			 struct nfsd4_compound_state *cstate,
+			 struct nfsd4_setclientid_confirm *setclientid_confirm)
 {
 	__be32 ip_addr = rqstp->rq_addr.sin_addr.s_addr;
 	struct nfs4_client *conf, *unconf;
@@ -1833,7 +1836,8 @@ static void laundromat_main(struct work_struct *);
 static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main);
 
 __be32
-nfsd4_renew(clientid_t *clid)
+nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	    clientid_t *clid)
 {
 	struct nfs4_client *clp;
 	__be32 status;
@@ -2979,7 +2983,9 @@ out:
 }
 
 __be32
-nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner)
+nfsd4_release_lockowner(struct svc_rqst *rqstp,
+			struct nfsd4_compound_state *cstate,
+			struct nfsd4_release_lockowner *rlockowner)
 {
 	clientid_t *clid = &rlockowner->rl_clientid;
 	struct nfs4_stateowner *sop;
-- 
cgit v1.2.3


From eeac294ebd9254a937d90b00c48863e3af229047 Mon Sep 17 00:00:00 2001
From: "J.Bruce Fields" <bfields@fieldses.org>
Date: Wed, 13 Dec 2006 00:35:39 -0800
Subject: [PATCH] knfsd: nfsd4: simplify migration op check

I'm not too fond of these big if conditions.  Replace them by checks of a flag
in the operation descriptor.  To my eye this makes the code a bit more
self-documenting, and makes the complicated part of the code (proc_compound) a
little more compact.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e1c463bad12..3a652e74137 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -802,6 +802,8 @@ typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
 struct nfsd4_operation {
 	nfsd4op_func op_func;
 	u32 op_flags;
+/* GETATTR and ops not listed as returning NFS4ERR_MOVED: */
+#define ALLOWED_ON_ABSENT_FS 1
 };
 
 static struct nfsd4_operation nfsd4_ops[];
@@ -887,17 +889,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 				op->status = nfserr_nofilehandle;
 				goto encode_op;
 			}
-		}
-		/* Check must be done at start of each operation, except
-		 * for GETATTR and ops not listed as returning NFS4ERR_MOVED
-		 */
-		else if (cstate->current_fh.fh_export->ex_fslocs.migrated &&
-			 !((op->opnum == OP_GETATTR) ||
-			   (op->opnum == OP_PUTROOTFH) ||
-			   (op->opnum == OP_PUTPUBFH) ||
-			   (op->opnum == OP_RENEW) ||
-			   (op->opnum == OP_SETCLIENTID) ||
-			   (op->opnum == OP_RELEASE_LOCKOWNER))) {
+		} else if (cstate->current_fh.fh_export->ex_fslocs.migrated &&
+			  !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) {
 			op->status = nfserr_moved;
 			goto encode_op;
 		}
@@ -951,6 +944,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
 	},
 	[OP_GETATTR] = {
 		.op_func = (nfsd4op_func)nfsd4_getattr,
+		.op_flags = ALLOWED_ON_ABSENT_FS,
 	},
 	[OP_GETFH] = {
 		.op_func = (nfsd4op_func)nfsd4_getfh,
@@ -988,8 +982,13 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
 	[OP_PUTFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putfh,
 	},
+	[OP_PUTPUBFH] = {
+		/* unsupported; just for future reference: */
+		.op_flags = ALLOWED_ON_ABSENT_FS,
+	},
 	[OP_PUTROOTFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putrootfh,
+		.op_flags = ALLOWED_ON_ABSENT_FS,
 	},
 	[OP_READ] = {
 		.op_func = (nfsd4op_func)nfsd4_read,
@@ -1008,6 +1007,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
 	},
 	[OP_RENEW] = {
 		.op_func = (nfsd4op_func)nfsd4_renew,
+		.op_flags = ALLOWED_ON_ABSENT_FS,
 	},
 	[OP_RESTOREFH] = {
 		.op_func = (nfsd4op_func)nfsd4_restorefh,
@@ -1020,6 +1020,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
 	},
 	[OP_SETCLIENTID] = {
 		.op_func = (nfsd4op_func)nfsd4_setclientid,
+		.op_flags = ALLOWED_ON_ABSENT_FS,
 	},
 	[OP_SETCLIENTID_CONFIRM] = {
 		.op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
@@ -1032,6 +1033,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
 	},
 	[OP_RELEASE_LOCKOWNER] = {
 		.op_func = (nfsd4op_func)nfsd4_release_lockowner,
+		.op_flags = ALLOWED_ON_ABSENT_FS,
 	},
 };
 
-- 
cgit v1.2.3


From 27d630ece08ae15e35d54201665c247a9fae584c Mon Sep 17 00:00:00 2001
From: "J.Bruce Fields" <bfields@fieldses.org>
Date: Wed, 13 Dec 2006 00:35:43 -0800
Subject: [PATCH] knfsd: nfsd4: simplify filehandle check

Kill another big "if" clause.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3a652e74137..8522729830d 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -802,8 +802,10 @@ typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
 struct nfsd4_operation {
 	nfsd4op_func op_func;
 	u32 op_flags;
+/* Most ops require a valid current filehandle; a few don't: */
+#define ALLOWED_WITHOUT_FH 1
 /* GETATTR and ops not listed as returning NFS4ERR_MOVED: */
-#define ALLOWED_ON_ABSENT_FS 1
+#define ALLOWED_ON_ABSENT_FS 2
 };
 
 static struct nfsd4_operation nfsd4_ops[];
@@ -874,18 +876,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 
 		opdesc = &nfsd4_ops[op->opnum];
 
-		/* All operations except RENEW, SETCLIENTID, RESTOREFH
-		* SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH
-		* require a valid current filehandle
-		*/
 		if (!cstate->current_fh.fh_dentry) {
-			if (!((op->opnum == OP_PUTFH) ||
-			      (op->opnum == OP_PUTROOTFH) ||
-			      (op->opnum == OP_SETCLIENTID) ||
-			      (op->opnum == OP_SETCLIENTID_CONFIRM) ||
-			      (op->opnum == OP_RENEW) ||
-			      (op->opnum == OP_RESTOREFH) ||
-			      (op->opnum == OP_RELEASE_LOCKOWNER))) {
+			if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
 				op->status = nfserr_nofilehandle;
 				goto encode_op;
 			}
@@ -981,14 +973,15 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
 	},
 	[OP_PUTFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putfh,
+		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
 	},
 	[OP_PUTPUBFH] = {
 		/* unsupported; just for future reference: */
-		.op_flags = ALLOWED_ON_ABSENT_FS,
+		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
 	},
 	[OP_PUTROOTFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putrootfh,
-		.op_flags = ALLOWED_ON_ABSENT_FS,
+		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
 	},
 	[OP_READ] = {
 		.op_func = (nfsd4op_func)nfsd4_read,
@@ -1007,10 +1000,11 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
 	},
 	[OP_RENEW] = {
 		.op_func = (nfsd4op_func)nfsd4_renew,
-		.op_flags = ALLOWED_ON_ABSENT_FS,
+		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
 	},
 	[OP_RESTOREFH] = {
 		.op_func = (nfsd4op_func)nfsd4_restorefh,
+		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
 	},
 	[OP_SAVEFH] = {
 		.op_func = (nfsd4op_func)nfsd4_savefh,
@@ -1020,10 +1014,11 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
 	},
 	[OP_SETCLIENTID] = {
 		.op_func = (nfsd4op_func)nfsd4_setclientid,
-		.op_flags = ALLOWED_ON_ABSENT_FS,
+		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
 	},
 	[OP_SETCLIENTID_CONFIRM] = {
 		.op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
+		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
 	},
 	[OP_VERIFY] = {
 		.op_func = (nfsd4op_func)nfsd4_verify,
@@ -1033,7 +1028,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
 	},
 	[OP_RELEASE_LOCKOWNER] = {
 		.op_func = (nfsd4op_func)nfsd4_release_lockowner,
-		.op_flags = ALLOWED_ON_ABSENT_FS,
+		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
 	},
 };
 
-- 
cgit v1.2.3


From f988443a84528bd30c2f474efa5e2c511959f19b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 13 Dec 2006 00:35:45 -0800
Subject: [PATCH] knfsd: Fix up some bit-rot in exp_export

The nfsservctl system call isn't used but recent nfs-utils releases for
exporting filesystems, and consequently the code that is uses - exp_export -
has suffered some bitrot.

Particular:
  - some newly added fields in 'struct svc_export' are being initialised
    properly.
  - the return value is now always -ENOMEM ...

This patch fixes both these problems.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/export.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 1137d09c597..248dd92e6a5 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -955,6 +955,8 @@ exp_export(struct nfsctl_export *nxp)
 
 	exp = exp_get_by_name(clp, nd.mnt, nd.dentry, NULL);
 
+	memset(&new, 0, sizeof(new));
+
 	/* must make sure there won't be an ex_fsid clash */
 	if ((nxp->ex_flags & NFSEXP_FSID) &&
 	    (fsid_key = exp_get_fsid_key(clp, nxp->ex_dev)) &&
@@ -985,6 +987,9 @@ exp_export(struct nfsctl_export *nxp)
 
 	new.h.expiry_time = NEVER;
 	new.h.flags = 0;
+	new.ex_path = kstrdup(nxp->ex_path, GFP_KERNEL);
+	if (!new.ex_path)
+		goto finish;
 	new.ex_client = clp;
 	new.ex_mnt = nd.mnt;
 	new.ex_dentry = nd.dentry;
@@ -1005,10 +1010,11 @@ exp_export(struct nfsctl_export *nxp)
 		/* failed to create at least one index */
 		exp_do_unexport(exp);
 		cache_flush();
-		err = -ENOMEM;
-	}
-
+	} else
+		err = 0;
 finish:
+	if (new.ex_path)
+		kfree(new.ex_path);
 	if (exp)
 		exp_put(exp);
 	if (fsid_key && !IS_ERR(fsid_key))
-- 
cgit v1.2.3


From 5cbded585d129d0226cb48ac4202b253c781be26 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Wed, 13 Dec 2006 00:35:56 -0800
Subject: [PATCH] getting rid of all casts of k[cmz]alloc() calls

Run this:

	#!/bin/sh
	for f in $(grep -Erl "\([^\)]*\) *k[cmz]alloc" *) ; do
	  echo "De-casting $f..."
	  perl -pi -e "s/ ?= ?\([^\)]*\) *(k[cmz]alloc) *\(/ = \1\(/" $f
	done

And then go through and reinstate those cases where code is casting pointers
to non-pointers.

And then drop a few hunks which conflicted with outstanding work.

Cc: Russell King <rmk@arm.linux.org.uk>, Ian Molton <spyro@f2s.com>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Roman Zippel <zippel@linux-m68k.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Greg KH <greg@kroah.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Paul Fulghum <paulkf@microgate.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Karsten Keil <kkeil@suse.de>
Cc: Mauro Carvalho Chehab <mchehab@infradead.org>
Cc: Jeff Garzik <jeff@garzik.org>
Cc: James Bottomley <James.Bottomley@steeleye.com>
Cc: Ian Kent <raven@themaw.net>
Cc: Steven French <sfrench@us.ibm.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Cc: Jaroslav Kysela <perex@suse.cz>
Cc: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/inode.c      |  2 +-
 fs/befs/btree.c         |  2 +-
 fs/befs/debug.c         |  6 +++---
 fs/binfmt_misc.c        |  2 +-
 fs/cifs/cifssmb.c       |  2 +-
 fs/jffs/inode-v23.c     |  4 ++--
 fs/jffs/intrep.c        | 14 +++++++-------
 fs/jfs/jfs_dtree.c      |  4 ++--
 fs/jfs/jfs_imap.c       |  2 +-
 fs/lockd/svcshare.c     |  2 +-
 fs/nfs/nfs4proc.c       |  2 +-
 fs/reiserfs/xattr_acl.c |  2 +-
 12 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 9c48250fd72..e8f6c5ad3e9 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -313,7 +313,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	struct autofs_sb_info *sbi;
 	struct autofs_info *ino;
 
-	sbi = (struct autofs_sb_info *) kmalloc(sizeof(*sbi), GFP_KERNEL);
+	sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
 	if ( !sbi )
 		goto fail_unlock;
 	DPRINTK("starting up, sbi = %p",sbi);
diff --git a/fs/befs/btree.c b/fs/befs/btree.c
index 81b042ee24e..af5bb93276f 100644
--- a/fs/befs/btree.c
+++ b/fs/befs/btree.c
@@ -260,7 +260,7 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds,
 		goto error;
 	}
 
-	this_node = (befs_btree_node *) kmalloc(sizeof (befs_btree_node),
+	this_node = kmalloc(sizeof (befs_btree_node),
 						GFP_NOFS);
 	if (!this_node) {
 		befs_error(sb, "befs_btree_find() failed to allocate %u "
diff --git a/fs/befs/debug.c b/fs/befs/debug.c
index e831a8f3084..b8e304a0661 100644
--- a/fs/befs/debug.c
+++ b/fs/befs/debug.c
@@ -28,7 +28,7 @@ void
 befs_error(const struct super_block *sb, const char *fmt, ...)
 {
 	va_list args;
-	char *err_buf = (char *) kmalloc(ERRBUFSIZE, GFP_KERNEL);
+	char *err_buf = kmalloc(ERRBUFSIZE, GFP_KERNEL);
 	if (err_buf == NULL) {
 		printk(KERN_ERR "could not allocate %d bytes\n", ERRBUFSIZE);
 		return;
@@ -46,7 +46,7 @@ void
 befs_warning(const struct super_block *sb, const char *fmt, ...)
 {
 	va_list args;
-	char *err_buf = (char *) kmalloc(ERRBUFSIZE, GFP_KERNEL);
+	char *err_buf = kmalloc(ERRBUFSIZE, GFP_KERNEL);
 	if (err_buf == NULL) {
 		printk(KERN_ERR "could not allocate %d bytes\n", ERRBUFSIZE);
 		return;
@@ -70,7 +70,7 @@ befs_debug(const struct super_block *sb, const char *fmt, ...)
 	char *err_buf = NULL;
 
 	if (BEFS_SB(sb)->mount_opts.debug) {
-		err_buf = (char *) kmalloc(ERRBUFSIZE, GFP_KERNEL);
+		err_buf = kmalloc(ERRBUFSIZE, GFP_KERNEL);
 		if (err_buf == NULL) {
 			printk(KERN_ERR "could not allocate %d bytes\n",
 				ERRBUFSIZE);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 00687ea6273..c2e08252af3 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -311,7 +311,7 @@ static Node *create_entry(const char __user *buffer, size_t count)
 
 	err = -ENOMEM;
 	memsize = sizeof(Node) + count + 8;
-	e = (Node *) kmalloc(memsize, GFP_USER);
+	e = kmalloc(memsize, GFP_USER);
 	if (!e)
 		goto out;
 
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 098790eb2aa..472e33e0f3c 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4876,7 +4876,7 @@ int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
 	} else {
 		/* Add file to outstanding requests */
 		/* BB change to kmem cache alloc */	
-		dnotify_req = (struct dir_notify_req *) kmalloc(
+		dnotify_req = kmalloc(
 						sizeof(struct dir_notify_req),
 						 GFP_KERNEL);
 		if(dnotify_req) {
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 7b40c69f44e..43baa1afa02 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -818,7 +818,7 @@ jffs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	D1({
 	        int len = dentry->d_name.len;
-		char *_name = (char *) kmalloc(len + 1, GFP_KERNEL);
+		char *_name = kmalloc(len + 1, GFP_KERNEL);
 		memcpy(_name, dentry->d_name.name, len);
 		_name[len] = '\0';
 		printk("***jffs_mkdir(): dir = 0x%p, name = \"%s\", "
@@ -964,7 +964,7 @@ jffs_remove(struct inode *dir, struct dentry *dentry, int type)
 	D1({
 		int len = dentry->d_name.len;
 		const char *name = dentry->d_name.name;
-		char *_name = (char *) kmalloc(len + 1, GFP_KERNEL);
+		char *_name = kmalloc(len + 1, GFP_KERNEL);
 		memcpy(_name, name, len);
 		_name[len] = '\0';
 		printk("***jffs_remove(): file = \"%s\", ino = %ld\n", _name, dentry->d_inode->i_ino);
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index d0e783f199e..6dd18911b44 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -436,7 +436,7 @@ jffs_checksum_flash(struct mtd_info *mtd, loff_t start, int size, __u32 *result)
 	int i, length;
 
 	/* Allocate read buffer */
-	read_buf = (__u8 *) kmalloc (sizeof(__u8) * 4096, GFP_KERNEL);
+	read_buf = kmalloc(sizeof(__u8) * 4096, GFP_KERNEL);
 	if (!read_buf) {
 		printk(KERN_NOTICE "kmalloc failed in jffs_checksum_flash()\n");
 		return -ENOMEM;
@@ -744,11 +744,11 @@ static int check_partly_erased_sectors(struct jffs_fmcontrol *fmc){
 
 
 	/* Allocate read buffers */
-	read_buf1 = (__u8 *) kmalloc (sizeof(__u8) * READ_AHEAD_BYTES, GFP_KERNEL);
+	read_buf1 = kmalloc(sizeof(__u8) * READ_AHEAD_BYTES, GFP_KERNEL);
 	if (!read_buf1)
 		return -ENOMEM;
 
-	read_buf2 = (__u8 *) kmalloc (sizeof(__u8) * READ_AHEAD_BYTES, GFP_KERNEL);
+	read_buf2 = kmalloc(sizeof(__u8) * READ_AHEAD_BYTES, GFP_KERNEL);
 	if (!read_buf2) {
 		kfree(read_buf1);
 		return -ENOMEM;
@@ -876,7 +876,7 @@ jffs_scan_flash(struct jffs_control *c)
 	}
 
 	/* Allocate read buffer */
-	read_buf = (__u8 *) kmalloc (sizeof(__u8) * 4096, GFP_KERNEL);
+	read_buf = kmalloc(sizeof(__u8) * 4096, GFP_KERNEL);
 	if (!read_buf) {
 		flash_safe_release(fmc->mtd);
 		return -ENOMEM;
@@ -1463,7 +1463,7 @@ jffs_insert_node(struct jffs_control *c, struct jffs_file *f,
 			kfree(f->name);
 			DJM(no_name--);
 		}
-		if (!(f->name = (char *) kmalloc(raw_inode->nsize + 1,
+		if (!(f->name = kmalloc(raw_inode->nsize + 1,
 						 GFP_KERNEL))) {
 			return -ENOMEM;
 		}
@@ -1737,7 +1737,7 @@ jffs_find_child(struct jffs_file *dir, const char *name, int len)
 		printk("jffs_find_child(): Found \"%s\".\n", f->name);
 	}
 	else {
-		char *copy = (char *) kmalloc(len + 1, GFP_KERNEL);
+		char *copy = kmalloc(len + 1, GFP_KERNEL);
 		if (copy) {
 			memcpy(copy, name, len);
 			copy[len] = '\0';
@@ -2627,7 +2627,7 @@ jffs_print_tree(struct jffs_file *first_file, int indent)
 		return;
 	}
 
-	if (!(space = (char *) kmalloc(indent + 1, GFP_KERNEL))) {
+	if (!(space = kmalloc(indent + 1, GFP_KERNEL))) {
 		printk("jffs_print_tree(): Out of memory!\n");
 		return;
 	}
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 47bc0b5d132..6d62f322289 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -3777,12 +3777,12 @@ static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
 	struct component_name lkey;
 	struct component_name rkey;
 
-	lkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
+	lkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
 					GFP_KERNEL);
 	if (lkey.name == NULL)
 		return -ENOMEM;
 
-	rkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
+	rkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
 					GFP_KERNEL);
 	if (rkey.name == NULL) {
 		kfree(lkey.name);
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index ee9b473b7b8..53f63b47a6d 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -120,7 +120,7 @@ int diMount(struct inode *ipimap)
 	 * allocate/initialize the in-memory inode map control structure
 	 */
 	/* allocate the in-memory inode map control structure. */
-	imap = (struct inomap *) kmalloc(sizeof(struct inomap), GFP_KERNEL);
+	imap = kmalloc(sizeof(struct inomap), GFP_KERNEL);
 	if (imap == NULL) {
 		jfs_err("diMount: kmalloc returned NULL!");
 		return -ENOMEM;
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index 6220dc2a3f2..068886de4dd 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -39,7 +39,7 @@ nlmsvc_share_file(struct nlm_host *host, struct nlm_file *file,
 			return nlm_lck_denied;
 	}
 
-	share = (struct nlm_share *) kmalloc(sizeof(*share) + oh->len,
+	share = kmalloc(sizeof(*share) + oh->len,
 						GFP_KERNEL);
 	if (share == NULL)
 		return nlm_lck_denied_nolocks;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ee458aeab24..b3fd29baadc 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1877,7 +1877,7 @@ static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir,
 	struct nfs_server *server = NFS_SERVER(dir->d_inode);
 	struct unlink_desc *up;
 
-	up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL);
+	up = kmalloc(sizeof(*up), GFP_KERNEL);
 	if (!up)
 		return -ENOMEM;
 	
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 97ae1b92bc4..5296a29cc5e 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -135,7 +135,7 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
 	int n;
 
 	*size = reiserfs_acl_size(acl->a_count);
-	ext_acl = (reiserfs_acl_header *) kmalloc(sizeof(reiserfs_acl_header) +
+	ext_acl = kmalloc(sizeof(reiserfs_acl_header) +
 						  acl->a_count *
 						  sizeof(reiserfs_acl_entry),
 						  GFP_NOFS);
-- 
cgit v1.2.3


From 4f36557fbe4ab59feb2783cdb5d049cb8c3f34f3 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <compudj@krystal.dyndns.org>
Date: Fri, 24 Nov 2006 13:45:37 -0500
Subject: DebugFS : inotify create/mkdir support

Add inotify create and mkdir events to DebugFS.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/debugfs/inode.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 137d76c3f90..020da4c6059 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -24,6 +24,7 @@
 #include <linux/kobject.h>
 #include <linux/namei.h>
 #include <linux/debugfs.h>
+#include <linux/fsnotify.h>
 
 #define DEBUGFS_MAGIC	0x64626720
 
@@ -87,15 +88,22 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR;
 	res = debugfs_mknod(dir, dentry, mode, 0);
-	if (!res)
+	if (!res) {
 		inc_nlink(dir);
+		fsnotify_mkdir(dir, dentry);
+	}
 	return res;
 }
 
 static int debugfs_create(struct inode *dir, struct dentry *dentry, int mode)
 {
+	int res;
+
 	mode = (mode & S_IALLUGO) | S_IFREG;
-	return debugfs_mknod(dir, dentry, mode, 0);
+	res = debugfs_mknod(dir, dentry, mode, 0);
+	if (!res)
+		fsnotify_create(dir, dentry);
+	return res;
 }
 
 static inline int debugfs_positive(struct dentry *dentry)
-- 
cgit v1.2.3


From bafb232ec42aa6862798236009e8e5233d05ab36 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <compudj@krystal.dyndns.org>
Date: Fri, 24 Nov 2006 13:46:30 -0500
Subject: DebugFS : coding style fixes

Minor coding style fixes along the way : 80 cols and a white space.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/debugfs/inode.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 020da4c6059..05d1a9c4fb7 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -55,7 +55,8 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
 			inode->i_op = &simple_dir_inode_operations;
 			inode->i_fop = &simple_dir_operations;
 
-			/* directory inodes start off with i_nlink == 2 (for "." entry) */
+			/* directory inodes start off with i_nlink == 2
+			 * (for "." entry) */
 			inc_nlink(inode);
 			break;
 		}
@@ -143,7 +144,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
 	 * block. A pointer to that is in the struct vfsmount that we
 	 * have around.
 	 */
-	if (!parent ) {
+	if (!parent) {
 		if (debugfs_mount && debugfs_mount->mnt_sb) {
 			parent = debugfs_mount->mnt_sb->s_root;
 		}
-- 
cgit v1.2.3


From 63223a0654c2a473ae64835819b87826cb7415ee Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <compudj@krystal.dyndns.org>
Date: Fri, 24 Nov 2006 13:48:19 -0500
Subject: DebugFS : file/directory creation error handling

Fix error handling of file and directory creation in DebugFS.

The error path should release the file system because no _remove will be called
for this erroneous creation.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/debugfs/inode.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 05d1a9c4fb7..d6c5fb53c74 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -206,13 +206,15 @@ struct dentry *debugfs_create_file(const char *name, mode_t mode,
 
 	pr_debug("debugfs: creating file '%s'\n",name);
 
-	error = simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count);
+	error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
+			      &debugfs_mount_count);
 	if (error)
 		goto exit;
 
 	error = debugfs_create_by_name(name, mode, parent, &dentry);
 	if (error) {
 		dentry = NULL;
+		simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 		goto exit;
 	}
 
-- 
cgit v1.2.3


From 65c333367b1aea57d58168ad3dc1df27b0227401 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <compudj@krystal.dyndns.org>
Date: Fri, 24 Nov 2006 13:50:09 -0500
Subject: DebugFS : more file/directory creation error handling

Correct dentry count to handle creation errors.

This patch puts a dput at the file creation instead of the file removal :
lookup_one_len already returns a dentry with reference count of 1. Then,
the dget() in simple_mknod increments it when the dentry is associated
with a file. In a scenario where simple_create or simple_mkdir returns
an error, this would lead to an unwanted increment of the reference
counter, therefore making file removal impossible.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/debugfs/inode.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index d6c5fb53c74..554f4a9dfaf 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -162,6 +162,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
 			error = debugfs_mkdir(parent->d_inode, *dentry, mode);
 		else 
 			error = debugfs_create(parent->d_inode, *dentry, mode);
+		dput(*dentry);
 	} else
 		error = PTR_ERR(*dentry);
 	mutex_unlock(&parent->d_inode->i_mutex);
@@ -273,6 +274,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir);
 void debugfs_remove(struct dentry *dentry)
 {
 	struct dentry *parent;
+	int ret = 0;
 	
 	if (!dentry)
 		return;
@@ -284,11 +286,15 @@ void debugfs_remove(struct dentry *dentry)
 	mutex_lock(&parent->d_inode->i_mutex);
 	if (debugfs_positive(dentry)) {
 		if (dentry->d_inode) {
-			if (S_ISDIR(dentry->d_inode->i_mode))
-				simple_rmdir(parent->d_inode, dentry);
-			else
+			if (S_ISDIR(dentry->d_inode->i_mode)) {
+				ret = simple_rmdir(parent->d_inode, dentry);
+				if (ret)
+					printk(KERN_ERR
+						"DebugFS rmdir on %s failed : "
+						"directory not empty.\n",
+						dentry->d_name.name);
+			} else
 				simple_unlink(parent->d_inode, dentry);
-		dput(dentry);
 		}
 	}
 	mutex_unlock(&parent->d_inode->i_mutex);
-- 
cgit v1.2.3


From 29a7f3ada7fea5510504c5359c3f70d109aeb055 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <compudj@krystal.dyndns.org>
Date: Fri, 24 Nov 2006 13:51:14 -0500
Subject: DebugFS : file/directory removal fix

Fix file and directory removal in debugfs. Add inotify support for file removal.

The following scenario :
create dir a
create dir a/b

cd a/b (some process goes in cwd a/b)

rmdir a/b
rmdir a

fails due to the fact that "a" appears to be non empty. It is because
the "b" dentry is not deleted from "a" and still in use. The same
problem happens if "b" is a file. d_delete is nice enough to know when
it needs to unhash and free the dentry if nothing else is using it or,
if someone is using it, to remove it from the hash queues and wait for
it to be deleted when it has no users.

The nice side-effect of this fix is that it calls the file removal
notification.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/debugfs/inode.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 554f4a9dfaf..c692487346e 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -286,6 +286,7 @@ void debugfs_remove(struct dentry *dentry)
 	mutex_lock(&parent->d_inode->i_mutex);
 	if (debugfs_positive(dentry)) {
 		if (dentry->d_inode) {
+			dget(dentry);
 			if (S_ISDIR(dentry->d_inode->i_mode)) {
 				ret = simple_rmdir(parent->d_inode, dentry);
 				if (ret)
@@ -295,6 +296,9 @@ void debugfs_remove(struct dentry *dentry)
 						dentry->d_name.name);
 			} else
 				simple_unlink(parent->d_inode, dentry);
+			if (!ret)
+				d_delete(dentry);
+			dput(dentry);
 		}
 	}
 	mutex_unlock(&parent->d_inode->i_mutex);
-- 
cgit v1.2.3


From c80e7c83d56866a735236b45441f024b589f9e88 Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <pcaulfie@redhat.com>
Date: Fri, 8 Dec 2006 14:31:12 -0500
Subject: [DLM] fix compile warning

This patch fixes a compile warning in lowcomms-tcp.c indicating that
kmem_cache_t is deprecated.

Signed-Off-By: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/lowcomms-tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
index 8f2791fc844..9be3a440c42 100644
--- a/fs/dlm/lowcomms-tcp.c
+++ b/fs/dlm/lowcomms-tcp.c
@@ -143,7 +143,7 @@ static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_waitq);
 /* An array of pointers to connections, indexed by NODEID */
 static struct connection **connections;
 static DECLARE_MUTEX(connections_lock);
-static kmem_cache_t *con_cache;
+static struct kmem_cache *con_cache;
 static int conn_array_size;
 
 /* List of sockets that have reads pending */
-- 
cgit v1.2.3


From 1003f06953472ecc34f12d9867670f475a8c1af6 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 12 Dec 2006 10:16:25 +0000
Subject: [GFS2] Fix Kconfig
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Here is a patch to fix up the Kconfig so that we don't land up with
problems when people disable the NET subsystem.  Thanks for all the hints and
suggestions that people have sent me regarding this.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Aleksandr Koltsoff <czr@iki.fi>
Cc: Toralf Förster <toralf.foerster@gmx.de>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Adrian Bunk <bunk@stusta.de>
Cc: Chris Zubrzycki <chris@middle--earth.org>
Cc: Patrick Caulfield <pcaulfie@redhat.com>
---
 fs/gfs2/Kconfig | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index c0791cbacad..6a2ffa2db14 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -34,7 +34,9 @@ config GFS2_FS_LOCKING_NOLOCK
 
 config GFS2_FS_LOCKING_DLM
 	tristate "GFS2 DLM locking module"
-	depends on GFS2_FS
+	depends on GFS2_FS && NET && INET && (IPV6 || IPV6=n)
+	select IP_SCTP if DLM_SCTP
+	select CONFIGFS_FS
 	select DLM
 	help
 	Multiple node locking module for GFS2
-- 
cgit v1.2.3


From 5ccac88eeb5659c716af8e695e2943509c80d172 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Mon, 18 Dec 2006 13:31:18 +0000
Subject: [PATCH] fix leaks on pipe(2) failure exits

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/pipe.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 9a06e8e48e8..68090e84f58 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -935,8 +935,9 @@ struct file *create_write_pipe(void)
 
 void free_write_pipe(struct file *f)
 {
-	mntput(f->f_path.mnt);
+	free_pipe_info(f->f_dentry->d_inode);
 	dput(f->f_path.dentry);
+	mntput(f->f_path.mnt);
 	put_filp(f);
 }
 
@@ -994,6 +995,8 @@ int do_pipe(int *fd)
  err_fdr:
 	put_unused_fd(fdr);
  err_read_pipe:
+	dput(fr->f_dentry);
+	mntput(fr->f_vfsmnt);
 	put_filp(fr);
  err_write_pipe:
 	free_write_pipe(fw);
-- 
cgit v1.2.3


From 46d2277c796f9f4937bfa668c40b2e3f43e93dd0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@woody.osdl.org>
Date: Tue, 19 Dec 2006 15:21:59 -0800
Subject: Clean up and make try_to_free_buffers() not race with dirty pages

This is preparatory work in our continuing saga on some hard-to-trigger
file corruption with shared writable mmap() after the dirty page
tracking changes (commit d08b3851da41d0ee60851f2c75b118e1f7a5fc89 etc)
were merged.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index d1f1b54d310..263f88e4dff 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2834,7 +2834,7 @@ int try_to_free_buffers(struct page *page)
 	int ret = 0;
 
 	BUG_ON(!PageLocked(page));
-	if (PageWriteback(page))
+	if (PageDirty(page) || PageWriteback(page))
 		return 0;
 
 	if (mapping == NULL) {		/* can this still happen? */
@@ -2845,22 +2845,6 @@ int try_to_free_buffers(struct page *page)
 	spin_lock(&mapping->private_lock);
 	ret = drop_buffers(page, &buffers_to_free);
 	spin_unlock(&mapping->private_lock);
-	if (ret) {
-		/*
-		 * If the filesystem writes its buffers by hand (eg ext3)
-		 * then we can have clean buffers against a dirty page.  We
-		 * clean the page here; otherwise later reattachment of buffers
-		 * could encounter a non-uptodate page, which is unresolvable.
-		 * This only applies in the rare case where try_to_free_buffers
-		 * succeeds but the page is not freed.
-		 *
-		 * Also, during truncate, discard_buffer will have marked all
-		 * the page's buffers clean.  We discover that here and clean
-		 * the page also.
-		 */
-		if (test_clear_page_dirty(page))
-			task_io_account_cancelled_write(PAGE_CACHE_SIZE);
-	}
 out:
 	if (buffers_to_free) {
 		struct buffer_head *bh = buffers_to_free;
-- 
cgit v1.2.3


From fba2591bf4e418b6c3f9f8794c9dd8fe40ae7bd9 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@woody.osdl.org>
Date: Wed, 20 Dec 2006 13:46:42 -0800
Subject: VM: Remove "clear_page_dirty()" and "test_clear_page_dirty()"
 functions

They were horribly easy to mis-use because of their tempting naming, and
they also did way more than any users of them generally wanted them to
do.

A dirty page can become clean under two circumstances:

 (a) when we write it out.  We have "clear_page_dirty_for_io()" for
     this, and that function remains unchanged.

     In the "for IO" case it is not sufficient to just clear the dirty
     bit, you also have to mark the page as being under writeback etc.

 (b) when we actually remove a page due to it becoming inaccessible to
     users, notably because it was truncate()'d away or the file (or
     metadata) no longer exists, and we thus want to cancel any
     outstanding dirty state.

For the (b) case, we now introduce "cancel_dirty_page()", which only
touches the page state itself, and verifies that the page is not mapped
(since cancelling writes on a mapped page would be actively wrong as it
is still accessible to users).

Some filesystems need to be fixed up for this: CIFS, FUSE, JFS,
ReiserFS, XFS all use the old confusing functions, and will be fixed
separately in subsequent commits (with some of them just removing the
offending logic, and others using clear_page_dirty_for_io()).

This was confirmed by Martin Michlmayr to fix the apt database
corruption on ARM.

Cc: Martin Michlmayr <tbm@cyrius.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Andrei Popa <andrei.popa@i-neo.ro>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Cc: Gordon Farquharson <gordonfarquharson@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hugetlbfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ed2c22340ad..4f4cd132b57 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -176,7 +176,7 @@ static int hugetlbfs_commit_write(struct file *file,
 
 static void truncate_huge_page(struct page *page)
 {
-	clear_page_dirty(page);
+	cancel_dirty_page(page, /* No IO accounting for huge pages? */0);
 	ClearPageUptodate(page);
 	remove_from_page_cache(page);
 	put_page(page);
-- 
cgit v1.2.3


From d0e671a932cb9c653b27393cec26aec012a8d97e Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Date: Wed, 20 Dec 2006 15:55:35 -0600
Subject: [PATCH] Fix JFS after clear_page_dirty() removal

This patch removes some questionable code that attempted to make a
no-longer-used page easier to reclaim.

Calling metapage_writepage against such a page will not result in any
I/O being performed, so removing this code shouldn't be a big deal.

[ It's likely that we could have just replaced the "clear_page_dirty()"
  call with a call to "cancel_dirty_page()" instead, but in the
  meantime this is cleaner and simpler anyway, so unless there is some
  overriding reason (and Dave implies there isn't) I'll just use this
  patch as-is.			- Linus ]

Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jfs/jfs_metapage.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index b1a1c729601..ceaf03b9493 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -764,22 +764,9 @@ void release_metapage(struct metapage * mp)
 	} else if (mp->lsn)	/* discard_metapage doesn't remove it */
 		remove_from_logsync(mp);
 
-#if MPS_PER_PAGE == 1
-	/*
-	 * If we know this is the only thing in the page, we can throw
-	 * the page out of the page cache.  If pages are larger, we
-	 * don't want to do this.
-	 */
-
-	/* Retest mp->count since we may have released page lock */
-	if (test_bit(META_discard, &mp->flag) && !mp->count) {
-		clear_page_dirty(page);
-		ClearPageUptodate(page);
-	}
-#else
 	/* Try to keep metapages from using up too much memory */
 	drop_metapage(page, mp);
-#endif
+
 	unlock_page(page);
 	page_cache_release(page);
 }
-- 
cgit v1.2.3


From 9280f6822c2d7112b47107251fce307aefb31f35 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 21 Dec 2006 15:18:23 +0100
Subject: [PATCH] fuse: remove clear_page_dirty() call

The use by FUSE was just a remnant of an optimization from the time
when writable mappings were supported.

Now FUSE never actually allows the creation of dirty pages, so this
invocation of clear_page_dirty() is effectively a no-op.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/file.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 1387749201b..f63efe1337e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -483,10 +483,8 @@ static int fuse_commit_write(struct file *file, struct page *page,
 			i_size_write(inode, pos);
 		spin_unlock(&fc->lock);
 
-		if (offset == 0 && to == PAGE_CACHE_SIZE) {
-			clear_page_dirty(page);
+		if (offset == 0 && to == PAGE_CACHE_SIZE)
 			SetPageUptodate(page);
-		}
 	}
 	fuse_invalidate_attr(inode);
 	return err;
-- 
cgit v1.2.3


From 921320210bd2ec4f17053d283355b73048ac0e56 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Thu, 21 Dec 2006 10:24:01 +1100
Subject: [PATCH] Fix XFS after clear_page_dirty() removal

XFS appears to call clear_page_dirty to get the mapping tree dirty tag
set correctly at the same time the page dirty flag is cleared.  I note
that this can be done by set_page_writeback() if we clear the dirty flag
on the page first when we are writing back the entire page.

Hence it seems to me that the XFS call to clear_page_dirty() could
easily be substituted by clear_page_dirty_for_io() followed by a call to
set_page_writeback() to get the mapping tree tags set correctly after
the page has been marked clean.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/xfs/linux-2.6/xfs_aops.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index b56eb754e2d..7b54461695e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -341,9 +341,9 @@ xfs_start_page_writeback(
 {
 	ASSERT(PageLocked(page));
 	ASSERT(!PageWriteback(page));
-	set_page_writeback(page);
 	if (clear_dirty)
-		clear_page_dirty(page);
+		clear_page_dirty_for_io(page);
+	set_page_writeback(page);
 	unlock_page(page);
 	if (!buffers) {
 		end_page_writeback(page);
-- 
cgit v1.2.3


From 3b2b96abbf321891bdde5412d23bc4123c6cccec Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Fri, 22 Dec 2006 01:07:09 -0800
Subject: [PATCH] fs/sysv/: proper prototypes for 2 functions

Add proper prototypes for sysv_{init,destroy}_icache() in sysv.h

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/sysv/super.c | 3 ---
 fs/sysv/sysv.h  | 3 +++
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index dc9e7dc07fb..6f9707a1b95 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -528,9 +528,6 @@ static struct file_system_type v7_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-extern int sysv_init_icache(void) __init;
-extern void sysv_destroy_icache(void);
-
 static int __init init_sysv_fs(void)
 {
 	int error;
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 9dcc8212093..dcb18b2171f 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -143,6 +143,9 @@ extern int sysv_sync_inode(struct inode *);
 extern int sysv_sync_file(struct file *, struct dentry *, int);
 extern void sysv_set_inode(struct inode *, dev_t);
 extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern int sysv_init_icache(void);
+extern void sysv_destroy_icache(void);
+
 
 /* dir.c */
 extern struct sysv_dir_entry *sysv_find_entry(struct dentry *, struct page **);
-- 
cgit v1.2.3


From ba3ff12fca318225cb978c6181b83d38dcbc5b09 Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Fri, 22 Dec 2006 01:08:43 -0800
Subject: [PATCH] fsstack: Remove inode copy

Trevor found a file size problem in eCryptfs in recent kernels, and he
tracked it down to an fsstack change.

This was the eCryptfs copy_attr_all:

> -void ecryptfs_copy_attr_all(struct inode *dest, const struct inode *src)
> -{
> -       dest->i_mode = src->i_mode;
> -       dest->i_nlink = src->i_nlink;
> -       dest->i_uid = src->i_uid;
> -       dest->i_gid = src->i_gid;
> -       dest->i_rdev = src->i_rdev;
> -       dest->i_atime = src->i_atime;
> -       dest->i_mtime = src->i_mtime;
> -       dest->i_ctime = src->i_ctime;
> -       dest->i_blkbits = src->i_blkbits;
> -       dest->i_flags = src->i_flags;
> -}

This is the fsstack copy_attr_all:

> +void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
> +                               int (*get_nlinks)(struct inode *))
> +{
> +       if (!get_nlinks)
> +               dest->i_nlink = src->i_nlink;
> +       else
> +               dest->i_nlink = (*get_nlinks)(dest);
> +
> +       dest->i_mode = src->i_mode;
> +       dest->i_uid = src->i_uid;
> +       dest->i_gid = src->i_gid;
> +       dest->i_rdev = src->i_rdev;
> +       dest->i_atime = src->i_atime;
> +       dest->i_mtime = src->i_mtime;
> +       dest->i_ctime = src->i_ctime;
> +       dest->i_blkbits = src->i_blkbits;
> +       dest->i_flags = src->i_flags;
> +
> +       fsstack_copy_inode_size(dest, src);
> +}

The addition of copy_inode_size breaks eCryptfs, since eCryptfs needs to
interpolate the file sizes (eCryptfs has extra space in the lower file for
the header).  The setting of the upper inode size occurs elsewhere in
eCryptfs, and the new copy_attr_all now undoes what eCryptfs was doing
right beforehand.

I see three ways of going forward from here.  (1) Something like this patch
needs to go in (assuming it jives with Unionfs), (2) we need to make a
change to the fsstack API for more fine-grained control over copying
attributes (e.g., by also including a callback function for calculating the
right file size, which will require some more work on both eCryptfs and
Unionfs), or (3) the fsstack patch on eCryptfs (commit
0cc72dc7f050188d8d7344b1dd688cbc68d3cd30 made on Fri Dec 8 02:36:31 2006
-0800) needs to be yanked in 2.6.20.

I think the simplest solution, from eCryptfs' perspective, is to just
remove the inode size copy.

Remove inode size copy in general fsstack attr copy code. Stacked
filesystems may need to interpolate the inode size, since the file
size in the lower file may be different than the file size in the
stacked layer.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Acked-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/stack.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/stack.c b/fs/stack.c
index 5ddbc34535f..8ffb880d2f4 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -34,7 +34,5 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
 	dest->i_ctime = src->i_ctime;
 	dest->i_blkbits = src->i_blkbits;
 	dest->i_flags = src->i_flags;
-
-	fsstack_copy_inode_size(dest, src);
 }
 EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
-- 
cgit v1.2.3


From 163ca88b9c5858909ee3f8801ae0096b5f94e835 Mon Sep 17 00:00:00 2001
From: Josh Boyer <jwboyer@gmail.com>
Date: Fri, 22 Dec 2006 01:09:03 -0800
Subject: [PATCH] Make JFFS depend on CONFIG_BROKEN

Mark JFFS as broken and provide a warning to users that it is deprecated
and scheduled for removal in 2.6.21

Signed-off-by: Josh Boyer <jwboyer@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Kconfig | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 276ff3baaaf..8cd2417a14d 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1198,13 +1198,16 @@ config EFS_FS
 
 config JFFS_FS
 	tristate "Journalling Flash File System (JFFS) support"
-	depends on MTD && BLOCK
+	depends on MTD && BLOCK && BROKEN
 	help
 	  JFFS is the Journalling Flash File System developed by Axis
 	  Communications in Sweden, aimed at providing a crash/powerdown-safe
 	  file system for disk-less embedded devices. Further information is
 	  available at (<http://developer.axis.com/software/jffs/>).
 
+	  NOTE: This filesystem is deprecated and is scheduled for removal in
+	  2.6.21.  See Documentation/feature-removal-schedule.txt
+
 config JFFS_FS_VERBOSE
 	int "JFFS debugging verbosity (0 = quiet, 3 = noisy)"
 	depends on JFFS_FS
-- 
cgit v1.2.3


From 01b2d93ca4c495f056471189ac6c4e6ac4cbbccb Mon Sep 17 00:00:00 2001
From: Vadim Lobanov <vlobanov@speakeasy.net>
Date: Fri, 22 Dec 2006 01:10:43 -0800
Subject: [PATCH] fdtable: Provide free_fdtable() wrapper

Christoph Hellwig has expressed concerns that the recent fdtable changes
expose the details of the RCU methodology used to release no-longer-used
fdtable structures to the rest of the kernel.  The trivial patch below
addresses these concerns by introducing the appropriate free_fdtable()
calls, which simply wrap the release RCU usage.  Since free_fdtable() is a
one-liner, it makes sense to promote it to an inline helper.

Signed-off-by: Vadim Lobanov <vlobanov@speakeasy.net>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 857fa49e984..c5575de0111 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -206,7 +206,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
 		copy_fdtable(new_fdt, cur_fdt);
 		rcu_assign_pointer(files->fdt, new_fdt);
 		if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
-			call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
+			free_fdtable(cur_fdt);
 	} else {
 		/* Somebody else expanded, so undo our attempt */
 		free_fdarr(new_fdt);
-- 
cgit v1.2.3


From 6f5a9da1af5a8c286575c30c2706dc1fbef9164b Mon Sep 17 00:00:00 2001
From: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Date: Fri, 22 Dec 2006 01:11:50 -0800
Subject: [PATCH] jbd: wait for already submitted t_sync_datalist buffer to
 complete

In the current jbd code, if a buffer on BJ_SyncData list is dirty and not
locked, the buffer is refiled to BJ_Locked list, submitted to the IO and
waited for IO completion.

But the fsstress test showed the case that when a buffer was already
submitted to the IO just before the buffer_dirty(bh) check, the buffer was
not waited for IO completion.

Following patch solves this problem.  If it is assumed that a buffer is
submitted to the IO before the buffer_dirty(bh) check and still being
written to disk, this buffer is refiled to BJ_Locked list.

Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Cc: Jan Kara <jack@ucw.cz>
Cc: "Stephen C. Tweedie" <sct@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/commit.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 10be51290a2..be4648bc7a2 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -248,8 +248,12 @@ write_out_data:
 				bufs = 0;
 				goto write_out_data;
 			}
-		}
-		else {
+		} else if (!locked && buffer_locked(bh)) {
+			__journal_file_buffer(jh, commit_transaction,
+						BJ_Locked);
+			jbd_unlock_bh_state(bh);
+			put_bh(bh);
+		} else {
 			BUFFER_TRACE(bh, "writeout complete: unfile");
 			__journal_unfile_buffer(jh);
 			jbd_unlock_bh_state(bh);
-- 
cgit v1.2.3


From ffaa82008f1aad52a6d3979f49d2a76c2928b60f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@woody.osdl.org>
Date: Sat, 23 Dec 2006 09:32:45 -0800
Subject: Fix reiserfs after "test_clear_page_dirty()" removal

Thanks to Len Brown for testing this fix, since while they have in the
past, none of my machines run reiserfs at the moment.

Cc: Vladimir V. Saveliev <vs@namesys.com>
Acked-by: Len Brown <lenb@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/stree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 47e7027ea39..afb21ea4530 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1459,7 +1459,7 @@ static void unmap_buffers(struct page *page, loff_t pos)
 				bh = next;
 			} while (bh != head);
 			if (PAGE_SIZE == bh->b_size) {
-				clear_page_dirty(page);
+				cancel_dirty_page(page, PAGE_CACHE_SIZE);
 			}
 		}
 	}
-- 
cgit v1.2.3


From cb876f451455b6187a7d69de2c112c45ec4b7f99 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@woody.osdl.org>
Date: Sat, 23 Dec 2006 16:19:07 -0800
Subject: Fix up CIFS for "test_clear_page_dirty()" removal

This also adds he required page "writeback" flag handling, that cifs
hasn't been doing and that the page dirty flag changes made obvious.

Acked-by: Steve French <smfltc@us.ibm.com>
Acked-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/file.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0f05cab5d24..8a49b2e77d3 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1245,14 +1245,21 @@ retry:
 				wait_on_page_writeback(page);
 
 			if (PageWriteback(page) ||
-					!test_clear_page_dirty(page)) {
+					!clear_page_dirty_for_io(page)) {
 				unlock_page(page);
 				break;
 			}
 
+			/*
+			 * This actually clears the dirty bit in the radix tree.
+			 * See cifs_writepage() for more commentary.
+			 */
+			set_page_writeback(page);
+
 			if (page_offset(page) >= mapping->host->i_size) {
 				done = 1;
 				unlock_page(page);
+				end_page_writeback(page);
 				break;
 			}
 
@@ -1316,6 +1323,7 @@ retry:
 					SetPageError(page);
 				kunmap(page);
 				unlock_page(page);
+				end_page_writeback(page);
 				page_cache_release(page);
 			}
 			if ((wbc->nr_to_write -= n_iov) <= 0)
@@ -1352,11 +1360,23 @@ static int cifs_writepage(struct page* page, struct writeback_control *wbc)
         if (!PageUptodate(page)) {
 		cFYI(1, ("ppw - page not up to date"));
 	}
-	
+
+	/*
+	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
+	 *
+	 * A writepage() implementation always needs to do either this,
+	 * or re-dirty the page with "redirty_page_for_writepage()" in
+	 * the case of a failure.
+	 *
+	 * Just unlocking the page will cause the radix tree tag-bits
+	 * to fail to update with the state of the page correctly.
+	 */
+	set_page_writeback(page);		
 	rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
 	SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
 	unlock_page(page);
-	page_cache_release(page);	
+	end_page_writeback(page);
+	page_cache_release(page);
 	FreeXid(xid);
 	return rc;
 }
-- 
cgit v1.2.3


From 0333394bff439c3fb09264303de42e7038b3e709 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 14 Dec 2006 15:29:25 -0800
Subject: ocfs2: don't print error in ocfs2_permission()

Errors from generic_permission() can happen in valid cases and shouldn't be
reported.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/file.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9fd590b9bde..a50447d461e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -966,8 +966,6 @@ int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
 	}
 
 	ret = generic_permission(inode, mask, NULL);
-	if (ret)
-		mlog_errno(ret);
 
 	ocfs2_meta_unlock(inode, 0);
 out:
-- 
cgit v1.2.3


From 564f8a3228879d6962edb3432d01bcd7499a67ec Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 14 Dec 2006 13:01:05 -0800
Subject: ocfs2: Allow direct I/O read past end of file

ocfs2_direct_IO_get_blocks() was incorrectly returning -EIO for a direct I/O
read whose start block was past the end of the file allocation tree. Fix
things so that we return a hole instead. do_direct_IO() will then notice
that the range start is past eof and return a short read.

While there, remove the unused vbo_max variable.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/aops.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index ef6cd30108a..93628b02ef5 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -540,8 +540,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 				     struct buffer_head *bh_result, int create)
 {
 	int ret;
-	u64 vbo_max; /* file offset, max_blocks from iblock */
-	u64 p_blkno;
+	u64 p_blkno, inode_blocks;
 	int contig_blocks;
 	unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
 	unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
@@ -550,12 +549,23 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 	 * nicely aligned and of the right size, so there's no need
 	 * for us to check any of that. */
 
-	vbo_max = ((u64)iblock + max_blocks) << blocksize_bits;
-
 	spin_lock(&OCFS2_I(inode)->ip_lock);
-	if ((iblock + max_blocks) >
-	    ocfs2_clusters_to_blocks(inode->i_sb,
-				     OCFS2_I(inode)->ip_clusters)) {
+	inode_blocks = ocfs2_clusters_to_blocks(inode->i_sb,
+						OCFS2_I(inode)->ip_clusters);
+
+	/*
+	 * For a read which begins past the end of file, we return a hole.
+	 */
+	if (!create && (iblock >= inode_blocks)) {
+		spin_unlock(&OCFS2_I(inode)->ip_lock);
+		ret = 0;
+		goto bail;
+	}
+
+	/*
+	 * Any write past EOF is not allowed because we'd be extending.
+	 */
+	if (create && (iblock + max_blocks) > inode_blocks) {
 		spin_unlock(&OCFS2_I(inode)->ip_lock);
 		ret = -EIO;
 		goto bail;
-- 
cgit v1.2.3


From 6c2aad0567e693f9588d0a0683f96ed872fb4641 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 19 Dec 2006 15:25:52 -0800
Subject: ocfs2: ignore NULL vfsmnt in ocfs2_should_update_atime()

This can come from NFSD.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/file.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a50447d461e..10953a508f2 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -149,6 +149,17 @@ int ocfs2_should_update_atime(struct inode *inode,
 	    ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
 		return 0;
 
+	/*
+	 * We can be called with no vfsmnt structure - NFSD will
+	 * sometimes do this.
+	 *
+	 * Note that our action here is different than touch_atime() -
+	 * if we can't tell whether this is a noatime mount, then we
+	 * don't know whether to trust the value of s_atime_quantum.
+	 */
+	if (vfsmnt == NULL)
+		return 0;
+
 	if ((vfsmnt->mnt_flags & MNT_NOATIME) ||
 	    ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
 		return 0;
-- 
cgit v1.2.3


From 7f4a2a97e324e8c826d1d983bc8efb5c59194f02 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Mon, 11 Dec 2006 11:06:36 -0800
Subject: ocfs2: always unmap in ocfs2_data_convert_worker()

Mmap-heavy clustered workloads were sometimes finding stale data on mmap
reads. The solution is to call unmap_mapping_range() on any down convert of
a data lock.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index e6220137bf6..e335541727f 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2718,6 +2718,15 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
        	inode = ocfs2_lock_res_inode(lockres);
 	mapping = inode->i_mapping;
 
+	/*
+	 * We need this before the filemap_fdatawrite() so that it can
+	 * transfer the dirty bit from the PTE to the
+	 * page. Unfortunately this means that even for EX->PR
+	 * downconverts, we'll lose our mappings and have to build
+	 * them up again.
+	 */
+	unmap_mapping_range(mapping, 0, 0, 0);
+
 	if (filemap_fdatawrite(mapping)) {
 		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -2725,7 +2734,6 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
 	sync_mapping_buffers(mapping);
 	if (blocking == LKM_EXMODE) {
 		truncate_inode_pages(mapping, 0);
-		unmap_mapping_range(mapping, 0, 0, 0);
 	} else {
 		/* We only need to wait on the I/O if we're not also
 		 * truncating pages because truncate_inode_pages waits
-- 
cgit v1.2.3


From 92efc15241ceebc23451691971897020e8563a70 Mon Sep 17 00:00:00 2001
From: Zhen Wei <zwei@novell.com>
Date: Fri, 8 Dec 2006 00:48:17 -0700
Subject: ocfs2: export heartbeat thread pid via configfs

The patch allows the ocfs2 heartbeat thread to prioritize I/O which may
help cut down on spurious fencing. Most of this will be in the tools -
we can have a pid configfs attribute and let userspace (ocfs2_hb_ctl)
calls the ioprio_set syscall after starting heartbeat, but only cfq
scheduler supports I/O priorities now.

Signed-off-by: Zhen Wei <zwei@novell.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/heartbeat.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index a25ef5a5038..277ca67a2ad 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1447,6 +1447,15 @@ out:
 	return ret;
 }
 
+static ssize_t o2hb_region_pid_read(struct o2hb_region *reg,
+                                      char *page)
+{
+	if (!reg->hr_task)
+		return 0;
+
+	return sprintf(page, "%u\n", reg->hr_task->pid);
+}
+
 struct o2hb_region_attribute {
 	struct configfs_attribute attr;
 	ssize_t (*show)(struct o2hb_region *, char *);
@@ -1485,11 +1494,19 @@ static struct o2hb_region_attribute o2hb_region_attr_dev = {
 	.store	= o2hb_region_dev_write,
 };
 
+static struct o2hb_region_attribute o2hb_region_attr_pid = {
+       .attr   = { .ca_owner = THIS_MODULE,
+                   .ca_name = "pid",
+                   .ca_mode = S_IRUGO | S_IRUSR },
+       .show   = o2hb_region_pid_read,
+};
+
 static struct configfs_attribute *o2hb_region_attrs[] = {
 	&o2hb_region_attr_block_bytes.attr,
 	&o2hb_region_attr_start_block.attr,
 	&o2hb_region_attr_blocks.attr,
 	&o2hb_region_attr_dev.attr,
+	&o2hb_region_attr_pid.attr,
 	NULL,
 };
 
-- 
cgit v1.2.3


From 1ebb1101c556b1915ff041655e629a072e64dcda Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@oracle.com>
Date: Fri, 29 Dec 2006 16:47:02 -0800
Subject: [PATCH] Fix lock inversion aio_kick_handler()

lockdep found a AB BC CA lock inversion in retry-based AIO:

1) The task struct's alloc_lock (A) is acquired in process context with
   interrupts enabled.  An interrupt might arrive and call wake_up() which
   grabs the wait queue's q->lock (B).

2) When performing retry-based AIO the AIO core registers
   aio_wake_function() as the wake funtion for iocb->ki_wait.  It is called
   with the wait queue's q->lock (B) held and then tries to add the iocb to
   the run list after acquiring the ctx_lock (C).

3) aio_kick_handler() holds the ctx_lock (C) while acquiring the
   alloc_lock (A) via lock_task() and unuse_mm().  Lockdep emits a warning
   saying that we're trying to connect the irq-safe q->lock to the
   irq-unsafe alloc_lock via ctx_lock.

This fixes the inversion by calling unuse_mm() in the AIO kick handing path
after we've released the ctx_lock.  As Ben LaHaise pointed out __put_ioctx
could set ctx->mm to NULL, so we must only access ctx->mm while we have the
lock.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Suparna Bhattacharya <suparna@in.ibm.com>
Acked-by: Benjamin LaHaise <bcrl@kvack.org>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 5f577a63bdf..ee20fc4240e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -599,9 +599,6 @@ static void use_mm(struct mm_struct *mm)
  *	by the calling kernel thread
  *	(Note: this routine is intended to be called only
  *	from a kernel thread context)
- *
- * Comments: Called with ctx->ctx_lock held. This nests
- * task_lock instead ctx_lock.
  */
 static void unuse_mm(struct mm_struct *mm)
 {
@@ -850,14 +847,16 @@ static void aio_kick_handler(struct work_struct *work)
 {
 	struct kioctx *ctx = container_of(work, struct kioctx, wq.work);
 	mm_segment_t oldfs = get_fs();
+	struct mm_struct *mm;
 	int requeue;
 
 	set_fs(USER_DS);
 	use_mm(ctx->mm);
 	spin_lock_irq(&ctx->ctx_lock);
 	requeue =__aio_run_iocbs(ctx);
- 	unuse_mm(ctx->mm);
+	mm = ctx->mm;
 	spin_unlock_irq(&ctx->ctx_lock);
+ 	unuse_mm(mm);
 	set_fs(oldfs);
 	/*
 	 * we're in a worker thread already, don't use queue_delayed_work,
-- 
cgit v1.2.3


From 131612dfe7923bd0ce5f82d6ed8303a7ef96e574 Mon Sep 17 00:00:00 2001
From: Dimitri Gorokhovik <dimitri.gorokhovik@free.fr>
Date: Fri, 29 Dec 2006 16:48:24 -0800
Subject: [PATCH] ramfs breaks without CONFIG_BLOCK

ramfs doesn't provide the .set_dirty_page a_op, and when the BLOCK layer is
not configured in, 'set_page_dirty' makes a call via a NULL pointer.

Signed-off-by: Dimitri Gorokhovik <dimitri.gorokhovik@free.fr>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ramfs/file-mmu.c   | 4 +++-
 fs/ramfs/file-nommu.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 0947fb57dcf..54ebbc84207 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -25,11 +25,13 @@
  */
 
 #include <linux/fs.h>
+#include <linux/mm.h>
 
 const struct address_space_operations ramfs_aops = {
 	.readpage	= simple_readpage,
 	.prepare_write	= simple_prepare_write,
-	.commit_write	= simple_commit_write
+	.commit_write	= simple_commit_write,
+	.set_page_dirty = __set_page_dirty_nobuffers,
 };
 
 const struct file_operations ramfs_file_operations = {
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 61cbe1ef06b..e9d6c473328 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -11,6 +11,7 @@
 
 #include <linux/module.h>
 #include <linux/fs.h>
+#include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
@@ -30,7 +31,8 @@ static int ramfs_nommu_setattr(struct dentry *, struct iattr *);
 const struct address_space_operations ramfs_aops = {
 	.readpage		= simple_readpage,
 	.prepare_write		= simple_prepare_write,
-	.commit_write		= simple_commit_write
+	.commit_write		= simple_commit_write,
+	.set_page_dirty = __set_page_dirty_nobuffers,
 };
 
 const struct file_operations ramfs_file_operations = {
-- 
cgit v1.2.3


From 3223ea8cca5936b8e78450dd5b8ba88372e9c0a8 Mon Sep 17 00:00:00 2001
From: James Bursa <james@zamez.org>
Date: Fri, 5 Jan 2007 16:36:28 -0800
Subject: [PATCH] adfs: fix filename handling

Fix filenames on adfs discs being terminated at the first character greater
than 128 (adfs filenames are Latin 1).  I saw this problem when using a
loopback adfs image on a 2.6.17-rc5 x86_64 machine, and the patch fixed it
there.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/adfs/dir_f.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index bbfc8625927..b9b2b27b68c 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -53,7 +53,7 @@ static inline int adfs_readname(char *buf, char *ptr, int maxlen)
 {
 	char *old_buf = buf;
 
-	while (*ptr >= ' ' && maxlen--) {
+	while ((unsigned char)*ptr >= ' ' && maxlen--) {
 		if (*ptr == '/')
 			*buf++ = '.';
 		else
-- 
cgit v1.2.3


From be6aab0e9fa6d3c6d75aa1e38ac972d8b4ee82b8 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Fri, 5 Jan 2007 16:36:36 -0800
Subject: [PATCH] fix memory corruption from misinterpreted bad_inode_ops
 return values

CVE-2006-5753 is for a case where an inode can be marked bad, switching
the ops to bad_inode_ops, which are all connected as:

static int return_EIO(void)
{
        return -EIO;
}

#define EIO_ERROR ((void *) (return_EIO))

static struct inode_operations bad_inode_ops =
{
        .create         = bad_inode_create
...etc...

The problem here is that the void cast causes return types to not be
promoted, and for ops such as listxattr which expect more than 32 bits of
return value, the 32-bit -EIO is interpreted as a large positive 64-bit
number, i.e. 0x00000000fffffffa instead of 0xfffffffa.

This goes particularly badly when the return value is taken as a number of
bytes to copy into, say, a user's buffer for example...

I originally had coded up the fix by creating a return_EIO_<TYPE> macro
for each return type, like this:

static int return_EIO_int(void)
{
	return -EIO;
}
#define EIO_ERROR_INT ((void *) (return_EIO_int))

static struct inode_operations bad_inode_ops =
{
	.create		= EIO_ERROR_INT,
...etc...

but Al felt that it was probably better to create an EIO-returner for each
actual op signature.  Since so few ops share a signature, I just went ahead
& created an EIO function for each individual file & inode op that returns
a value.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/bad_inode.c | 330 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 289 insertions(+), 41 deletions(-)

(limited to 'fs')

diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 34e6d7b220c..869f5193ecc 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -14,59 +14,307 @@
 #include <linux/time.h>
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
+#include <linux/poll.h>
 
-static int return_EIO(void)
+
+static loff_t bad_file_llseek(struct file *file, loff_t offset, int origin)
+{
+	return -EIO;
+}
+
+static ssize_t bad_file_read(struct file *filp, char __user *buf,
+			size_t size, loff_t *ppos)
+{
+        return -EIO;
+}
+
+static ssize_t bad_file_write(struct file *filp, const char __user *buf,
+			size_t siz, loff_t *ppos)
+{
+        return -EIO;
+}
+
+static ssize_t bad_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+			unsigned long nr_segs, loff_t pos)
+{
+	return -EIO;
+}
+
+static ssize_t bad_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+			unsigned long nr_segs, loff_t pos)
+{
+	return -EIO;
+}
+
+static int bad_file_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	return -EIO;
+}
+
+static unsigned int bad_file_poll(struct file *filp, poll_table *wait)
+{
+	return POLLERR;
+}
+
+static int bad_file_ioctl (struct inode *inode, struct file *filp,
+			unsigned int cmd, unsigned long arg)
+{
+	return -EIO;
+}
+
+static long bad_file_unlocked_ioctl(struct file *file, unsigned cmd,
+			unsigned long arg)
+{
+	return -EIO;
+}
+
+static long bad_file_compat_ioctl(struct file *file, unsigned int cmd,
+			unsigned long arg)
+{
+	return -EIO;
+}
+
+static int bad_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	return -EIO;
+}
+
+static int bad_file_open(struct inode *inode, struct file *filp)
+{
+	return -EIO;
+}
+
+static int bad_file_flush(struct file *file, fl_owner_t id)
+{
+	return -EIO;
+}
+
+static int bad_file_release(struct inode *inode, struct file *filp)
+{
+	return -EIO;
+}
+
+static int bad_file_fsync(struct file *file, struct dentry *dentry,
+			int datasync)
+{
+	return -EIO;
+}
+
+static int bad_file_aio_fsync(struct kiocb *iocb, int datasync)
+{
+	return -EIO;
+}
+
+static int bad_file_fasync(int fd, struct file *filp, int on)
+{
+	return -EIO;
+}
+
+static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl)
+{
+	return -EIO;
+}
+
+static ssize_t bad_file_sendfile(struct file *in_file, loff_t *ppos,
+			size_t count, read_actor_t actor, void *target)
+{
+	return -EIO;
+}
+
+static ssize_t bad_file_sendpage(struct file *file, struct page *page,
+			int off, size_t len, loff_t *pos, int more)
+{
+	return -EIO;
+}
+
+static unsigned long bad_file_get_unmapped_area(struct file *file,
+				unsigned long addr, unsigned long len,
+				unsigned long pgoff, unsigned long flags)
+{
+	return -EIO;
+}
+
+static int bad_file_check_flags(int flags)
 {
 	return -EIO;
 }
 
-#define EIO_ERROR ((void *) (return_EIO))
+static int bad_file_dir_notify(struct file *file, unsigned long arg)
+{
+	return -EIO;
+}
+
+static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	return -EIO;
+}
+
+static ssize_t bad_file_splice_write(struct pipe_inode_info *pipe,
+			struct file *out, loff_t *ppos, size_t len,
+			unsigned int flags)
+{
+	return -EIO;
+}
+
+static ssize_t bad_file_splice_read(struct file *in, loff_t *ppos,
+			struct pipe_inode_info *pipe, size_t len,
+			unsigned int flags)
+{
+	return -EIO;
+}
 
 static const struct file_operations bad_file_ops =
 {
-	.llseek		= EIO_ERROR,
-	.aio_read	= EIO_ERROR,
-	.read		= EIO_ERROR,
-	.write		= EIO_ERROR,
-	.aio_write	= EIO_ERROR,
-	.readdir	= EIO_ERROR,
-	.poll		= EIO_ERROR,
-	.ioctl		= EIO_ERROR,
-	.mmap		= EIO_ERROR,
-	.open		= EIO_ERROR,
-	.flush		= EIO_ERROR,
-	.release	= EIO_ERROR,
-	.fsync		= EIO_ERROR,
-	.aio_fsync	= EIO_ERROR,
-	.fasync		= EIO_ERROR,
-	.lock		= EIO_ERROR,
-	.sendfile	= EIO_ERROR,
-	.sendpage	= EIO_ERROR,
-	.get_unmapped_area = EIO_ERROR,
+	.llseek		= bad_file_llseek,
+	.read		= bad_file_read,
+	.write		= bad_file_write,
+	.aio_read	= bad_file_aio_read,
+	.aio_write	= bad_file_aio_write,
+	.readdir	= bad_file_readdir,
+	.poll		= bad_file_poll,
+	.ioctl		= bad_file_ioctl,
+	.unlocked_ioctl	= bad_file_unlocked_ioctl,
+	.compat_ioctl	= bad_file_compat_ioctl,
+	.mmap		= bad_file_mmap,
+	.open		= bad_file_open,
+	.flush		= bad_file_flush,
+	.release	= bad_file_release,
+	.fsync		= bad_file_fsync,
+	.aio_fsync	= bad_file_aio_fsync,
+	.fasync		= bad_file_fasync,
+	.lock		= bad_file_lock,
+	.sendfile	= bad_file_sendfile,
+	.sendpage	= bad_file_sendpage,
+	.get_unmapped_area = bad_file_get_unmapped_area,
+	.check_flags	= bad_file_check_flags,
+	.dir_notify	= bad_file_dir_notify,
+	.flock		= bad_file_flock,
+	.splice_write	= bad_file_splice_write,
+	.splice_read	= bad_file_splice_read,
 };
 
+static int bad_inode_create (struct inode *dir, struct dentry *dentry,
+		int mode, struct nameidata *nd)
+{
+	return -EIO;
+}
+
+static struct dentry *bad_inode_lookup(struct inode *dir,
+			struct dentry *dentry, struct nameidata *nd)
+{
+	return ERR_PTR(-EIO);
+}
+
+static int bad_inode_link (struct dentry *old_dentry, struct inode *dir,
+		struct dentry *dentry)
+{
+	return -EIO;
+}
+
+static int bad_inode_unlink(struct inode *dir, struct dentry *dentry)
+{
+	return -EIO;
+}
+
+static int bad_inode_symlink (struct inode *dir, struct dentry *dentry,
+		const char *symname)
+{
+	return -EIO;
+}
+
+static int bad_inode_mkdir(struct inode *dir, struct dentry *dentry,
+			int mode)
+{
+	return -EIO;
+}
+
+static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry)
+{
+	return -EIO;
+}
+
+static int bad_inode_mknod (struct inode *dir, struct dentry *dentry,
+			int mode, dev_t rdev)
+{
+	return -EIO;
+}
+
+static int bad_inode_rename (struct inode *old_dir, struct dentry *old_dentry,
+		struct inode *new_dir, struct dentry *new_dentry)
+{
+	return -EIO;
+}
+
+static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
+		int buflen)
+{
+	return -EIO;
+}
+
+static int bad_inode_permission(struct inode *inode, int mask,
+			struct nameidata *nd)
+{
+	return -EIO;
+}
+
+static int bad_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
+			struct kstat *stat)
+{
+	return -EIO;
+}
+
+static int bad_inode_setattr(struct dentry *direntry, struct iattr *attrs)
+{
+	return -EIO;
+}
+
+static int bad_inode_setxattr(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags)
+{
+	return -EIO;
+}
+
+static ssize_t bad_inode_getxattr(struct dentry *dentry, const char *name,
+			void *buffer, size_t size)
+{
+	return -EIO;
+}
+
+static ssize_t bad_inode_listxattr(struct dentry *dentry, char *buffer,
+			size_t buffer_size)
+{
+	return -EIO;
+}
+
+static int bad_inode_removexattr(struct dentry *dentry, const char *name)
+{
+	return -EIO;
+}
+
 static struct inode_operations bad_inode_ops =
 {
-	.create		= EIO_ERROR,
-	.lookup		= EIO_ERROR,
-	.link		= EIO_ERROR,
-	.unlink		= EIO_ERROR,
-	.symlink	= EIO_ERROR,
-	.mkdir		= EIO_ERROR,
-	.rmdir		= EIO_ERROR,
-	.mknod		= EIO_ERROR,
-	.rename		= EIO_ERROR,
-	.readlink	= EIO_ERROR,
+	.create		= bad_inode_create,
+	.lookup		= bad_inode_lookup,
+	.link		= bad_inode_link,
+	.unlink		= bad_inode_unlink,
+	.symlink	= bad_inode_symlink,
+	.mkdir		= bad_inode_mkdir,
+	.rmdir		= bad_inode_rmdir,
+	.mknod		= bad_inode_mknod,
+	.rename		= bad_inode_rename,
+	.readlink	= bad_inode_readlink,
 	/* follow_link must be no-op, otherwise unmounting this inode
 	   won't work */
-	.truncate	= EIO_ERROR,
-	.permission	= EIO_ERROR,
-	.getattr	= EIO_ERROR,
-	.setattr	= EIO_ERROR,
-	.setxattr	= EIO_ERROR,
-	.getxattr	= EIO_ERROR,
-	.listxattr	= EIO_ERROR,
-	.removexattr	= EIO_ERROR,
+	/* put_link returns void */
+	/* truncate returns void */
+	.permission	= bad_inode_permission,
+	.getattr	= bad_inode_getattr,
+	.setattr	= bad_inode_setattr,
+	.setxattr	= bad_inode_setxattr,
+	.getxattr	= bad_inode_getxattr,
+	.listxattr	= bad_inode_listxattr,
+	.removexattr	= bad_inode_removexattr,
+	/* truncate_range returns void */
 };
 
 
@@ -88,7 +336,7 @@ static struct inode_operations bad_inode_ops =
  *	on it to fail from this point on.
  */
  
-void make_bad_inode(struct inode * inode) 
+void make_bad_inode(struct inode *inode)
 {
 	remove_inode_hash(inode);
 
@@ -113,7 +361,7 @@ EXPORT_SYMBOL(make_bad_inode);
  *	Returns true if the inode in question has been marked as bad.
  */
  
-int is_bad_inode(struct inode * inode) 
+int is_bad_inode(struct inode *inode)
 {
 	return (inode->i_op == &bad_inode_ops);	
 }
-- 
cgit v1.2.3


From d63b70902befe189ba2672925f28ec3f4db41352 Mon Sep 17 00:00:00 2001
From: Evgeniy Dushistov <dushistov@mail.ru>
Date: Fri, 5 Jan 2007 16:37:04 -0800
Subject: [PATCH] fix garbage instead of zeroes in UFS

Looks like this is the problem, which point Al Viro some time ago:

ufs's get_block callback allocates 16k of disk at a time, and links that
entire 16k into the file's metadata.  But because get_block is called for only
a single buffer_head (a 2k buffer_head in this case?) we are only able to tell
the VFS that this 2k is buffer_new().

So when ufs_getfrag_block() is later called to map some more data in the file,
and when that data resides within the remaining 14k of this fragment,
ufs_getfrag_block() will incorrectly return a !buffer_new() buffer_head.

I don't see _right_ way to do nullification of whole block, if use inode
page cache, some pages may be outside of inode limits (inode size), and
will be lost; if use blockdev page cache it is possible to zero real data,
if later inode page cache will be used.

The simpliest way, as can I see usage of block device page cache, but not only
mark dirty, but also sync it during "nullification".  I use my simple tests
collection, which I used for check that create,open,write,read,close works on
ufs, and I see that this patch makes ufs code 18% slower then before.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ufs/balloc.c | 25 +++++++++++++++++++++++++
 fs/ufs/inode.c  | 41 +++++------------------------------------
 2 files changed, 30 insertions(+), 36 deletions(-)

(limited to 'fs')

diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index b8238147577..2e0021e8f36 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -275,6 +275,25 @@ static void ufs_change_blocknr(struct inode *inode, unsigned int baseblk,
 	UFSD("EXIT\n");
 }
 
+static void ufs_clear_frags(struct inode *inode, sector_t beg, unsigned int n,
+			    int sync)
+{
+	struct buffer_head *bh;
+	sector_t end = beg + n;
+
+	for (; beg < end; ++beg) {
+		bh = sb_getblk(inode->i_sb, beg);
+		lock_buffer(bh);
+		memset(bh->b_data, 0, inode->i_sb->s_blocksize);
+		set_buffer_uptodate(bh);
+		mark_buffer_dirty(bh);
+		unlock_buffer(bh);
+		if (IS_SYNC(inode) || sync)
+			sync_dirty_buffer(bh);
+		brelse(bh);
+	}
+}
+
 unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
 			   unsigned goal, unsigned count, int * err, struct page *locked_page)
 {
@@ -350,6 +369,8 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
 			*p = cpu_to_fs32(sb, result);
 			*err = 0;
 			UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
+			ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
+					locked_page != NULL);
 		}
 		unlock_super(sb);
 		UFSD("EXIT, result %u\n", result);
@@ -363,6 +384,8 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
 	if (result) {
 		*err = 0;
 		UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
+		ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
+				locked_page != NULL);
 		unlock_super(sb);
 		UFSD("EXIT, result %u\n", result);
 		return result;
@@ -398,6 +421,8 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
 		*p = cpu_to_fs32(sb, result);
 		*err = 0;
 		UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
+		ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
+				locked_page != NULL);
 		unlock_super(sb);
 		if (newcount < request)
 			ufs_free_fragments (inode, result + newcount, request - newcount);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index ee1eaa6f4ec..2fbab0aab68 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -156,36 +156,6 @@ out:
 	return ret;
 }
 
-static void ufs_clear_frag(struct inode *inode, struct buffer_head *bh)
-{
-	lock_buffer(bh);
-	memset(bh->b_data, 0, inode->i_sb->s_blocksize);
-	set_buffer_uptodate(bh);
-	mark_buffer_dirty(bh);
-	unlock_buffer(bh);
-	if (IS_SYNC(inode))
-		sync_dirty_buffer(bh);
-}
-
-static struct buffer_head *
-ufs_clear_frags(struct inode *inode, sector_t beg,
-		unsigned int n, sector_t want)
-{
-	struct buffer_head *res = NULL, *bh;
-	sector_t end = beg + n;
-
-	for (; beg < end; ++beg) {
-		bh = sb_getblk(inode->i_sb, beg);
-		ufs_clear_frag(inode, bh);
-		if (want != beg)
-			brelse(bh);
-		else
-			res = bh;
-	}
-	BUG_ON(!res);
-	return res;
-}
-
 /**
  * ufs_inode_getfrag() - allocate new fragment(s)
  * @inode - pointer to inode
@@ -302,7 +272,7 @@ repeat:
 	}
 
 	if (!phys) {
-		result = ufs_clear_frags(inode, tmp, required, tmp + blockoff);
+		result = sb_getblk(sb, tmp + blockoff);
 	} else {
 		*phys = tmp + blockoff;
 		result = NULL;
@@ -403,8 +373,7 @@ repeat:
 
 
 	if (!phys) {
-		result = ufs_clear_frags(inode, tmp, uspi->s_fpb,
-					 tmp + blockoff);
+		result = sb_getblk(sb, tmp + blockoff);
 	} else {
 		*phys = tmp + blockoff;
 		*new = 1;
@@ -471,13 +440,13 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
 #define GET_INODE_DATABLOCK(x) \
 	ufs_inode_getfrag(inode, x, fragment, 1, &err, &phys, &new, bh_result->b_page)
 #define GET_INODE_PTR(x) \
-	ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL, bh_result->b_page)
+	ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL, NULL)
 #define GET_INDIRECT_DATABLOCK(x) \
 	ufs_inode_getblock(inode, bh, x, fragment,	\
-			  &err, &phys, &new, bh_result->b_page);
+			  &err, &phys, &new, bh_result->b_page)
 #define GET_INDIRECT_PTR(x) \
 	ufs_inode_getblock(inode, bh, x, fragment,	\
-			  &err, NULL, NULL, bh_result->b_page);
+			  &err, NULL, NULL, NULL)
 
 	if (ptr < UFS_NDIR_FRAGMENT) {
 		bh = GET_INODE_DATABLOCK(ptr);
-- 
cgit v1.2.3


From 90cb28e8f76e57751ffe14abd09c2d53a6aea7c8 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@woody.osdl.org>
Date: Sat, 6 Jan 2007 13:28:21 -0800
Subject: Revert "[PATCH] binfmt_elf: randomize PIE binaries (2nd try)"

This reverts commit 59287c0913cc9a6c75712a775f6c1c1ef418ef3b.

Hugh Dickins reports that it causes random failures on x86 with SuSE
10.2, and points out

  "Isn't that randomization, anywhere from 0x10000 to ELF_ET_DYN_BASE,
   sure to place the ET_DYN from time to time just where the comment
   says it's trying to avoid? I assume that somehow results in the error
   reported."

(where the comment in question is the existing comment in the source
code about mmap/brk clashes).

Suggested-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Marcus Meissner <meissner@suse.de>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Andi Kleen <ak@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Dave Jones <davej@codemonkey.org.uk>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d3adfd353ff..7cb28720f90 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -854,13 +854,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 			 * default mmap base, as well as whatever program they
 			 * might try to exec.  This is because the brk will
 			 * follow the loader, and is not movable.  */
-			if (current->flags & PF_RANDOMIZE)
-				load_bias = randomize_range(0x10000,
-							    ELF_ET_DYN_BASE,
-							    0);
-			else
-				load_bias = ELF_ET_DYN_BASE;
-			load_bias = ELF_PAGESTART(load_bias - vaddr);
+			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
 		}
 
 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
-- 
cgit v1.2.3


From 3eb3c740f51c2126b53c2dde974c1c57e634aa7b Mon Sep 17 00:00:00 2001
From: Roman Zippel <zippel@linux-m68k.org>
Date: Wed, 10 Jan 2007 14:45:28 +0100
Subject: [PATCH] fix linux banner format string

Revert previous attempts at messing with the linux banner string and
simply use a separate format string for proc.

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Acked-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Jean Delvare <khali@linux-fr.org>
Cc: Andrey Borzenkov <arvidjaar@mail.ru>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/proc_misc.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 92ea7743fe8..b37ce33f67e 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -47,7 +47,6 @@
 #include <linux/vmalloc.h>
 #include <linux/crash_dump.h>
 #include <linux/pid_namespace.h>
-#include <linux/compile.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -254,12 +253,7 @@ static int version_read_proc(char *page, char **start, off_t off,
 {
 	int len;
 
-	/* FIXED STRING! Don't touch! */
-	len = snprintf(page, PAGE_SIZE,
-		"%s version %s"
-		" (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ")"
-		" (" LINUX_COMPILER ")"
-		" %s\n",
+	len = snprintf(page, PAGE_SIZE, linux_proc_banner,
 		utsname()->sysname,
 		utsname()->release,
 		utsname()->version);
-- 
cgit v1.2.3


From e3db7691e9f3dff3289f64e3d98583e28afe03db Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 10 Jan 2007 23:15:39 -0800
Subject: [PATCH] NFS: Fix race in nfs_release_page()

    NFS: Fix race in nfs_release_page()

    invalidate_inode_pages2() may find the dirty bit has been set on a page
    owing to the fact that the page may still be mapped after it was locked.
    Only after the call to unmap_mapping_range() are we sure that the page
    can no longer be dirtied.
    In order to fix this, NFS has hooked the releasepage() method and tries
    to write the page out between the call to unmap_mapping_range() and the
    call to remove_mapping(). This, however leads to deadlocks in the page
    reclaim code, where the page may be locked without holding a reference
    to the inode or dentry.

    Fix is to add a new address_space_operation, launder_page(), which will
    attempt to write out a dirty page without releasing the page lock.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

    Also, the bare SetPageDirty() can skew all sort of accounting leading to
    other nasties.

[akpm@osdl.org: cleanup]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/file.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0dd6be346aa..fab20d06d93 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -315,14 +315,13 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
 
 static int nfs_release_page(struct page *page, gfp_t gfp)
 {
-	/*
-	 * Avoid deadlock on nfs_wait_on_request().
-	 */
-	if (!(gfp & __GFP_FS))
-		return 0;
-	/* Hack... Force nfs_wb_page() to write out the page */
-	SetPageDirty(page);
-	return !nfs_wb_page(page->mapping->host, page);
+	/* If PagePrivate() is set, then the page is not freeable */
+	return 0;
+}
+
+static int nfs_launder_page(struct page *page)
+{
+	return nfs_wb_page(page->mapping->host, page);
 }
 
 const struct address_space_operations nfs_file_aops = {
@@ -338,6 +337,7 @@ const struct address_space_operations nfs_file_aops = {
 #ifdef CONFIG_NFS_DIRECTIO
 	.direct_IO = nfs_direct_IO,
 #endif
+	.launder_page = nfs_launder_page,
 };
 
 static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
-- 
cgit v1.2.3


From f73ca1b76c6880b934d3ef566c1592efc80bb759 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Wed, 10 Jan 2007 23:15:41 -0800
Subject: [PATCH] Revert bd_mount_mutex back to a semaphore

Revert bd_mount_mutex back to a semaphore so that xfs_freeze -f /mnt/newtest;
xfs_freeze -u /mnt/newtest works safely and doesn't produce lockdep warnings.

(XFS unlocks the semaphore from a different task, by design.  The mutex
code warns about this)

Signed-off-by: Dave Chinner <dgc@sgi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c       | 2 +-
 fs/buffer.c          | 6 +++---
 fs/gfs2/ops_fstype.c | 4 ++--
 fs/super.c           | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1715d6b5f41..8b18e43b82f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -411,7 +411,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
 	{
 		memset(bdev, 0, sizeof(*bdev));
 		mutex_init(&bdev->bd_mutex);
-		mutex_init(&bdev->bd_mount_mutex);
+		sema_init(&bdev->bd_mount_sem, 1);
 		INIT_LIST_HEAD(&bdev->bd_inodes);
 		INIT_LIST_HEAD(&bdev->bd_list);
 #ifdef CONFIG_SYSFS
diff --git a/fs/buffer.c b/fs/buffer.c
index 263f88e4dff..3b116078b4c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -180,7 +180,7 @@ int fsync_bdev(struct block_device *bdev)
  * freeze_bdev  --  lock a filesystem and force it into a consistent state
  * @bdev:	blockdevice to lock
  *
- * This takes the block device bd_mount_mutex to make sure no new mounts
+ * This takes the block device bd_mount_sem to make sure no new mounts
  * happen on bdev until thaw_bdev() is called.
  * If a superblock is found on this device, we take the s_umount semaphore
  * on it to make sure nobody unmounts until the snapshot creation is done.
@@ -189,7 +189,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 {
 	struct super_block *sb;
 
-	mutex_lock(&bdev->bd_mount_mutex);
+	down(&bdev->bd_mount_sem);
 	sb = get_super(bdev);
 	if (sb && !(sb->s_flags & MS_RDONLY)) {
 		sb->s_frozen = SB_FREEZE_WRITE;
@@ -231,7 +231,7 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb)
 		drop_super(sb);
 	}
 
-	mutex_unlock(&bdev->bd_mount_mutex);
+	up(&bdev->bd_mount_sem);
 }
 EXPORT_SYMBOL(thaw_bdev);
 
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index d14e139d267..ee80b8a5e7b 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -867,9 +867,9 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
 		error = -EBUSY;
 		goto error;
 	}
-	mutex_lock(&sb->s_bdev->bd_mount_mutex);
+	down(&sb->s_bdev->bd_mount_sem);
 	new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev);
-	mutex_unlock(&sb->s_bdev->bd_mount_mutex);
+	up(&sb->s_bdev->bd_mount_sem);
 	if (IS_ERR(new)) {
 		error = PTR_ERR(new);
 		goto error;
diff --git a/fs/super.c b/fs/super.c
index f961e030799..3e7458c2bb7 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -753,9 +753,9 @@ int get_sb_bdev(struct file_system_type *fs_type,
 	 * will protect the lockfs code from trying to start a snapshot
 	 * while we are mounting
 	 */
-	mutex_lock(&bdev->bd_mount_mutex);
+	down(&bdev->bd_mount_sem);
 	s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
-	mutex_unlock(&bdev->bd_mount_mutex);
+	up(&bdev->bd_mount_sem);
 	if (IS_ERR(s))
 		goto error_s;
 
-- 
cgit v1.2.3