222 files changed, 3655 insertions, 7410 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 61c599b4a1e..872943004e5 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -99,12 +99,13 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
  * @flags: mount flags
  * @dev_name: device name that was mounted
  * @data: mount options
+ * @mnt: mountpoint record to be instantiated
  *
  */
 
-static struct super_block *v9fs_get_sb(struct file_system_type
-				       *fs_type, int flags,
-				       const char *dev_name, void *data)
+static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
+		       const char *dev_name, void *data,
+		       struct vfsmount *mnt)
 {
 	struct super_block *sb = NULL;
 	struct v9fs_fcall *fcall = NULL;
@@ -123,17 +124,19 @@ static struct super_block *v9fs_get_sb(struct file_system_type
 
 	v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
 	if (!v9ses)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
 	if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
 		dprintk(DEBUG_ERROR, "problem initiating session\n");
-		sb = ERR_PTR(newfid);
+		retval = newfid;
 		goto out_free_session;
 	}
 
 	sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
-	if (IS_ERR(sb))
+	if (IS_ERR(sb)) {
+		retval = PTR_ERR(sb);
 		goto out_close_session;
+	}
 	v9fs_fill_super(sb, v9ses, flags);
 
 	inode = v9fs_get_inode(sb, S_IFDIR | mode);
@@ -184,19 +187,19 @@ static struct super_block *v9fs_get_sb(struct file_system_type
 		goto put_back_sb;
 	}
 
-	return sb;
+	return simple_set_mnt(mnt, sb);
 
 out_close_session:
 	v9fs_session_close(v9ses);
 out_free_session:
 	kfree(v9ses);
-	return sb;
+	return retval;
 
 put_back_sb:
 	/* deactivate_super calls v9fs_kill_super which will frees the rest */
 	up_write(&sb->s_umount);
 	deactivate_super(sb);
-	return ERR_PTR(retval);
+	return retval;
 }
 
 /**
diff --git a/fs/Kconfig b/fs/Kconfig
index 20f9b557732..2aa4624cc01 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -53,7 +53,7 @@ config EXT2_FS_SECURITY
 
 config EXT2_FS_XIP
 	bool "Ext2 execute in place support"
-	depends on EXT2_FS
+	depends on EXT2_FS && MMU
 	help
 	  Execute in place can be used on memory-backed block devices. If you
 	  enable this option, you can select to mount block devices which are
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 252abda0d20..ba1c88af49f 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -196,17 +196,17 @@ static int adfs_remount(struct super_block *sb, int *flags, char *data)
 	return parse_options(sb, data);
 }
 
-static int adfs_statfs(struct super_block *sb, struct kstatfs *buf)
+static int adfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	struct adfs_sb_info *asb = ADFS_SB(sb);
+	struct adfs_sb_info *asb = ADFS_SB(dentry->d_sb);
 
 	buf->f_type    = ADFS_SUPER_MAGIC;
 	buf->f_namelen = asb->s_namelen;
-	buf->f_bsize   = sb->s_blocksize;
+	buf->f_bsize   = dentry->d_sb->s_blocksize;
 	buf->f_blocks  = asb->s_size;
 	buf->f_files   = asb->s_ids_per_zone * asb->s_map_size;
 	buf->f_bavail  =
-	buf->f_bfree   = adfs_map_free(sb);
+	buf->f_bfree   = adfs_map_free(dentry->d_sb);
 	buf->f_ffree   = (long)(buf->f_bfree * buf->f_files) / (long)buf->f_blocks;
 
 	return 0;
@@ -470,10 +470,11 @@ error:
 	return -EINVAL;
 }
 
-static struct super_block *adfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int adfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type adfs_fs_type = {
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 4d7e5b19e5c..8765cba35bb 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -18,7 +18,7 @@
 
 extern struct timezone sys_tz;
 
-static int affs_statfs(struct super_block *sb, struct kstatfs *buf);
+static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int affs_remount (struct super_block *sb, int *flags, char *data);
 
 static void
@@ -508,8 +508,9 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 }
 
 static int
-affs_statfs(struct super_block *sb, struct kstatfs *buf)
+affs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct super_block *sb = dentry->d_sb;
 	int		 free;
 
 	pr_debug("AFFS: statfs() partsize=%d, reserved=%d\n",AFFS_SB(sb)->s_partition_size,
@@ -524,10 +525,11 @@ affs_statfs(struct super_block *sb, struct kstatfs *buf)
 	return 0;
 }
 
-static struct super_block *affs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int affs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type affs_fs_type = {
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index a6dff6a4f20..2fc99877cb0 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -185,9 +185,7 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index)
 
 	_enter("{%lu},%lu", dir->i_ino, index);
 
-	page = read_cache_page(dir->i_mapping,index,
-			       (filler_t *) dir->i_mapping->a_ops->readpage,
-			       NULL);
+	page = read_mapping_page(dir->i_mapping, index, NULL);
 	if (!IS_ERR(page)) {
 		wait_on_page_locked(page);
 		kmap(page);
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 4e6eeb59b83..b5cf9e1205a 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -63,7 +63,6 @@ unsigned long afs_mntpt_expiry_timeout = 20;
 int afs_mntpt_check_symlink(struct afs_vnode *vnode)
 {
 	struct page *page;
-	filler_t *filler;
 	size_t size;
 	char *buf;
 	int ret;
@@ -71,10 +70,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode)
 	_enter("{%u,%u}", vnode->fid.vnode, vnode->fid.unique);
 
 	/* read the contents of the symlink into the pagecache */
-	filler = (filler_t *) AFS_VNODE_TO_I(vnode)->i_mapping->a_ops->readpage;
-
-	page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0,
-			       filler, NULL);
+	page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, NULL);
 	if (IS_ERR(page)) {
 		ret = PTR_ERR(page);
 		goto out;
@@ -160,7 +156,6 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 	struct page *page = NULL;
 	size_t size;
 	char *buf, *devname = NULL, *options = NULL;
-	filler_t *filler;
 	int ret;
 
 	kenter("{%s}", mntpt->d_name.name);
@@ -182,9 +177,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 		goto error;
 
 	/* read the contents of the AFS special symlink */
-	filler = (filler_t *)mntpt->d_inode->i_mapping->a_ops->readpage;
-
-	page = read_cache_page(mntpt->d_inode->i_mapping, 0, filler, NULL);
+	page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL);
 	if (IS_ERR(page)) {
 		ret = PTR_ERR(page);
 		goto error;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 53c56e7231a..82468df0ba5 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -38,9 +38,9 @@ struct afs_mount_params {
 static void afs_i_init_once(void *foo, kmem_cache_t *cachep,
 			    unsigned long flags);
 
-static struct super_block *afs_get_sb(struct file_system_type *fs_type,
-				      int flags, const char *dev_name,
-				      void *data);
+static int afs_get_sb(struct file_system_type *fs_type,
+		      int flags, const char *dev_name,
+		      void *data, struct vfsmount *mnt);
 
 static struct inode *afs_alloc_inode(struct super_block *sb);
 
@@ -294,10 +294,11 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent)
  * get an AFS superblock
  * - TODO: don't use get_sb_nodev(), but rather call sget() directly
  */
-static struct super_block *afs_get_sb(struct file_system_type *fs_type,
-				      int flags,
-				      const char *dev_name,
-				      void *options)
+static int afs_get_sb(struct file_system_type *fs_type,
+		      int flags,
+		      const char *dev_name,
+		      void *options,
+		      struct vfsmount *mnt)
 {
 	struct afs_mount_params params;
 	struct super_block *sb;
@@ -311,7 +312,7 @@ static struct super_block *afs_get_sb(struct file_system_type *fs_type,
 	ret = afscm_start();
 	if (ret < 0) {
 		_leave(" = %d", ret);
-		return ERR_PTR(ret);
+		return ret;
 	}
 
 	/* parse the options */
@@ -348,18 +349,19 @@ static struct super_block *afs_get_sb(struct file_system_type *fs_type,
 		goto error;
 	}
 	sb->s_flags |= MS_ACTIVE;
+	simple_set_mnt(mnt, sb);
 
 	afs_put_volume(params.volume);
 	afs_put_cell(params.default_cell);
-	_leave(" = %p", sb);
-	return sb;
+	_leave(" = 0 [%p]", sb);
+	return 0;
 
  error:
 	afs_put_volume(params.volume);
 	afs_put_cell(params.default_cell);
 	afscm_stop();
 	_leave(" = %d", ret);
-	return ERR_PTR(ret);
+	return ret;
 } /* end afs_get_sb() */
 
 /*****************************************************************************/
diff --git a/fs/aio.c b/fs/aio.c
index e41e932ba48..8c34a62df7d 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -777,11 +777,11 @@ out:
 static int __aio_run_iocbs(struct kioctx *ctx)
 {
 	struct kiocb *iocb;
-	LIST_HEAD(run_list);
+	struct list_head run_list;
 
 	assert_spin_locked(&ctx->ctx_lock);
 
-	list_splice_init(&ctx->run_list, &run_list);
+	list_replace_init(&ctx->run_list, &run_list);
 	while (!list_empty(&run_list)) {
 		iocb = list_entry(run_list.next, struct kiocb,
 			ki_run_list);
diff --git a/fs/autofs/init.c b/fs/autofs/init.c
index b977ece69f0..aca12375240 100644
--- a/fs/autofs/init.c
+++ b/fs/autofs/init.c
@@ -14,10 +14,10 @@
 #include <linux/init.h>
 #include "autofs_i.h"
 
-static struct super_block *autofs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int autofs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, autofs_fill_super);
+	return get_sb_nodev(fs_type, flags, data, autofs_fill_super, mnt);
 }
 
 static struct file_system_type autofs_fs_type = {
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index acecec8578c..5d9193332be 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -14,10 +14,10 @@
 #include <linux/init.h>
 #include "autofs_i.h"
 
-static struct super_block *autofs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int autofs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, autofs4_fill_super);
+	return get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt);
 }
 
 static struct file_system_type autofs_fs_type = {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 68ebd10f345..08201fab26c 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -49,7 +49,7 @@ static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
 			char **out, int *out_len);
 static void befs_put_super(struct super_block *);
 static int befs_remount(struct super_block *, int *, char *);
-static int befs_statfs(struct super_block *, struct kstatfs *);
+static int befs_statfs(struct dentry *, struct kstatfs *);
 static int parse_options(char *, befs_mount_options *);
 
 static const struct super_operations befs_sops = {
@@ -880,8 +880,9 @@ befs_remount(struct super_block *sb, int *flags, char *data)
 }
 
 static int
-befs_statfs(struct super_block *sb, struct kstatfs *buf)
+befs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct super_block *sb = dentry->d_sb;
 
 	befs_debug(sb, "---> befs_statfs()");
 
@@ -899,11 +900,12 @@ befs_statfs(struct super_block *sb, struct kstatfs *buf)
 	return 0;
 }
 
-static struct super_block *
+static int
 befs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name,
-	    void *data)
+	    void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type befs_fs_type = {
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 55a7a78332f..cf74f3d4d96 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -203,8 +203,9 @@ static void bfs_put_super(struct super_block *s)
 	s->s_fs_info = NULL;
 }
 
-static int bfs_statfs(struct super_block *s, struct kstatfs *buf)
+static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct super_block *s = dentry->d_sb;
 	struct bfs_sb_info *info = BFS_SB(s);
 	u64 id = huge_encode_dev(s->s_bdev->bd_dev);
 	buf->f_type = BFS_MAGIC;
@@ -410,10 +411,10 @@ out:
 	return -EINVAL;
 }
 
-static struct super_block *bfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int bfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super, mnt);
 }
 
 static struct file_system_type bfs_fs_type = {
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 537893a1601..d0434406eae 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -38,15 +38,13 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/random.h>
-
+#include <linux/elf.h>
 #include <asm/uaccess.h>
 #include <asm/param.h>
 #include <asm/page.h>
 
-#include <linux/elf.h>
-
-static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs);
-static int load_elf_library(struct file*);
+static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
+static int load_elf_library(struct file *);
 static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
 extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
 
@@ -59,15 +57,15 @@ extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
  * don't even try.
  */
 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
-static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file);
+static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
 #else
 #define elf_core_dump	NULL
 #endif
 
 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
-# define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
+#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
 #else
-# define ELF_MIN_ALIGN	PAGE_SIZE
+#define ELF_MIN_ALIGN	PAGE_SIZE
 #endif
 
 #ifndef ELF_CORE_EFLAGS
@@ -86,7 +84,7 @@ static struct linux_binfmt elf_format = {
 		.min_coredump	= ELF_EXEC_PAGESIZE
 };
 
-#define BAD_ADDR(x)	((unsigned long)(x) > TASK_SIZE)
+#define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE)
 
 static int set_brk(unsigned long start, unsigned long end)
 {
@@ -104,13 +102,11 @@ static int set_brk(unsigned long start, unsigned long end)
 	return 0;
 }
 
-
 /* We need to explicitly zero any fractional pages
    after the data section (i.e. bss).  This would
    contain the junk from the file that should not
-   be in memory */
-
-
+   be in memory
+ */
 static int padzero(unsigned long elf_bss)
 {
 	unsigned long nbyte;
@@ -129,7 +125,9 @@ static int padzero(unsigned long elf_bss)
 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
 #define STACK_ROUND(sp, items) \
 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
-#define STACK_ALLOC(sp, len) ({ elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; old_sp; })
+#define STACK_ALLOC(sp, len) ({ \
+	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
+	old_sp; })
 #else
 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
 #define STACK_ROUND(sp, items) \
@@ -138,7 +136,7 @@ static int padzero(unsigned long elf_bss)
 #endif
 
 static int
-create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
+create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 		int interp_aout, unsigned long load_addr,
 		unsigned long interp_load_addr)
 {
@@ -161,7 +159,6 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
 	 * for userspace to get any other way, in others (i386) it is
 	 * merely difficult.
 	 */
-
 	u_platform = NULL;
 	if (k_platform) {
 		size_t len = strlen(k_platform) + 1;
@@ -171,7 +168,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
 		 * evictions by the processes running on the same package. One
 		 * thing we can do is to shuffle the initial stack for them.
 		 */
-	 
+
 		p = arch_align_stack(p);
 
 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
@@ -180,9 +177,12 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
 	}
 
 	/* Create the ELF interpreter info */
-	elf_info = (elf_addr_t *) current->mm->saved_auxv;
+	elf_info = (elf_addr_t *)current->mm->saved_auxv;
 #define NEW_AUX_ENT(id, val) \
-	do { elf_info[ei_index++] = id; elf_info[ei_index++] = val; } while (0)
+	do { \
+		elf_info[ei_index++] = id; \
+		elf_info[ei_index++] = val; \
+	} while (0)
 
 #ifdef ARCH_DLINFO
 	/* 
@@ -195,21 +195,22 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
-	NEW_AUX_ENT(AT_PHENT, sizeof (struct elf_phdr));
+	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
 	NEW_AUX_ENT(AT_FLAGS, 0);
 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
-	NEW_AUX_ENT(AT_UID, (elf_addr_t) tsk->uid);
-	NEW_AUX_ENT(AT_EUID, (elf_addr_t) tsk->euid);
-	NEW_AUX_ENT(AT_GID, (elf_addr_t) tsk->gid);
-	NEW_AUX_ENT(AT_EGID, (elf_addr_t) tsk->egid);
- 	NEW_AUX_ENT(AT_SECURE, (elf_addr_t) security_bprm_secureexec(bprm));
+	NEW_AUX_ENT(AT_UID, tsk->uid);
+	NEW_AUX_ENT(AT_EUID, tsk->euid);
+	NEW_AUX_ENT(AT_GID, tsk->gid);
+	NEW_AUX_ENT(AT_EGID, tsk->egid);
+ 	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
 	if (k_platform) {
-		NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform);
+		NEW_AUX_ENT(AT_PLATFORM,
+			    (elf_addr_t)(unsigned long)u_platform);
 	}
 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
-		NEW_AUX_ENT(AT_EXECFD, (elf_addr_t) bprm->interp_data);
+		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
 	}
 #undef NEW_AUX_ENT
 	/* AT_NULL is zero; clear the rest too */
@@ -232,7 +233,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
 	/* Point sp at the lowest address on the stack */
 #ifdef CONFIG_STACK_GROWSUP
 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
-	bprm->exec = (unsigned long) sp; /* XXX: PARISC HACK */
+	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
 #else
 	sp = (elf_addr_t __user *)bprm->p;
 #endif
@@ -285,7 +286,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
 #ifndef elf_map
 
 static unsigned long elf_map(struct file *filep, unsigned long addr,
-			struct elf_phdr *eppnt, int prot, int type)
+		struct elf_phdr *eppnt, int prot, int type)
 {
 	unsigned long map_addr;
 	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
@@ -310,9 +311,8 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
    is only provided so that we can read a.out libraries that have
    an ELF header */
 
-static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
-				     struct file * interpreter,
-				     unsigned long *interp_load_addr)
+static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
+		struct file *interpreter, unsigned long *interp_load_addr)
 {
 	struct elf_phdr *elf_phdata;
 	struct elf_phdr *eppnt;
@@ -342,15 +342,15 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
 		goto out;
 
 	/* Now read in all of the header information */
-
 	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
 	if (size > ELF_MIN_ALIGN)
 		goto out;
-	elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
+	elf_phdata = kmalloc(size, GFP_KERNEL);
 	if (!elf_phdata)
 		goto out;
 
-	retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size);
+	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
+			     (char *)elf_phdata,size);
 	error = -EIO;
 	if (retval != size) {
 		if (retval < 0)
@@ -359,58 +359,65 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
 	}
 
 	eppnt = elf_phdata;
-	for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
-	  if (eppnt->p_type == PT_LOAD) {
-	    int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
-	    int elf_prot = 0;
-	    unsigned long vaddr = 0;
-	    unsigned long k, map_addr;
-
-	    if (eppnt->p_flags & PF_R) elf_prot =  PROT_READ;
-	    if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
-	    if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
-	    vaddr = eppnt->p_vaddr;
-	    if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
-	    	elf_type |= MAP_FIXED;
-
-	    map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type);
-	    error = map_addr;
-	    if (BAD_ADDR(map_addr))
-	    	goto out_close;
-
-	    if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) {
-		load_addr = map_addr - ELF_PAGESTART(vaddr);
-		load_addr_set = 1;
-	    }
-
-	    /*
-	     * Check to see if the section's size will overflow the
-	     * allowed task size. Note that p_filesz must always be
-	     * <= p_memsize so it is only necessary to check p_memsz.
-	     */
-	    k = load_addr + eppnt->p_vaddr;
-	    if (k > TASK_SIZE || eppnt->p_filesz > eppnt->p_memsz ||
-		eppnt->p_memsz > TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) {
-	        error = -ENOMEM;
-		goto out_close;
-	    }
-
-	    /*
-	     * Find the end of the file mapping for this phdr, and keep
-	     * track of the largest address we see for this.
-	     */
-	    k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
-	    if (k > elf_bss)
-		elf_bss = k;
-
-	    /*
-	     * Do the same thing for the memory mapping - between
-	     * elf_bss and last_bss is the bss section.
-	     */
-	    k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
-	    if (k > last_bss)
-		last_bss = k;
-	  }
+	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
+		if (eppnt->p_type == PT_LOAD) {
+			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
+			int elf_prot = 0;
+			unsigned long vaddr = 0;
+			unsigned long k, map_addr;
+
+			if (eppnt->p_flags & PF_R)
+		    		elf_prot = PROT_READ;
+			if (eppnt->p_flags & PF_W)
+				elf_prot |= PROT_WRITE;
+			if (eppnt->p_flags & PF_X)
+				elf_prot |= PROT_EXEC;
+			vaddr = eppnt->p_vaddr;
+			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
+				elf_type |= MAP_FIXED;
+
+			map_addr = elf_map(interpreter, load_addr + vaddr,
+					   eppnt, elf_prot, elf_type);
+			error = map_addr;
+			if (BAD_ADDR(map_addr))
+				goto out_close;
+
+			if (!load_addr_set &&
+			    interp_elf_ex->e_type == ET_DYN) {
+				load_addr = map_addr - ELF_PAGESTART(vaddr);
+				load_addr_set = 1;
+			}
+
+			/*
+			 * Check to see if the section's size will overflow the
+			 * allowed task size. Note that p_filesz must always be
+			 * <= p_memsz, so it's only necessary to check p_memsz.
+			 */
+			k = load_addr + eppnt->p_vaddr;
+			if (k > TASK_SIZE ||
+			    eppnt->p_filesz > eppnt->p_memsz ||
+			    eppnt->p_memsz > TASK_SIZE ||
+			    TASK_SIZE - eppnt->p_memsz < k) {
+				error = -ENOMEM;
+				goto out_close;
+			}
+
+			/*
+			 * Find the end of the file mapping for this phdr, and
+			 * keep track of the largest address we see for this.
+			 */
+			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
+			if (k > elf_bss)
+				elf_bss = k;
+
+			/*
+			 * Do the same thing for the memory mapping - between
+			 * elf_bss and last_bss is the bss section.
+			 */
+			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
+			if (k > last_bss)
+				last_bss = k;
+		}
 	}
 
 	/*
@@ -424,7 +431,8 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
 		goto out_close;
 	}
 
-	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);	/* What we have mapped so far */
+	/* What we have mapped so far */
+	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
 
 	/* Map the last of the bss segment */
 	if (last_bss > elf_bss) {
@@ -436,7 +444,7 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
 	}
 
 	*interp_load_addr = load_addr;
-	error = ((unsigned long) interp_elf_ex->e_entry) + load_addr;
+	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;
 
 out_close:
 	kfree(elf_phdata);
@@ -444,8 +452,8 @@ out:
 	return error;
 }
 
-static unsigned long load_aout_interp(struct exec * interp_ex,
-			     struct file * interpreter)
+static unsigned long load_aout_interp(struct exec *interp_ex,
+		struct file *interpreter)
 {
 	unsigned long text_data, elf_entry = ~0UL;
 	char __user * addr;
@@ -464,7 +472,7 @@ static unsigned long load_aout_interp(struct exec * interp_ex,
 	case ZMAGIC:
 	case QMAGIC:
 		offset = N_TXTOFF(*interp_ex);
-		addr = (char __user *) N_TXTADDR(*interp_ex);
+		addr = (char __user *)N_TXTADDR(*interp_ex);
 		break;
 	default:
 		goto out;
@@ -480,7 +488,6 @@ static unsigned long load_aout_interp(struct exec * interp_ex,
 	flush_icache_range((unsigned long)addr,
 	                   (unsigned long)addr + text_data);
 
-
 	down_write(&current->mm->mmap_sem);	
 	do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
 		interp_ex->a_bss);
@@ -519,7 +526,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top)
 #endif
 }
 
-static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 {
 	struct file *interpreter = NULL; /* to shut gcc up */
  	unsigned long load_addr = 0, load_bias = 0;
@@ -528,7 +535,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	unsigned int interpreter_type = INTERPRETER_NONE;
 	unsigned char ibcs2_interpreter = 0;
 	unsigned long error;
-	struct elf_phdr * elf_ppnt, *elf_phdata;
+	struct elf_phdr *elf_ppnt, *elf_phdata;
 	unsigned long elf_bss, elf_brk;
 	int elf_exec_fileno;
 	int retval, i;
@@ -553,7 +560,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	}
 	
 	/* Get the exec-header */
-	loc->elf_ex = *((struct elfhdr *) bprm->buf);
+	loc->elf_ex = *((struct elfhdr *)bprm->buf);
 
 	retval = -ENOEXEC;
 	/* First of all, some simple consistency checks */
@@ -568,7 +575,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 		goto out;
 
 	/* Now read in all of the header information */
-
 	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
 		goto out;
 	if (loc->elf_ex.e_phnum < 1 ||
@@ -576,18 +582,19 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 		goto out;
 	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
 	retval = -ENOMEM;
-	elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
+	elf_phdata = kmalloc(size, GFP_KERNEL);
 	if (!elf_phdata)
 		goto out;
 
-	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, (char *) elf_phdata, size);
+	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
+			     (char *)elf_phdata, size);
 	if (retval != size) {
 		if (retval >= 0)
 			retval = -EIO;
 		goto out_free_ph;
 	}
 
-	files = current->files;		/* Refcounted so ok */
+	files = current->files;	/* Refcounted so ok */
 	retval = unshare_files();
 	if (retval < 0)
 		goto out_free_ph;
@@ -598,7 +605,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 
 	/* exec will make our files private anyway, but for the a.out
 	   loader stuff we need to do it earlier */
-
 	retval = get_unused_fd();
 	if (retval < 0)
 		goto out_free_fh;
@@ -620,7 +626,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 			 * shared libraries - for now assume that this
 			 * is an a.out format binary
 			 */
-
 			retval = -ENOEXEC;
 			if (elf_ppnt->p_filesz > PATH_MAX || 
 			    elf_ppnt->p_filesz < 2)
@@ -628,13 +633,13 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 
 			retval = -ENOMEM;
 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
-							   GFP_KERNEL);
+						  GFP_KERNEL);
 			if (!elf_interpreter)
 				goto out_free_file;
 
 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
-					   elf_interpreter,
-					   elf_ppnt->p_filesz);
+					     elf_interpreter,
+					     elf_ppnt->p_filesz);
 			if (retval != elf_ppnt->p_filesz) {
 				if (retval >= 0)
 					retval = -EIO;
@@ -678,7 +683,8 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 			retval = PTR_ERR(interpreter);
 			if (IS_ERR(interpreter))
 				goto out_free_interp;
-			retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE);
+			retval = kernel_read(interpreter, 0, bprm->buf,
+					     BINPRM_BUF_SIZE);
 			if (retval != BINPRM_BUF_SIZE) {
 				if (retval >= 0)
 					retval = -EIO;
@@ -686,8 +692,8 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 			}
 
 			/* Get the exec headers */
-			loc->interp_ex = *((struct exec *) bprm->buf);
-			loc->interp_elf_ex = *((struct elfhdr *) bprm->buf);
+			loc->interp_ex = *((struct exec *)bprm->buf);
+			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
 			break;
 		}
 		elf_ppnt++;
@@ -739,7 +745,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 
 	/* OK, we are done with that, now set up the arg stuff,
 	   and then start this sucker up */
-
 	if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
 		char *passed_p = passed_fileno;
 		sprintf(passed_fileno, "%d", elf_exec_fileno);
@@ -759,7 +764,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 
 	/* Discard our unneeded old files struct */
 	if (files) {
-		steal_locks(files);
 		put_files_struct(files);
 		files = NULL;
 	}
@@ -778,7 +782,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
 		current->personality |= READ_IMPLIES_EXEC;
 
-	if ( !(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 		current->flags |= PF_RANDOMIZE;
 	arch_pick_mmap_layout(current->mm);
 
@@ -799,8 +803,8 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	   the correct location in memory.  At this point, we assume that
 	   the image should be loaded at fixed address, not at a variable
 	   address. */
-
-	for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
+	for(i = 0, elf_ppnt = elf_phdata;
+	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
 		int elf_prot = 0, elf_flags;
 		unsigned long k, vaddr;
 
@@ -828,30 +832,35 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 							load_bias, nbyte)) {
 					/*
 					 * This bss-zeroing can fail if the ELF
-					 * file specifies odd protections.  So
+					 * file specifies odd protections. So
 					 * we don't check the return value
 					 */
 				}
 			}
 		}
 
-		if (elf_ppnt->p_flags & PF_R) elf_prot |= PROT_READ;
-		if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
-		if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
+		if (elf_ppnt->p_flags & PF_R)
+			elf_prot |= PROT_READ;
+		if (elf_ppnt->p_flags & PF_W)
+			elf_prot |= PROT_WRITE;
+		if (elf_ppnt->p_flags & PF_X)
+			elf_prot |= PROT_EXEC;
 
-		elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;
+		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
 
 		vaddr = elf_ppnt->p_vaddr;
 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
 			elf_flags |= MAP_FIXED;
 		} else if (loc->elf_ex.e_type == ET_DYN) {
-			/* Try and get dynamic programs out of the way of the default mmap
-			   base, as well as whatever program they might try to exec.  This
-			   is because the brk will follow the loader, and is not movable.  */
+			/* Try and get dynamic programs out of the way of the
+			 * default mmap base, as well as whatever program they
+			 * might try to exec.  This is because the brk will
+			 * follow the loader, and is not movable.  */
 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
 		}
 
-		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags);
+		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
+				elf_prot, elf_flags);
 		if (BAD_ADDR(error)) {
 			send_sig(SIGKILL, current, 0);
 			goto out_free_dentry;
@@ -868,8 +877,10 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 			}
 		}
 		k = elf_ppnt->p_vaddr;
-		if (k < start_code) start_code = k;
-		if (start_data < k) start_data = k;
+		if (k < start_code)
+			start_code = k;
+		if (start_data < k)
+			start_data = k;
 
 		/*
 		 * Check to see if the section's size will overflow the
@@ -879,7 +890,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 		if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
 		    elf_ppnt->p_memsz > TASK_SIZE ||
 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
-			/* set_brk can never work.  Avoid overflows.  */
+			/* set_brk can never work. Avoid overflows. */
 			send_sig(SIGKILL, current, 0);
 			goto out_free_dentry;
 		}
@@ -967,8 +978,9 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 
 	compute_creds(bprm);
 	current->flags &= ~PF_FORKNOEXEC;
-	create_elf_tables(bprm, &loc->elf_ex, (interpreter_type == INTERPRETER_AOUT),
-			load_addr, interp_load_addr);
+	create_elf_tables(bprm, &loc->elf_ex,
+			  (interpreter_type == INTERPRETER_AOUT),
+			  load_addr, interp_load_addr);
 	/* N.B. passed_fileno might not be initialized? */
 	if (interpreter_type == INTERPRETER_AOUT)
 		current->mm->arg_start += strlen(passed_fileno) + 1;
@@ -982,7 +994,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
 		   and some applications "depend" upon this behavior.
 		   Since we do not have the power to recompile these, we
-		   emulate the SVr4 behavior.  Sigh.  */
+		   emulate the SVr4 behavior. Sigh. */
 		down_write(&current->mm->mmap_sem);
 		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
 				MAP_FIXED | MAP_PRIVATE, 0);
@@ -1037,7 +1049,6 @@ out_free_ph:
 
 /* This is really simpleminded and specialized - we are loading an
    a.out library that is given an ELF header. */
-
 static int load_elf_library(struct file *file)
 {
 	struct elf_phdr *elf_phdata;
@@ -1047,7 +1058,7 @@ static int load_elf_library(struct file *file)
 	struct elfhdr elf_ex;
 
 	error = -ENOEXEC;
-	retval = kernel_read(file, 0, (char *) &elf_ex, sizeof(elf_ex));
+	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
 	if (retval != sizeof(elf_ex))
 		goto out;
 
@@ -1056,7 +1067,7 @@ static int load_elf_library(struct file *file)
 
 	/* First of all, some simple consistency checks */
 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
-	   !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
+	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
 		goto out;
 
 	/* Now read in all of the header information */
@@ -1104,7 +1115,8 @@ static int load_elf_library(struct file *file)
 		goto out_free_ph;
 	}
 
-	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + ELF_MIN_ALIGN - 1);
+	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
+			    ELF_MIN_ALIGN - 1);
 	bss = eppnt->p_memsz + eppnt->p_vaddr;
 	if (bss > len) {
 		down_write(&current->mm->mmap_sem);
@@ -1163,7 +1175,7 @@ static int maydump(struct vm_area_struct *vma)
 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
 		return 0;
 
-	/* Dump shared memory only if mapped from an anonymous file.  */
+	/* Dump shared memory only if mapped from an anonymous file. */
 	if (vma->vm_flags & VM_SHARED)
 		return vma->vm_file->f_dentry->d_inode->i_nlink == 0;
 
@@ -1174,7 +1186,7 @@ static int maydump(struct vm_area_struct *vma)
 	return 1;
 }
 
-#define roundup(x, y)  ((((x)+((y)-1))/(y))*(y))
+#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
 
 /* An ELF note in memory */
 struct memelfnote
@@ -1277,11 +1289,11 @@ static void fill_note(struct memelfnote *note, const char *name, int type,
 }
 
 /*
- * fill up all the fields in prstatus from the given task struct, except registers
- * which need to be filled up separately.
+ * fill up all the fields in prstatus from the given task struct, except
+ * registers which need to be filled up separately.
  */
 static void fill_prstatus(struct elf_prstatus *prstatus,
-			struct task_struct *p, long signr) 
+		struct task_struct *p, long signr)
 {
 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
 	prstatus->pr_sigpend = p->pending.signal.sig[0];
@@ -1366,8 +1378,8 @@ struct elf_thread_status
 
 /*
  * In order to add the specific thread information for the elf file format,
- * we need to keep a linked list of every threads pr_status and then
- * create a single section for them in the final core file.
+ * we need to keep a linked list of every thread's pr_status and then create
+ * a single section for them in the final core file.
  */
 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
 {
@@ -1378,19 +1390,23 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
 	fill_prstatus(&t->prstatus, p, signr);
 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);	
 	
-	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), &(t->prstatus));
+	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
+		  &(t->prstatus));
 	t->num_notes++;
 	sz += notesize(&t->notes[0]);
 
-	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, &t->fpu))) {
-		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), &(t->fpu));
+	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
+								&t->fpu))) {
+		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
+			  &(t->fpu));
 		t->num_notes++;
 		sz += notesize(&t->notes[1]);
 	}
 
 #ifdef ELF_CORE_COPY_XFPREGS
 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
-		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu), &t->xfpu);
+		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
+			  &t->xfpu);
 		t->num_notes++;
 		sz += notesize(&t->notes[2]);
 	}
@@ -1405,7 +1421,7 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
  * and then they are actually written out.  If we run out of core limit
  * we just truncate.
  */
-static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
+static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 {
 #define	NUM_NOTES	6
 	int has_dumped = 0;
@@ -1434,12 +1450,12 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
 	/*
 	 * We no longer stop all VM operations.
 	 * 
-	 * This is because those proceses that could possibly change map_count or
-	 * the mmap / vma pages are now blocked in do_exit on current finishing
-	 * this core dump.
+	 * This is because those processes that could possibly change map_count
+	 * or the mmap / vma pages are now blocked in do_exit on current
+	 * finishing this core dump.
 	 *
 	 * Only ptrace can touch these memory addresses, but it doesn't change
-	 * the map_count or the pages allocated.  So no possibility of crashing
+	 * the map_count or the pages allocated. So no possibility of crashing
 	 * exists while dumping the mm->vm_next areas to the core file.
 	 */
   
@@ -1501,7 +1517,7 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
 #endif
 
 	/* Set up header */
-	fill_elf_header(elf, segs+1);	/* including notes section */
+	fill_elf_header(elf, segs + 1);	/* including notes section */
 
 	has_dumped = 1;
 	current->flags |= PF_DUMPCORE;
@@ -1511,24 +1527,24 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
 	 * with info from their /proc.
 	 */
 
-	fill_note(notes +0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
-	
+	fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
 	fill_psinfo(psinfo, current->group_leader, current->mm);
-	fill_note(notes +1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
+	fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
 	
 	numnote = 2;
 
-	auxv = (elf_addr_t *) current->mm->saved_auxv;
+	auxv = (elf_addr_t *)current->mm->saved_auxv;
 
 	i = 0;
 	do
 		i += 2;
 	while (auxv[i - 2] != AT_NULL);
 	fill_note(&notes[numnote++], "CORE", NT_AUXV,
-		  i * sizeof (elf_addr_t), auxv);
+		  i * sizeof(elf_addr_t), auxv);
 
   	/* Try to dump the FPU. */
-	if ((prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, fpu)))
+	if ((prstatus->pr_fpvalid =
+	     elf_core_copy_task_fpregs(current, regs, fpu)))
 		fill_note(notes + numnote++,
 			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
 #ifdef ELF_CORE_COPY_XFPREGS
@@ -1577,8 +1593,10 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
 		phdr.p_memsz = sz;
 		offset += phdr.p_filesz;
 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
-		if (vma->vm_flags & VM_WRITE) phdr.p_flags |= PF_W;
-		if (vma->vm_flags & VM_EXEC) phdr.p_flags |= PF_X;
+		if (vma->vm_flags & VM_WRITE)
+			phdr.p_flags |= PF_W;
+		if (vma->vm_flags & VM_EXEC)
+			phdr.p_flags |= PF_X;
 		phdr.p_align = ELF_EXEC_PAGESIZE;
 
 		DUMP_WRITE(&phdr, sizeof(phdr));
@@ -1595,7 +1613,9 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
 
 	/* write out the thread status notes section */
 	list_for_each(t, &thread_list) {
-		struct elf_thread_status *tmp = list_entry(t, struct elf_thread_status, list);
+		struct elf_thread_status *tmp =
+				list_entry(t, struct elf_thread_status, list);
+
 		for (i = 0; i < tmp->num_notes; i++)
 			if (!writenote(&tmp->notes[i], file))
 				goto end_coredump;
@@ -1612,18 +1632,19 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
 		for (addr = vma->vm_start;
 		     addr < vma->vm_end;
 		     addr += PAGE_SIZE) {
-			struct page* page;
+			struct page *page;
 			struct vm_area_struct *vma;
 
 			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
 						&page, &vma) <= 0) {
-				DUMP_SEEK (file->f_pos + PAGE_SIZE);
+				DUMP_SEEK(file->f_pos + PAGE_SIZE);
 			} else {
 				if (page == ZERO_PAGE(addr)) {
-					DUMP_SEEK (file->f_pos + PAGE_SIZE);
+					DUMP_SEEK(file->f_pos + PAGE_SIZE);
 				} else {
 					void *kaddr;
-					flush_cache_page(vma, addr, page_to_pfn(page));
+					flush_cache_page(vma, addr,
+							 page_to_pfn(page));
 					kaddr = kmap(page);
 					if ((size += PAGE_SIZE) > limit ||
 					    !dump_write(file, kaddr,
@@ -1645,7 +1666,8 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
 
 	if ((off_t)file->f_pos != offset) {
 		/* Sanity check */
-		printk(KERN_WARNING "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
+		printk(KERN_WARNING
+		       "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
 		       (off_t)file->f_pos, offset);
 	}
 
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index a2e48c999c2..eba4e23b9ca 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -435,9 +435,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 				   struct elf_fdpic_params *interp_params)
 {
 	unsigned long sp, csp, nitems;
-	elf_caddr_t *argv, *envp;
+	elf_caddr_t __user *argv, *envp;
 	size_t platform_len = 0, len;
-	char *k_platform, *u_platform, *p;
+	char *k_platform;
+	char __user *u_platform, *p;
 	long hwcap;
 	int loop;
 
@@ -462,12 +463,11 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	if (k_platform) {
 		platform_len = strlen(k_platform) + 1;
 		sp -= platform_len;
+		u_platform = (char __user *) sp;
 		if (__copy_to_user(u_platform, k_platform, platform_len) != 0)
 			return -EFAULT;
 	}
 
-	u_platform = (char *) sp;
-
 #if defined(__i386__) && defined(CONFIG_SMP)
 	/* in some cases (e.g. Hyper-Threading), we want to avoid L1 evictions
 	 * by the processes running on the same package. One thing we can do
@@ -490,7 +490,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	sp = (sp - len) & ~7UL;
 	exec_params->map_addr = sp;
 
-	if (copy_to_user((void *) sp, exec_params->loadmap, len) != 0)
+	if (copy_to_user((void __user *) sp, exec_params->loadmap, len) != 0)
 		return -EFAULT;
 
 	current->mm->context.exec_fdpic_loadmap = (unsigned long) sp;
@@ -501,7 +501,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 		sp = (sp - len) & ~7UL;
 		interp_params->map_addr = sp;
 
-		if (copy_to_user((void *) sp, interp_params->loadmap, len) != 0)
+		if (copy_to_user((void __user *) sp, interp_params->loadmap, len) != 0)
 			return -EFAULT;
 
 		current->mm->context.interp_fdpic_loadmap = (unsigned long) sp;
@@ -527,7 +527,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	/* put the ELF interpreter info on the stack */
 #define NEW_AUX_ENT(nr, id, val)						\
 	do {									\
-		struct { unsigned long _id, _val; } *ent = (void *) csp;	\
+		struct { unsigned long _id, _val; } __user *ent = (void __user *) csp;	\
 		__put_user((id), &ent[nr]._id);					\
 		__put_user((val), &ent[nr]._val);				\
 	} while (0)
@@ -564,13 +564,13 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 
 	/* allocate room for argv[] and envv[] */
 	csp -= (bprm->envc + 1) * sizeof(elf_caddr_t);
-	envp = (elf_caddr_t *) csp;
+	envp = (elf_caddr_t __user *) csp;
 	csp -= (bprm->argc + 1) * sizeof(elf_caddr_t);
-	argv = (elf_caddr_t *) csp;
+	argv = (elf_caddr_t __user *) csp;
 
 	/* stack argc */
 	csp -= sizeof(unsigned long);
-	__put_user(bprm->argc, (unsigned long *) csp);
+	__put_user(bprm->argc, (unsigned long __user *) csp);
 
 	BUG_ON(csp != sp);
 
@@ -581,7 +581,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	current->mm->arg_start = current->mm->start_stack - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p);
 #endif
 
-	p = (char *) current->mm->arg_start;
+	p = (char __user *) current->mm->arg_start;
 	for (loop = bprm->argc; loop > 0; loop--) {
 		__put_user((elf_caddr_t) p, argv++);
 		len = strnlen_user(p, PAGE_SIZE * MAX_ARG_PAGES);
@@ -1025,7 +1025,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 		/* clear the bit between beginning of mapping and beginning of PT_LOAD */
 		if (prot & PROT_WRITE && disp > 0) {
 			kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp);
-			clear_user((void *) maddr, disp);
+			clear_user((void __user *) maddr, disp);
 			maddr += disp;
 		}
 
@@ -1059,7 +1059,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 		if (prot & PROT_WRITE && excess1 > 0) {
 			kdebug("clear[%d] ad=%lx sz=%lx",
 			       loop, maddr + phdr->p_filesz, excess1);
-			clear_user((void *) maddr + phdr->p_filesz, excess1);
+			clear_user((void __user *) maddr + phdr->p_filesz, excess1);
 		}
 
 #else
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index d73d75591a3..07a4996cca3 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -203,7 +203,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		goto _error;
 
 	if (files) {
-		steal_locks(files);
 		put_files_struct(files);
 		files = NULL;
 	}
@@ -740,10 +739,10 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent)
 	return err;
 }
 
-static struct super_block *bm_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int bm_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, bm_fill_super);
+	return get_sb_single(fs_type, flags, data, bm_fill_super, mnt);
 }
 
 static struct linux_binfmt misc_format = {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f5958f413bd..028d9fb9c2d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -300,10 +300,10 @@ static struct super_operations bdev_sops = {
 	.clear_inode = bdev_clear_inode,
 };
 
-static struct super_block *bd_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int bd_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576);
+	return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt);
 }
 
 static struct file_system_type bd_type = {
@@ -414,21 +414,31 @@ EXPORT_SYMBOL(bdput);
 static struct block_device *bd_acquire(struct inode *inode)
 {
 	struct block_device *bdev;
+
 	spin_lock(&bdev_lock);
 	bdev = inode->i_bdev;
-	if (bdev && igrab(bdev->bd_inode)) {
+	if (bdev) {
+		atomic_inc(&bdev->bd_inode->i_count);
 		spin_unlock(&bdev_lock);
 		return bdev;
 	}
 	spin_unlock(&bdev_lock);
+
 	bdev = bdget(inode->i_rdev);
 	if (bdev) {
 		spin_lock(&bdev_lock);
-		if (inode->i_bdev)
-			__bd_forget(inode);
-		inode->i_bdev = bdev;
-		inode->i_mapping = bdev->bd_inode->i_mapping;
-		list_add(&inode->i_devices, &bdev->bd_inodes);
+		if (!inode->i_bdev) {
+			/*
+			 * We take an additional bd_inode->i_count for inode,
+			 * and it's released in clear_inode() of inode.
+			 * So, we can access it via ->i_mapping always
+			 * without igrab().
+			 */
+			atomic_inc(&bdev->bd_inode->i_count);
+			inode->i_bdev = bdev;
+			inode->i_mapping = bdev->bd_inode->i_mapping;
+			list_add(&inode->i_devices, &bdev->bd_inodes);
+		}
 		spin_unlock(&bdev_lock);
 	}
 	return bdev;
@@ -438,10 +448,18 @@ static struct block_device *bd_acquire(struct inode *inode)
 
 void bd_forget(struct inode *inode)
 {
+	struct block_device *bdev = NULL;
+
 	spin_lock(&bdev_lock);
-	if (inode->i_bdev)
+	if (inode->i_bdev) {
+		if (inode->i_sb != blockdev_superblock)
+			bdev = inode->i_bdev;
 		__bd_forget(inode);
+	}
 	spin_unlock(&bdev_lock);
+
+	if (bdev)
+		iput(bdev->bd_inode);
 }
 
 int bd_claim(struct block_device *bdev, void *holder)
diff --git a/fs/buffer.c b/fs/buffer.c
index 23f1f3a6807..373bb6292bd 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -331,7 +331,6 @@ long do_fsync(struct file *file, int datasync)
 		goto out;
 	}
 
-	current->flags |= PF_SYNCWRITE;
 	ret = filemap_fdatawrite(mapping);
 
 	/*
@@ -346,7 +345,6 @@ long do_fsync(struct file *file, int datasync)
 	err = filemap_fdatawait(mapping);
 	if (!ret)
 		ret = err;
-	current->flags &= ~PF_SYNCWRITE;
 out:
 	return ret;
 }
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c262d8874ce..7520f468715 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -166,8 +166,9 @@ cifs_put_super(struct super_block *sb)
 }
 
 static int
-cifs_statfs(struct super_block *sb, struct kstatfs *buf)
+cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct super_block *sb = dentry->d_sb;
 	int xid; 
 	int rc = -EOPNOTSUPP;
 	struct cifs_sb_info *cifs_sb;
@@ -460,9 +461,9 @@ struct super_operations cifs_super_ops = {
 	.remount_fs = cifs_remount,
 };
 
-static struct super_block *
+static int
 cifs_get_sb(struct file_system_type *fs_type,
-	    int flags, const char *dev_name, void *data)
+	    int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
 	int rc;
 	struct super_block *sb = sget(fs_type, NULL, set_anon_super, NULL);
@@ -470,7 +471,7 @@ cifs_get_sb(struct file_system_type *fs_type,
 	cFYI(1, ("Devname: %s flags: %d ", dev_name, flags));
 
 	if (IS_ERR(sb))
-		return sb;
+		return PTR_ERR(sb);
 
 	sb->s_flags = flags;
 
@@ -478,10 +479,10 @@ cifs_get_sb(struct file_system_type *fs_type,
 	if (rc) {
 		up_write(&sb->s_umount);
 		deactivate_super(sb);
-		return ERR_PTR(rc);
+		return rc;
 	}
 	sb->s_flags |= MS_ACTIVE;
-	return sb;
+	return simple_set_mnt(mnt, sb);
 }
 
 static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index c98755dca86..d56c0577c71 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -74,7 +74,7 @@ extern ssize_t cifs_user_write(struct file *file, const char __user *write_data,
 			 size_t write_size, loff_t * poffset);
 extern int cifs_lock(struct file *, int, struct file_lock *);
 extern int cifs_fsync(struct file *, struct dentry *, int);
-extern int cifs_flush(struct file *);
+extern int cifs_flush(struct file *, fl_owner_t id);
 extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
 extern const struct file_operations cifs_dir_ops;
 extern int cifs_dir_open(struct inode *inode, struct file *file);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e2b4ce1dad6..b4a18c1cab0 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1079,9 +1079,9 @@ static int cifs_writepages(struct address_space *mapping,
 	unsigned int bytes_written;
 	struct cifs_sb_info *cifs_sb;
 	int done = 0;
-	pgoff_t end = -1;
+	pgoff_t end;
 	pgoff_t index;
-	int is_range = 0;
+ 	int range_whole = 0;
 	struct kvec iov[32];
 	int len;
 	int n_iov = 0;
@@ -1122,16 +1122,14 @@ static int cifs_writepages(struct address_space *mapping,
 	xid = GetXid();
 
 	pagevec_init(&pvec, 0);
-	if (wbc->sync_mode == WB_SYNC_NONE)
+	if (wbc->range_cyclic) {
 		index = mapping->writeback_index; /* Start from prev offset */
-	else {
-		index = 0;
-		scanned = 1;
-	}
-	if (wbc->start || wbc->end) {
-		index = wbc->start >> PAGE_CACHE_SHIFT;
-		end = wbc->end >> PAGE_CACHE_SHIFT;
-		is_range = 1;
+		end = -1;
+	} else {
+		index = wbc->range_start >> PAGE_CACHE_SHIFT;
+		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+			range_whole = 1;
 		scanned = 1;
 	}
 retry:
@@ -1167,7 +1165,7 @@ retry:
 				break;
 			}
 
-			if (unlikely(is_range) && (page->index > end)) {
+			if (!wbc->range_cyclic && page->index > end) {
 				done = 1;
 				unlock_page(page);
 				break;
@@ -1271,7 +1269,7 @@ retry:
 		index = 0;
 		goto retry;
 	}
-	if (!is_range)
+	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
 		mapping->writeback_index = index;
 
 	FreeXid(xid);
@@ -1419,7 +1417,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
  * As file closes, flush all cached write data for this inode checking
  * for write behind errors.
  */
-int cifs_flush(struct file *file)
+int cifs_flush(struct file *file, fl_owner_t id)
 {
 	struct inode * inode = file->f_dentry->d_inode;
 	int rc = 0;
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 7c2642431fa..cc66c681bd1 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -164,7 +164,7 @@ int coda_open(struct inode *coda_inode, struct file *coda_file)
 	return 0;
 }
 
-int coda_flush(struct file *coda_file)
+int coda_flush(struct file *coda_file, fl_owner_t id)
 {
 	unsigned short flags = coda_file->f_flags & ~O_EXCL;
 	unsigned short coda_flags = coda_flags_to_cflags(flags);
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index ada1a81df6b..87f1dc8aa24 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -36,7 +36,7 @@
 /* VFS super_block ops */
 static void coda_clear_inode(struct inode *);
 static void coda_put_super(struct super_block *);
-static int coda_statfs(struct super_block *sb, struct kstatfs *buf);
+static int coda_statfs(struct dentry *dentry, struct kstatfs *buf);
 
 static kmem_cache_t * coda_inode_cachep;
 
@@ -278,13 +278,13 @@ struct inode_operations coda_file_inode_operations = {
 	.setattr	= coda_setattr,
 };
 
-static int coda_statfs(struct super_block *sb, struct kstatfs *buf)
+static int coda_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	int error;
 	
 	lock_kernel();
 
-	error = venus_statfs(sb, buf);
+	error = venus_statfs(dentry, buf);
 
 	unlock_kernel();
 
@@ -307,10 +307,10 @@ static int coda_statfs(struct super_block *sb, struct kstatfs *buf)
 
 /* init_coda: used by filesystems.c to register coda */
 
-static struct super_block *coda_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int coda_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, coda_fill_super);
+	return get_sb_nodev(fs_type, flags, data, coda_fill_super, mnt);
 }
 
 struct file_system_type coda_fs_type = {
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 1bae99650a9..b040eba13a7 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -611,7 +611,7 @@ int venus_pioctl(struct super_block *sb, struct CodaFid *fid,
 	return error;
 }
 
-int venus_statfs(struct super_block *sb, struct kstatfs *sfs) 
+int venus_statfs(struct dentry *dentry, struct kstatfs *sfs)
 { 
         union inputArgs *inp;
         union outputArgs *outp;
@@ -620,7 +620,7 @@ int venus_statfs(struct super_block *sb, struct kstatfs *sfs)
 	insize = max_t(unsigned int, INSIZE(statfs), OUTSIZE(statfs));
 	UPARG(CODA_STATFS);
 
-        error = coda_upcall(coda_sbp(sb), insize, &outsize, inp);
+        error = coda_upcall(coda_sbp(dentry->d_sb), insize, &outsize, inp);
 	
         if (!error) {
 		sfs->f_blocks = outp->coda_statfs.stat.f_blocks;
diff --git a/fs/compat.c b/fs/compat.c
index b1f64786a61..7e7e5bc4f3c 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -197,7 +197,7 @@ asmlinkage long compat_sys_statfs(const char __user *path, struct compat_statfs
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct kstatfs tmp;
-		error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp);
+		error = vfs_statfs(nd.dentry, &tmp);
 		if (!error)
 			error = put_compat_statfs(buf, &tmp);
 		path_release(&nd);
@@ -215,7 +215,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp);
+	error = vfs_statfs(file->f_dentry, &tmp);
 	if (!error)
 		error = put_compat_statfs(buf, &tmp);
 	fput(file);
@@ -265,7 +265,7 @@ asmlinkage long compat_sys_statfs64(const char __user *path, compat_size_t sz, s
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct kstatfs tmp;
-		error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp);
+		error = vfs_statfs(nd.dentry, &tmp);
 		if (!error)
 			error = put_compat_statfs64(buf, &tmp);
 		path_release(&nd);
@@ -286,7 +286,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp);
+	error = vfs_statfs(file->f_dentry, &tmp);
 	if (!error)
 		error = put_compat_statfs64(buf, &tmp);
 	fput(file);
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index f920d30478e..94dab7bdd85 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -103,10 +103,10 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
 	return 0;
 }
 
-static struct super_block *configfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int configfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, configfs_fill_super);
+	return get_sb_single(fs_type, flags, data, configfs_fill_super, mnt);
 }
 
 static struct file_system_type configfs_fs_type = {
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 9efcc3a164e..c45d7386080 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -181,9 +181,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
 		struct page *page = NULL;
 
 		if (blocknr + i < devsize) {
-			page = read_cache_page(mapping, blocknr + i,
-				(filler_t *)mapping->a_ops->readpage,
-				NULL);
+			page = read_mapping_page(mapping, blocknr + i, NULL);
 			/* synchronous error? */
 			if (IS_ERR(page))
 				page = NULL;
@@ -322,8 +320,10 @@ out:
 	return -EINVAL;
 }
 
-static int cramfs_statfs(struct super_block *sb, struct kstatfs *buf)
+static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct super_block *sb = dentry->d_sb;
+
 	buf->f_type = CRAMFS_MAGIC;
 	buf->f_bsize = PAGE_CACHE_SIZE;
 	buf->f_blocks = CRAMFS_SB(sb)->blocks;
@@ -528,10 +528,11 @@ static struct super_operations cramfs_ops = {
 	.statfs		= cramfs_statfs,
 };
 
-static struct super_block *cramfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int cramfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type cramfs_fs_type = {
diff --git a/fs/dcache.c b/fs/dcache.c
index 940d188e5d1..313b54b2b8f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -359,12 +359,13 @@ restart:
 }
 
 /*
- * Throw away a dentry - free the inode, dput the parent.
- * This requires that the LRU list has already been
- * removed.
+ * Throw away a dentry - free the inode, dput the parent.  This requires that
+ * the LRU list has already been removed.
+ *
  * Called with dcache_lock, drops it and then regains.
+ * Called with dentry->d_lock held, drops it.
  */
-static inline void prune_one_dentry(struct dentry * dentry)
+static void prune_one_dentry(struct dentry * dentry)
 {
 	struct dentry * parent;
 
@@ -382,6 +383,8 @@ static inline void prune_one_dentry(struct dentry * dentry)
 /**
  * prune_dcache - shrink the dcache
  * @count: number of entries to try and free
+ * @sb: if given, ignore dentries for other superblocks
+ *         which are being unmounted.
  *
  * Shrink the dcache. This is done when we need
  * more memory, or simply when we need to unmount
@@ -392,16 +395,29 @@ static inline void prune_one_dentry(struct dentry * dentry)
  * all the dentries are in use.
  */
  
-static void prune_dcache(int count)
+static void prune_dcache(int count, struct super_block *sb)
 {
 	spin_lock(&dcache_lock);
 	for (; count ; count--) {
 		struct dentry *dentry;
 		struct list_head *tmp;
+		struct rw_semaphore *s_umount;
 
 		cond_resched_lock(&dcache_lock);
 
 		tmp = dentry_unused.prev;
+		if (unlikely(sb)) {
+			/* Try to find a dentry for this sb, but don't try
+			 * too hard, if they aren't near the tail they will
+			 * be moved down again soon
+			 */
+			int skip = count;
+			while (skip && tmp != &dentry_unused &&
+			    list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
+				skip--;
+				tmp = tmp->prev;
+			}
+		}
 		if (tmp == &dentry_unused)
 			break;
 		list_del_init(tmp);
@@ -427,7 +443,45 @@ static void prune_dcache(int count)
  			spin_unlock(&dentry->d_lock);
 			continue;
 		}
-		prune_one_dentry(dentry);
+		/*
+		 * If the dentry is not DCACHE_REFERENCED, it is time
+		 * to remove it from the dcache, provided the super block is
+		 * NULL (which means we are trying to reclaim memory)
+		 * or this dentry belongs to the same super block that
+		 * we want to shrink.
+		 */
+		/*
+		 * If this dentry is for "my" filesystem, then I can prune it
+		 * without taking the s_umount lock (I already hold it).
+		 */
+		if (sb && dentry->d_sb == sb) {
+			prune_one_dentry(dentry);
+			continue;
+		}
+		/*
+		 * ...otherwise we need to be sure this filesystem isn't being
+		 * unmounted, otherwise we could race with
+		 * generic_shutdown_super(), and end up holding a reference to
+		 * an inode while the filesystem is unmounted.
+		 * So we try to get s_umount, and make sure s_root isn't NULL.
+		 * (Take a local copy of s_umount to avoid a use-after-free of
+		 * `dentry').
+		 */
+		s_umount = &dentry->d_sb->s_umount;
+		if (down_read_trylock(s_umount)) {
+			if (dentry->d_sb->s_root != NULL) {
+				prune_one_dentry(dentry);
+				up_read(s_umount);
+				continue;
+			}
+			up_read(s_umount);
+		}
+		spin_unlock(&dentry->d_lock);
+		/* Cannot remove the first dentry, and it isn't appropriate
+		 * to move it to the head of the list, so give up, and try
+		 * later
+		 */
+		break;
 	}
 	spin_unlock(&dcache_lock);
 }
@@ -630,46 +684,7 @@ void shrink_dcache_parent(struct dentry * parent)
 	int found;
 
 	while ((found = select_parent(parent)) != 0)
-		prune_dcache(found);
-}
-
-/**
- * shrink_dcache_anon - further prune the cache
- * @head: head of d_hash list of dentries to prune
- *
- * Prune the dentries that are anonymous
- *
- * parsing d_hash list does not hlist_for_each_entry_rcu() as it
- * done under dcache_lock.
- *
- */
-void shrink_dcache_anon(struct hlist_head *head)
-{
-	struct hlist_node *lp;
-	int found;
-	do {
-		found = 0;
-		spin_lock(&dcache_lock);
-		hlist_for_each(lp, head) {
-			struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
-			if (!list_empty(&this->d_lru)) {
-				dentry_stat.nr_unused--;
-				list_del_init(&this->d_lru);
-			}
-
-			/* 
-			 * move only zero ref count dentries to the end 
-			 * of the unused list for prune_dcache
-			 */
-			if (!atomic_read(&this->d_count)) {
-				list_add_tail(&this->d_lru, &dentry_unused);
-				dentry_stat.nr_unused++;
-				found++;
-			}
-		}
-		spin_unlock(&dcache_lock);
-		prune_dcache(found);
-	} while(found);
+		prune_dcache(found, parent->d_sb);
 }
 
 /*
@@ -689,7 +704,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
-		prune_dcache(nr);
+		prune_dcache(nr, NULL);
 	}
 	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 }
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b55b4ea9a67..440128ebef3 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -111,11 +111,11 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
 	return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
 }
 
-static struct super_block *debug_get_sb(struct file_system_type *fs_type,
-				        int flags, const char *dev_name,
-					void *data)
+static int debug_get_sb(struct file_system_type *fs_type,
+			int flags, const char *dev_name,
+			void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, debug_fill_super);
+	return get_sb_single(fs_type, flags, data, debug_fill_super, mnt);
 }
 
 static struct file_system_type debug_fs_type = {
diff --git a/fs/devfs/base.c b/fs/devfs/base.c
index 52f5059c4f3..51a97f13274 100644
--- a/fs/devfs/base.c
+++ b/fs/devfs/base.c
@@ -2549,11 +2549,11 @@ static int devfs_fill_super(struct super_block *sb, void *data, int silent)
 	return -EINVAL;
 }				/*  End Function devfs_fill_super  */
 
-static struct super_block *devfs_get_sb(struct file_system_type *fs_type,
-					int flags, const char *dev_name,
-					void *data)
+static int devfs_get_sb(struct file_system_type *fs_type,
+			int flags, const char *dev_name,
+			void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, devfs_fill_super);
+	return get_sb_single(fs_type, flags, data, devfs_fill_super, mnt);
 }
 
 static struct file_system_type devfs_fs_type = {
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 14c5620b5ca..f7aef5bb584 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -130,10 +130,10 @@ fail:
 	return -ENOMEM;
 }
 
-static struct super_block *devpts_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int devpts_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, devpts_fill_super);
+	return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt);
 }
 
 static struct file_system_type devpts_fs_type = {
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b05d1b21877..538fb0418fb 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -162,7 +162,7 @@ static int dio_refill_pages(struct dio *dio)
 		NULL);				/* vmas */
 	up_read(&current->mm->mmap_sem);
 
-	if (ret < 0 && dio->blocks_available && (dio->rw == WRITE)) {
+	if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {
 		struct page *page = ZERO_PAGE(dio->curr_user_address);
 		/*
 		 * A memory fault, but the filesystem has some outstanding
@@ -535,7 +535,7 @@ static int get_more_blocks(struct dio *dio)
 		map_bh->b_state = 0;
 		map_bh->b_size = fs_count << dio->inode->i_blkbits;
 
-		create = dio->rw == WRITE;
+		create = dio->rw & WRITE;
 		if (dio->lock_type == DIO_LOCKING) {
 			if (dio->block_in_file < (i_size_read(dio->inode) >>
 							dio->blkbits))
@@ -867,7 +867,7 @@ do_holes:
 				loff_t i_size_aligned;
 
 				/* AKPM: eargh, -ENOTBLK is a hack */
-				if (dio->rw == WRITE) {
+				if (dio->rw & WRITE) {
 					page_cache_release(page);
 					return -ENOTBLK;
 				}
@@ -1045,7 +1045,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 		}
 	} /* end iovec loop */
 
-	if (ret == -ENOTBLK && rw == WRITE) {
+	if (ret == -ENOTBLK && (rw & WRITE)) {
 		/*
 		 * The remaining part of the request will be
 		 * be handled by buffered I/O when we return
@@ -1089,7 +1089,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	if (dio->is_async) {
 		int should_wait = 0;
 
-		if (dio->result < dio->size && rw == WRITE) {
+		if (dio->result < dio->size && (rw & WRITE)) {
 			dio->waiter = current;
 			should_wait = 1;
 		}
@@ -1142,7 +1142,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 			ret = transferred;
 
 		/* We could have also come here on an AIO file extend */
-		if (!is_sync_kiocb(iocb) && rw == WRITE &&
+		if (!is_sync_kiocb(iocb) && (rw & WRITE) &&
 		    ret >= 0 && dio->result == dio->size)
 			/*
 			 * For AIO writes where we have completed the
@@ -1194,7 +1194,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	int acquire_i_mutex = 0;
 
 	if (rw & WRITE)
-		current->flags |= PF_SYNCWRITE;
+		rw = WRITE_SYNC;
 
 	if (bdev)
 		bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev));
@@ -1270,7 +1270,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	 * even for AIO, we need to wait for i/o to complete before
 	 * returning in this case.
 	 */
-	dio->is_async = !is_sync_kiocb(iocb) && !((rw == WRITE) &&
+	dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&
 		(end > i_size_read(inode)));
 
 	retval = direct_io_worker(rw, iocb, inode, iov, offset,
@@ -1284,8 +1284,6 @@ out:
 		mutex_unlock(&inode->i_mutex);
 	else if (acquire_i_mutex)
 		mutex_lock(&inode->i_mutex);
-	if (rw & WRITE)
-		current->flags &= ~PF_SYNCWRITE;
 	return retval;
 }
 EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/efs/super.c b/fs/efs/super.c
index dff623e3ddb..8ac2462ae5d 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -15,13 +15,13 @@
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
 
-static int efs_statfs(struct super_block *s, struct kstatfs *buf);
+static int efs_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int efs_fill_super(struct super_block *s, void *d, int silent);
 
-static struct super_block *efs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int efs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super, mnt);
 }
 
 static struct file_system_type efs_fs_type = {
@@ -322,8 +322,8 @@ out_no_fs:
 	return -EINVAL;
 }
 
-static int efs_statfs(struct super_block *s, struct kstatfs *buf) {
-	struct efs_sb_info *sb = SUPER_INFO(s);
+static int efs_statfs(struct dentry *dentry, struct kstatfs *buf) {
+	struct efs_sb_info *sb = SUPER_INFO(dentry->d_sb);
 
 	buf->f_type    = EFS_SUPER_MAGIC;	/* efs magic number */
 	buf->f_bsize   = EFS_BLOCKSIZE;		/* blocksize */
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 2695337d4d6..08e7e6a555c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -268,9 +268,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 		   int maxevents, long timeout);
 static int eventpollfs_delete_dentry(struct dentry *dentry);
 static struct inode *ep_eventpoll_inode(void);
-static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type,
-					      int flags, const char *dev_name,
-					      void *data);
+static int eventpollfs_get_sb(struct file_system_type *fs_type,
+			      int flags, const char *dev_name,
+			      void *data, struct vfsmount *mnt);
 
 /*
  * This semaphore is used to serialize ep_free() and eventpoll_release_file().
@@ -1595,11 +1595,12 @@ eexit_1:
 }
 
 
-static struct super_block *
+static int
 eventpollfs_get_sb(struct file_system_type *fs_type, int flags,
-		   const char *dev_name, void *data)
+		   const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_pseudo(fs_type, "eventpoll:", NULL, EVENTPOLLFS_MAGIC);
+	return get_sb_pseudo(fs_type, "eventpoll:", NULL, EVENTPOLLFS_MAGIC,
+			     mnt);
 }
 
 
diff --git a/fs/exec.c b/fs/exec.c
index d07858c0b7c..0b88bf64614 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -866,7 +866,6 @@ int flush_old_exec(struct linux_binprm * bprm)
 	bprm->mm = NULL;		/* We're using it now */
 
 	/* This is the point of no return */
-	steal_locks(files);
 	put_files_struct(files);
 
 	current->sas_ss_sp = current->sas_ss_size = 0;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index d672aa9f406..3c1c9aaaca6 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -159,8 +159,7 @@ fail:
 static struct page * ext2_get_page(struct inode *dir, unsigned long n)
 {
 	struct address_space *mapping = dir->i_mapping;
-	struct page *page = read_cache_page(mapping, n,
-				(filler_t*)mapping->a_ops->readpage, NULL);
+	struct page *page = read_mapping_page(mapping, n, NULL);
 	if (!IS_ERR(page)) {
 		wait_on_page_locked(page);
 		kmap(page);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7e30bae174e..ee4ba759581 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -39,7 +39,7 @@
 static void ext2_sync_super(struct super_block *sb,
 			    struct ext2_super_block *es);
 static int ext2_remount (struct super_block * sb, int * flags, char * data);
-static int ext2_statfs (struct super_block * sb, struct kstatfs * buf);
+static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
 
 void ext2_error (struct super_block * sb, const char * function,
 		 const char * fmt, ...)
@@ -834,9 +834,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 		printk ("EXT2-fs: not enough memory\n");
 		goto failed_mount;
 	}
-	percpu_counter_init(&sbi->s_freeblocks_counter);
-	percpu_counter_init(&sbi->s_freeinodes_counter);
-	percpu_counter_init(&sbi->s_dirs_counter);
 	bgl_lock_init(&sbi->s_blockgroup_lock);
 	sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts),
 			       GFP_KERNEL);
@@ -863,6 +860,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_gdb_count = db_count;
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 	spin_lock_init(&sbi->s_next_gen_lock);
+
+	percpu_counter_init(&sbi->s_freeblocks_counter,
+				ext2_count_free_blocks(sb));
+	percpu_counter_init(&sbi->s_freeinodes_counter,
+				ext2_count_free_inodes(sb));
+	percpu_counter_init(&sbi->s_dirs_counter,
+				ext2_count_dirs(sb));
 	/*
 	 * set up enough so that it can read an inode
 	 */
@@ -874,24 +878,18 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	if (!sb->s_root) {
 		iput(root);
 		printk(KERN_ERR "EXT2-fs: get root inode failed\n");
-		goto failed_mount2;
+		goto failed_mount3;
 	}
 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
 		dput(sb->s_root);
 		sb->s_root = NULL;
 		printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n");
-		goto failed_mount2;
+		goto failed_mount3;
 	}
 	if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
 		ext2_warning(sb, __FUNCTION__,
 			"mounting ext3 filesystem as ext2");
 	ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
-	percpu_counter_mod(&sbi->s_freeblocks_counter,
-				ext2_count_free_blocks(sb));
-	percpu_counter_mod(&sbi->s_freeinodes_counter,
-				ext2_count_free_inodes(sb));
-	percpu_counter_mod(&sbi->s_dirs_counter,
-				ext2_count_dirs(sb));
 	return 0;
 
 cantfind_ext2:
@@ -899,7 +897,10 @@ cantfind_ext2:
 		printk("VFS: Can't find an ext2 filesystem on dev %s.\n",
 		       sb->s_id);
 	goto failed_mount;
-
+failed_mount3:
+	percpu_counter_destroy(&sbi->s_freeblocks_counter);
+	percpu_counter_destroy(&sbi->s_freeinodes_counter);
+	percpu_counter_destroy(&sbi->s_dirs_counter);
 failed_mount2:
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
@@ -1038,8 +1039,9 @@ restore_opts:
 	return err;
 }
 
-static int ext2_statfs (struct super_block * sb, struct kstatfs * buf)
+static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 {
+	struct super_block *sb = dentry->d_sb;
 	struct ext2_sb_info *sbi = EXT2_SB(sb);
 	unsigned long overhead;
 	int i;
@@ -1087,10 +1089,10 @@ static int ext2_statfs (struct super_block * sb, struct kstatfs * buf)
 	return 0;
 }
 
-static struct super_block *ext2_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int ext2_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt);
 }
 
 #ifdef CONFIG_QUOTA
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index f8a5266ea1f..a60cc6ec130 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -58,7 +58,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait);
 static const char *ext3_decode_error(struct super_block * sb, int errno,
 				     char nbuf[16]);
 static int ext3_remount (struct super_block * sb, int * flags, char * data);
-static int ext3_statfs (struct super_block * sb, struct kstatfs * buf);
+static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf);
 static void ext3_unlockfs(struct super_block *sb);
 static void ext3_write_super (struct super_block * sb);
 static void ext3_write_super_lockfs(struct super_block *sb);
@@ -499,20 +499,21 @@ static void ext3_clear_inode(struct inode *inode)
 {
 	struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
 #ifdef CONFIG_EXT3_FS_POSIX_ACL
-       if (EXT3_I(inode)->i_acl &&
-           EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) {
-               posix_acl_release(EXT3_I(inode)->i_acl);
-               EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED;
-       }
-       if (EXT3_I(inode)->i_default_acl &&
-           EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) {
-               posix_acl_release(EXT3_I(inode)->i_default_acl);
-               EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED;
-       }
+	if (EXT3_I(inode)->i_acl &&
+			EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) {
+		posix_acl_release(EXT3_I(inode)->i_acl);
+		EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED;
+	}
+	if (EXT3_I(inode)->i_default_acl &&
+			EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) {
+		posix_acl_release(EXT3_I(inode)->i_default_acl);
+		EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED;
+	}
 #endif
 	ext3_discard_reservation(inode);
 	EXT3_I(inode)->i_block_alloc_info = NULL;
-	kfree(rsv);
+	if (unlikely(rsv))
+		kfree(rsv);
 }
 
 static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
@@ -1579,9 +1580,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		goto failed_mount;
 	}
 
-	percpu_counter_init(&sbi->s_freeblocks_counter);
-	percpu_counter_init(&sbi->s_freeinodes_counter);
-	percpu_counter_init(&sbi->s_dirs_counter);
 	bgl_lock_init(&sbi->s_blockgroup_lock);
 
 	for (i = 0; i < db_count; i++) {
@@ -1601,6 +1599,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	sbi->s_gdb_count = db_count;
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 	spin_lock_init(&sbi->s_next_gen_lock);
+
+	percpu_counter_init(&sbi->s_freeblocks_counter,
+		ext3_count_free_blocks(sb));
+	percpu_counter_init(&sbi->s_freeinodes_counter,
+		ext3_count_free_inodes(sb));
+	percpu_counter_init(&sbi->s_dirs_counter,
+		ext3_count_dirs(sb));
+
 	/* per fileystem reservation list head & lock */
 	spin_lock_init(&sbi->s_rsv_window_lock);
 	sbi->s_rsv_window_root = RB_ROOT;
@@ -1639,16 +1645,16 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	if (!test_opt(sb, NOLOAD) &&
 	    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
 		if (ext3_load_journal(sb, es, journal_devnum))
-			goto failed_mount2;
+			goto failed_mount3;
 	} else if (journal_inum) {
 		if (ext3_create_journal(sb, es, journal_inum))
-			goto failed_mount2;
+			goto failed_mount3;
 	} else {
 		if (!silent)
 			printk (KERN_ERR
 				"ext3: No journal on filesystem on %s\n",
 				sb->s_id);
-		goto failed_mount2;
+		goto failed_mount3;
 	}
 
 	/* We have now updated the journal if required, so we can
@@ -1671,7 +1677,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
 			printk(KERN_ERR "EXT3-fs: Journal does not support "
 			       "requested data journaling mode\n");
-			goto failed_mount3;
+			goto failed_mount4;
 		}
 	default:
 		break;
@@ -1694,13 +1700,13 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	if (!sb->s_root) {
 		printk(KERN_ERR "EXT3-fs: get root inode failed\n");
 		iput(root);
-		goto failed_mount3;
+		goto failed_mount4;
 	}
 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
 		dput(sb->s_root);
 		sb->s_root = NULL;
 		printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n");
-		goto failed_mount3;
+		goto failed_mount4;
 	}
 
 	ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
@@ -1723,13 +1729,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
 		"writeback");
 
-	percpu_counter_mod(&sbi->s_freeblocks_counter,
-		ext3_count_free_blocks(sb));
-	percpu_counter_mod(&sbi->s_freeinodes_counter,
-		ext3_count_free_inodes(sb));
-	percpu_counter_mod(&sbi->s_dirs_counter,
-		ext3_count_dirs(sb));
-
 	lock_kernel();
 	return 0;
 
@@ -1739,8 +1738,12 @@ cantfind_ext3:
 		       sb->s_id);
 	goto failed_mount;
 
-failed_mount3:
+failed_mount4:
 	journal_destroy(sbi->s_journal);
+failed_mount3:
+	percpu_counter_destroy(&sbi->s_freeblocks_counter);
+	percpu_counter_destroy(&sbi->s_freeinodes_counter);
+	percpu_counter_destroy(&sbi->s_dirs_counter);
 failed_mount2:
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
@@ -2318,8 +2321,9 @@ restore_opts:
 	return err;
 }
 
-static int ext3_statfs (struct super_block * sb, struct kstatfs * buf)
+static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
 {
+	struct super_block *sb = dentry->d_sb;
 	struct ext3_sb_info *sbi = EXT3_SB(sb);
 	struct ext3_super_block *es = sbi->s_es;
 	unsigned long overhead;
@@ -2646,10 +2650,10 @@ out:
 
 #endif
 
-static struct super_block *ext3_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int ext3_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt);
 }
 
 static struct file_system_type ext3_fs_type = {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index c1ce284f8a9..7c35d582ec1 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -539,18 +539,18 @@ static int fat_remount(struct super_block *sb, int *flags, char *data)
 	return 0;
 }
 
-static int fat_statfs(struct super_block *sb, struct kstatfs *buf)
+static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	struct msdos_sb_info *sbi = MSDOS_SB(sb);
+	struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
 
 	/* If the count of free cluster is still unknown, counts it here. */
 	if (sbi->free_clusters == -1) {
-		int err = fat_count_free_clusters(sb);
+		int err = fat_count_free_clusters(dentry->d_sb);
 		if (err)
 			return err;
 	}
 
-	buf->f_type = sb->s_magic;
+	buf->f_type = dentry->d_sb->s_magic;
 	buf->f_bsize = sbi->cluster_size;
 	buf->f_blocks = sbi->max_cluster - FAT_START_ENT;
 	buf->f_bfree = sbi->free_clusters;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 944652e9dde..308f2b6b502 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -210,4 +210,3 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
 	return err;
 }
 
-EXPORT_SYMBOL_GPL(fat_sync_bhs);
diff --git a/fs/file_table.c b/fs/file_table.c
index bcea1998b4d..506d5307108 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -300,5 +300,5 @@ void __init files_init(unsigned long mempages)
 	if (files_stat.max_files < NR_FILE)
 		files_stat.max_files = NR_FILE;
 	files_defer_init();
-	percpu_counter_init(&nr_files);
+	percpu_counter_init(&nr_files, 0);
 } 
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index 50aae77651b..c1be118fc06 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c
@@ -71,8 +71,7 @@ vxfs_get_page(struct address_space *mapping, u_long n)
 {
 	struct page *			pp;
 
-	pp = read_cache_page(mapping, n,
-			(filler_t*)mapping->a_ops->readpage, NULL);
+	pp = read_mapping_page(mapping, n, NULL);
 
 	if (!IS_ERR(pp)) {
 		wait_on_page_locked(pp);
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index b44c916d24a..b74b791fc23 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -40,6 +40,7 @@
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/vfs.h>
+#include <linux/mount.h>
 
 #include "vxfs.h"
 #include "vxfs_extern.h"
@@ -55,7 +56,7 @@ MODULE_ALIAS("vxfs"); /* makes mount -t vxfs autoload the module */
 
 
 static void		vxfs_put_super(struct super_block *);
-static int		vxfs_statfs(struct super_block *, struct kstatfs *);
+static int		vxfs_statfs(struct dentry *, struct kstatfs *);
 static int		vxfs_remount(struct super_block *, int *, char *);
 
 static struct super_operations vxfs_super_ops = {
@@ -90,12 +91,12 @@ vxfs_put_super(struct super_block *sbp)
 
 /**
  * vxfs_statfs - get filesystem information
- * @sbp:	VFS superblock
+ * @dentry:	VFS dentry to locate superblock
  * @bufp:	output buffer
  *
  * Description:
  *   vxfs_statfs fills the statfs buffer @bufp with information
- *   about the filesystem described by @sbp.
+ *   about the filesystem described by @dentry.
  *
  * Returns:
  *   Zero.
@@ -107,12 +108,12 @@ vxfs_put_super(struct super_block *sbp)
  *   This is everything but complete...
  */
 static int
-vxfs_statfs(struct super_block *sbp, struct kstatfs *bufp)
+vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp)
 {
-	struct vxfs_sb_info		*infp = VXFS_SBI(sbp);
+	struct vxfs_sb_info		*infp = VXFS_SBI(dentry->d_sb);
 
 	bufp->f_type = VXFS_SUPER_MAGIC;
-	bufp->f_bsize = sbp->s_blocksize;
+	bufp->f_bsize = dentry->d_sb->s_blocksize;
 	bufp->f_blocks = infp->vsi_raw->vs_dsize;
 	bufp->f_bfree = infp->vsi_raw->vs_free;
 	bufp->f_bavail = 0;
@@ -241,10 +242,11 @@ out:
 /*
  * The usual module blurb.
  */
-static struct super_block *vxfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int vxfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type vxfs_fs_type = {
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f3fbe2d030f..031b27a4bc9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -461,6 +461,8 @@ void sync_inodes_sb(struct super_block *sb, int wait)
 {
 	struct writeback_control wbc = {
 		.sync_mode	= wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
+		.range_start	= 0,
+		.range_end	= LLONG_MAX,
 	};
 	unsigned long nr_dirty = read_page_state(nr_dirty);
 	unsigned long nr_unstable = read_page_state(nr_unstable);
@@ -559,6 +561,8 @@ int write_inode_now(struct inode *inode, int sync)
 	struct writeback_control wbc = {
 		.nr_to_write = LONG_MAX,
 		.sync_mode = WB_SYNC_ALL,
+		.range_start = 0,
+		.range_end = LLONG_MAX,
 	};
 
 	if (!mapping_cap_writeback_dirty(inode->i_mapping))
@@ -619,7 +623,6 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int
 	int need_write_inode_now = 0;
 	int err2;
 
-	current->flags |= PF_SYNCWRITE;
 	if (what & OSYNC_DATA)
 		err = filemap_fdatawrite(mapping);
 	if (what & (OSYNC_METADATA|OSYNC_DATA)) {
@@ -632,7 +635,6 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int
 		if (!err)
 			err = err2;
 	}
-	current->flags &= ~PF_SYNCWRITE;
 
 	spin_lock(&inode_lock);
 	if ((inode->i_state & I_DIRTY) &&
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index fc342cf7c2c..087f3b734f4 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -169,7 +169,7 @@ static int fuse_release(struct inode *inode, struct file *file)
 	return fuse_release_common(inode, file, 0);
 }
 
-static int fuse_flush(struct file *file)
+static int fuse_flush(struct file *file, fl_owner_t id)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 7627022446b..a13c0f52905 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -236,8 +236,9 @@ static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr
 	/* fsid is left zero */
 }
 
-static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
+static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct super_block *sb = dentry->d_sb;
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
 	struct fuse_req *req;
 	struct fuse_statfs_out outarg;
@@ -569,11 +570,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	return err;
 }
 
-static struct super_block *fuse_get_sb(struct file_system_type *fs_type,
-				       int flags, const char *dev_name,
-				       void *raw_data)
+static int fuse_get_sb(struct file_system_type *fs_type,
+		       int flags, const char *dev_name,
+		       void *raw_data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super);
+	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
 }
 
 static struct file_system_type fuse_fs_type = {
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 1e44dcfe49c..13231dd5ce6 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -280,7 +280,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
 	block = off >> PAGE_CACHE_SHIFT;
 	node->page_offset = off & ~PAGE_CACHE_MASK;
 	for (i = 0; i < tree->pages_per_bnode; i++) {
-		page = read_cache_page(mapping, block++, (filler_t *)mapping->a_ops->readpage, NULL);
+		page = read_mapping_page(mapping, block++, NULL);
 		if (IS_ERR(page))
 			goto fail;
 		if (PageError(page)) {
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index d20131ce4b9..40035799431 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -59,7 +59,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
 	unlock_new_inode(tree->inode);
 
 	mapping = tree->inode->i_mapping;
-	page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage, NULL);
+	page = read_mapping_page(mapping, 0, NULL);
 	if (IS_ERR(page))
 		goto free_tree;
 
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 1181d116117..d9227bf14e8 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -80,8 +80,10 @@ static void hfs_put_super(struct super_block *sb)
  *
  * changed f_files/f_ffree to reflect the fs_ablock/free_ablocks.
  */
-static int hfs_statfs(struct super_block *sb, struct kstatfs *buf)
+static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct super_block *sb = dentry->d_sb;
+
 	buf->f_type = HFS_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = (u32)HFS_SB(sb)->fs_ablocks * HFS_SB(sb)->fs_div;
@@ -413,10 +415,11 @@ bail:
 	return res;
 }
 
-static struct super_block *hfs_get_sb(struct file_system_type *fs_type,
-				      int flags, const char *dev_name, void *data)
+static int hfs_get_sb(struct file_system_type *fs_type,
+		      int flags, const char *dev_name, void *data,
+		      struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super, mnt);
 }
 
 static struct file_system_type hfs_fs_type = {
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index 9fb51632303..d128a25b74d 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -31,8 +31,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma
 	dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len);
 	mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
 	mapping = HFSPLUS_SB(sb).alloc_file->i_mapping;
-	page = read_cache_page(mapping, offset / PAGE_CACHE_BITS,
-			       (filler_t *)mapping->a_ops->readpage, NULL);
+	page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL);
 	pptr = kmap(page);
 	curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32;
 	i = offset % 32;
@@ -72,8 +71,8 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma
 		offset += PAGE_CACHE_BITS;
 		if (offset >= size)
 			break;
-		page = read_cache_page(mapping, offset / PAGE_CACHE_BITS,
-				       (filler_t *)mapping->a_ops->readpage, NULL);
+		page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS,
+					 NULL);
 		curr = pptr = kmap(page);
 		if ((size ^ offset) / PAGE_CACHE_BITS)
 			end = pptr + PAGE_CACHE_BITS / 32;
@@ -119,8 +118,8 @@ found:
 		set_page_dirty(page);
 		kunmap(page);
 		offset += PAGE_CACHE_BITS;
-		page = read_cache_page(mapping, offset / PAGE_CACHE_BITS,
-				       (filler_t *)mapping->a_ops->readpage, NULL);
+		page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS,
+					 NULL);
 		pptr = kmap(page);
 		curr = pptr;
 		end = pptr + PAGE_CACHE_BITS / 32;
@@ -167,7 +166,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count)
 	mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
 	mapping = HFSPLUS_SB(sb).alloc_file->i_mapping;
 	pnr = offset / PAGE_CACHE_BITS;
-	page = read_cache_page(mapping, pnr, (filler_t *)mapping->a_ops->readpage, NULL);
+	page = read_mapping_page(mapping, pnr, NULL);
 	pptr = kmap(page);
 	curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32;
 	end = pptr + PAGE_CACHE_BITS / 32;
@@ -199,7 +198,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count)
 			break;
 		set_page_dirty(page);
 		kunmap(page);
-		page = read_cache_page(mapping, ++pnr, (filler_t *)mapping->a_ops->readpage, NULL);
+		page = read_mapping_page(mapping, ++pnr, NULL);
 		pptr = kmap(page);
 		curr = pptr;
 		end = pptr + PAGE_CACHE_BITS / 32;
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 746abc9ecf7..77bf434da67 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -440,7 +440,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
 	block = off >> PAGE_CACHE_SHIFT;
 	node->page_offset = off & ~PAGE_CACHE_MASK;
 	for (i = 0; i < tree->pages_per_bnode; block++, i++) {
-		page = read_cache_page(mapping, block, (filler_t *)mapping->a_ops->readpage, NULL);
+		page = read_mapping_page(mapping, block, NULL);
 		if (IS_ERR(page))
 			goto fail;
 		if (PageError(page)) {
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index effa8991999..cfc852fdd1b 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -38,7 +38,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
 		goto free_tree;
 
 	mapping = tree->inode->i_mapping;
-	page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage, NULL);
+	page = read_mapping_page(mapping, 0, NULL);
 	if (IS_ERR(page))
 		goto free_tree;
 
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 7843f792a4b..0a92fa2336a 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -212,8 +212,10 @@ static void hfsplus_put_super(struct super_block *sb)
 	sb->s_fs_info = NULL;
 }
 
-static int hfsplus_statfs(struct super_block *sb, struct kstatfs *buf)
+static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct super_block *sb = dentry->d_sb;
+
 	buf->f_type = HFSPLUS_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = HFSPLUS_SB(sb).total_blocks << HFSPLUS_SB(sb).fs_shift;
@@ -450,10 +452,12 @@ static void hfsplus_destroy_inode(struct inode *inode)
 
 #define HFSPLUS_INODE_SIZE	sizeof(struct hfsplus_inode_info)
 
-static struct super_block *hfsplus_get_sb(struct file_system_type *fs_type,
-					  int flags, const char *dev_name, void *data)
+static int hfsplus_get_sb(struct file_system_type *fs_type,
+			  int flags, const char *dev_name, void *data,
+			  struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super,
+			   mnt);
 }
 
 static struct file_system_type hfsplus_fs_type = {
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index bf0f8e16e43..8e0d37743e7 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -239,7 +239,7 @@ static int read_inode(struct inode *ino)
 	return(err);
 }
 
-int hostfs_statfs(struct super_block *sb, struct kstatfs *sf)
+int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
 {
 	/* do_statfs uses struct statfs64 internally, but the linux kernel
 	 * struct statfs still has 32-bit versions for most of these fields,
@@ -252,7 +252,7 @@ int hostfs_statfs(struct super_block *sb, struct kstatfs *sf)
 	long long f_files;
 	long long f_ffree;
 
-	err = do_statfs(HOSTFS_I(sb->s_root->d_inode)->host_filename,
+	err = do_statfs(HOSTFS_I(dentry->d_sb->s_root->d_inode)->host_filename,
 			&sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
 			&f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
 			&sf->f_namelen, sf->f_spare);
@@ -993,11 +993,11 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 	return(err);
 }
 
-static struct super_block *hostfs_read_sb(struct file_system_type *type,
-					     int flags, const char *dev_name,
-					     void *data)
+static int hostfs_read_sb(struct file_system_type *type,
+			  int flags, const char *dev_name,
+			  void *data, struct vfsmount *mnt)
 {
-	return(get_sb_nodev(type, flags, data, hostfs_fill_sb_common));
+	return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt);
 }
 
 static struct file_system_type hostfs_type = {
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index d72d8c87c99..f798480a363 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -135,8 +135,9 @@ static unsigned count_bitmaps(struct super_block *s)
 	return count;
 }
 
-static int hpfs_statfs(struct super_block *s, struct kstatfs *buf)
+static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct super_block *s = dentry->d_sb;
 	struct hpfs_sb_info *sbi = hpfs_sb(s);
 	lock_kernel();
 
@@ -662,10 +663,11 @@ bail0:
 	return -EINVAL;
 }
 
-static struct super_block *hpfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int hpfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type hpfs_fs_type = {
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index 5e6363be246..3a9bdf58166 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -616,7 +616,7 @@ static const struct file_operations hppfs_dir_fops = {
 	.fsync		= hppfs_fsync,
 };
 
-static int hppfs_statfs(struct super_block *sb, struct kstatfs *sf)
+static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf)
 {
 	sf->f_blocks = 0;
 	sf->f_bfree = 0;
@@ -769,11 +769,11 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
 	return(err);
 }
 
-static struct super_block *hppfs_read_super(struct file_system_type *type,
-					     int flags, const char *dev_name,
-					     void *data)
+static int hppfs_read_super(struct file_system_type *type,
+			    int flags, const char *dev_name,
+			    void *data, struct vfsmount *mnt)
 {
-	return(get_sb_nodev(type, flags, data, hppfs_fill_super));
+	return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt);
 }
 
 static struct file_system_type hppfs_type = {
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 3a5b4e92345..e6410d8edd0 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -59,7 +59,6 @@ static void huge_pagevec_release(struct pagevec *pvec)
 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *inode = file->f_dentry->d_inode;
-	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
 	loff_t len, vma_len;
 	int ret;
 
@@ -87,9 +86,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size)
 		goto out;
 
-	if (vma->vm_flags & VM_MAYSHARE)
-		if (hugetlb_extend_reservation(info, len >> HPAGE_SHIFT) != 0)
-			goto out;
+	if (vma->vm_flags & VM_MAYSHARE &&
+	    hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
+				  len >> HPAGE_SHIFT))
+		goto out;
 
 	ret = 0;
 	hugetlb_prefault_arch_hook(vma->vm_mm);
@@ -195,12 +195,8 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart)
 	const pgoff_t start = lstart >> HPAGE_SHIFT;
 	struct pagevec pvec;
 	pgoff_t next;
-	int i;
+	int i, freed = 0;
 
-	hugetlb_truncate_reservation(HUGETLBFS_I(inode),
-				     lstart >> HPAGE_SHIFT);
-	if (!mapping->nrpages)
-		return;
 	pagevec_init(&pvec, 0);
 	next = start;
 	while (1) {
@@ -221,10 +217,12 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart)
 			truncate_huge_page(page);
 			unlock_page(page);
 			hugetlb_put_quota(mapping);
+			freed++;
 		}
 		huge_pagevec_release(&pvec);
 	}
 	BUG_ON(!lstart && mapping->nrpages);
+	hugetlb_unreserve_pages(inode, start, freed);
 }
 
 static void hugetlbfs_delete_inode(struct inode *inode)
@@ -366,6 +364,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
 		inode->i_mapping->a_ops = &hugetlbfs_aops;
 		inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		INIT_LIST_HEAD(&inode->i_mapping->private_list);
 		info = HUGETLBFS_I(inode);
 		mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL);
 		switch (mode & S_IFMT) {
@@ -467,9 +466,9 @@ static int hugetlbfs_set_page_dirty(struct page *page)
 	return 0;
 }
 
-static int hugetlbfs_statfs(struct super_block *sb, struct kstatfs *buf)
+static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
+	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
 
 	buf->f_type = HUGETLBFS_MAGIC;
 	buf->f_bsize = HPAGE_SIZE;
@@ -538,7 +537,6 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 		hugetlbfs_inc_free_inodes(sbinfo);
 		return NULL;
 	}
-	p->prereserved_hpages = 0;
 	return &p->vfs_inode;
 }
 
@@ -723,10 +721,10 @@ void hugetlb_put_quota(struct address_space *mapping)
 	}
 }
 
-static struct super_block *hugetlbfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int hugetlbfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super);
+	return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt);
 }
 
 static struct file_system_type hugetlbfs_fs_type = {
@@ -781,8 +779,7 @@ struct file *hugetlb_zero_setup(size_t size)
 		goto out_file;
 
 	error = -ENOMEM;
-	if (hugetlb_extend_reservation(HUGETLBFS_I(inode),
-				       size >> HPAGE_SHIFT) != 0)
+	if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT))
 		goto out_inode;
 
 	d_instantiate(dentry, inode);
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 9e9931e2bad..f2386442ade 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -672,11 +672,11 @@ out:
 	return ret;
 }
 
-static struct super_block *
+static int
 inotify_get_sb(struct file_system_type *fs_type, int flags,
-	       const char *dev_name, void *data)
+	       const char *dev_name, void *data, struct vfsmount *mnt)
 {
-    return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA);
+	return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA, mnt);
 }
 
 static struct file_system_type inotify_fs_type = {
diff --git a/fs/ioprio.c b/fs/ioprio.c
index ca77008146c..7fa76ed53c1 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -24,15 +24,21 @@
 #include <linux/blkdev.h>
 #include <linux/capability.h>
 #include <linux/syscalls.h>
+#include <linux/security.h>
 
 static int set_task_ioprio(struct task_struct *task, int ioprio)
 {
+	int err;
 	struct io_context *ioc;
 
 	if (task->uid != current->euid &&
 	    task->uid != current->uid && !capable(CAP_SYS_NICE))
 		return -EPERM;
 
+	err = security_task_setioprio(task, ioprio);
+	if (err)
+		return err;
+
 	task_lock(task);
 
 	task->ioprio = ioprio;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 70adbb98bad..3f9c8ba1fa1 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -56,7 +56,7 @@ static void isofs_put_super(struct super_block *sb)
 }
 
 static void isofs_read_inode(struct inode *);
-static int isofs_statfs (struct super_block *, struct kstatfs *);
+static int isofs_statfs (struct dentry *, struct kstatfs *);
 
 static kmem_cache_t *isofs_inode_cachep;
 
@@ -901,8 +901,10 @@ out_freesbi:
 	return -EINVAL;
 }
 
-static int isofs_statfs (struct super_block *sb, struct kstatfs *buf)
+static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf)
 {
+	struct super_block *sb = dentry->d_sb;
+
 	buf->f_type = ISOFS_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = (ISOFS_SB(sb)->s_nzones
@@ -1399,10 +1401,11 @@ struct inode *isofs_iget(struct super_block *sb,
 	return inode;
 }
 
-static struct super_block *isofs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int isofs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type iso9660_fs_type = {
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 3f5102b069d..47678a26c13 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -24,29 +24,67 @@
 #include <linux/slab.h>
 
 /*
- * Unlink a buffer from a transaction.
+ * Unlink a buffer from a transaction checkpoint list.
  *
  * Called with j_list_lock held.
  */
-
-static inline void __buffer_unlink(struct journal_head *jh)
+static inline void __buffer_unlink_first(struct journal_head *jh)
 {
-	transaction_t *transaction;
-
-	transaction = jh->b_cp_transaction;
-	jh->b_cp_transaction = NULL;
+	transaction_t *transaction = jh->b_cp_transaction;
 
 	jh->b_cpnext->b_cpprev = jh->b_cpprev;
 	jh->b_cpprev->b_cpnext = jh->b_cpnext;
-	if (transaction->t_checkpoint_list == jh)
+	if (transaction->t_checkpoint_list == jh) {
 		transaction->t_checkpoint_list = jh->b_cpnext;
-	if (transaction->t_checkpoint_list == jh)
-		transaction->t_checkpoint_list = NULL;
+		if (transaction->t_checkpoint_list == jh)
+			transaction->t_checkpoint_list = NULL;
+	}
+}
+
+/*
+ * Unlink a buffer from a transaction checkpoint(io) list.
+ *
+ * Called with j_list_lock held.
+ */
+static inline void __buffer_unlink(struct journal_head *jh)
+{
+	transaction_t *transaction = jh->b_cp_transaction;
+
+	__buffer_unlink_first(jh);
+	if (transaction->t_checkpoint_io_list == jh) {
+		transaction->t_checkpoint_io_list = jh->b_cpnext;
+		if (transaction->t_checkpoint_io_list == jh)
+			transaction->t_checkpoint_io_list = NULL;
+	}
+}
+
+/*
+ * Move a buffer from the checkpoint list to the checkpoint io list
+ *
+ * Called with j_list_lock held
+ */
+static inline void __buffer_relink_io(struct journal_head *jh)
+{
+	transaction_t *transaction = jh->b_cp_transaction;
+
+	__buffer_unlink_first(jh);
+
+	if (!transaction->t_checkpoint_io_list) {
+		jh->b_cpnext = jh->b_cpprev = jh;
+	} else {
+		jh->b_cpnext = transaction->t_checkpoint_io_list;
+		jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
+		jh->b_cpprev->b_cpnext = jh;
+		jh->b_cpnext->b_cpprev = jh;
+	}
+	transaction->t_checkpoint_io_list = jh;
 }
 
 /*
  * Try to release a checkpointed buffer from its transaction.
- * Returns 1 if we released it.
+ * Returns 1 if we released it and 2 if we also released the
+ * whole transaction.
+ *
  * Requires j_list_lock
  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
  */
@@ -57,12 +95,11 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 
 	if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
-		__journal_remove_checkpoint(jh);
+		ret = __journal_remove_checkpoint(jh) + 1;
 		jbd_unlock_bh_state(bh);
 		journal_remove_journal_head(bh);
 		BUFFER_TRACE(bh, "release");
 		__brelse(bh);
-		ret = 1;
 	} else {
 		jbd_unlock_bh_state(bh);
 	}
@@ -117,83 +154,54 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
 }
 
 /*
- * Clean up a transaction's checkpoint list.
- *
- * We wait for any pending IO to complete and make sure any clean
- * buffers are removed from the transaction.
- *
- * Return 1 if we performed any actions which might have destroyed the
- * checkpoint.  (journal_remove_checkpoint() deletes the transaction when
- * the last checkpoint buffer is cleansed)
+ * Clean up transaction's list of buffers submitted for io.
+ * We wait for any pending IO to complete and remove any clean
+ * buffers. Note that we take the buffers in the opposite ordering
+ * from the one in which they were submitted for IO.
  *
  * Called with j_list_lock held.
  */
-static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
+static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
 {
-	struct journal_head *jh, *next_jh, *last_jh;
+	struct journal_head *jh;
 	struct buffer_head *bh;
-	int ret = 0;
-
-	assert_spin_locked(&journal->j_list_lock);
-	jh = transaction->t_checkpoint_list;
-	if (!jh)
-		return 0;
-
-	last_jh = jh->b_cpprev;
-	next_jh = jh;
-	do {
-		jh = next_jh;
+	tid_t this_tid;
+	int released = 0;
+
+	this_tid = transaction->t_tid;
+restart:
+	/* Did somebody clean up the transaction in the meanwhile? */
+	if (journal->j_checkpoint_transactions != transaction ||
+			transaction->t_tid != this_tid)
+		return;
+	while (!released && transaction->t_checkpoint_io_list) {
+		jh = transaction->t_checkpoint_io_list;
 		bh = jh2bh(jh);
+		if (!jbd_trylock_bh_state(bh)) {
+			jbd_sync_bh(journal, bh);
+			spin_lock(&journal->j_list_lock);
+			goto restart;
+		}
 		if (buffer_locked(bh)) {
 			atomic_inc(&bh->b_count);
 			spin_unlock(&journal->j_list_lock);
+			jbd_unlock_bh_state(bh);
 			wait_on_buffer(bh);
 			/* the journal_head may have gone by now */
 			BUFFER_TRACE(bh, "brelse");
 			__brelse(bh);
-			goto out_return_1;
+			spin_lock(&journal->j_list_lock);
+			goto restart;
 		}
-
 		/*
-		 * This is foul
+		 * Now in whatever state the buffer currently is, we know that
+		 * it has been written out and so we can drop it from the list
 		 */
-		if (!jbd_trylock_bh_state(bh)) {
-			jbd_sync_bh(journal, bh);
-			goto out_return_1;
-		}
-
-		if (jh->b_transaction != NULL) {
-			transaction_t *t = jh->b_transaction;
-			tid_t tid = t->t_tid;
-
-			spin_unlock(&journal->j_list_lock);
-			jbd_unlock_bh_state(bh);
-			log_start_commit(journal, tid);
-			log_wait_commit(journal, tid);
-			goto out_return_1;
-		}
-
-		/*
-		 * AKPM: I think the buffer_jbddirty test is redundant - it
-		 * shouldn't have NULL b_transaction?
-		 */
-		next_jh = jh->b_cpnext;
-		if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
-			BUFFER_TRACE(bh, "remove from checkpoint");
-			__journal_remove_checkpoint(jh);
-			jbd_unlock_bh_state(bh);
-			journal_remove_journal_head(bh);
-			__brelse(bh);
-			ret = 1;
-		} else {
-			jbd_unlock_bh_state(bh);
-		}
-	} while (jh != last_jh);
-
-	return ret;
-out_return_1:
-	spin_lock(&journal->j_list_lock);
-	return 1;
+		released = __journal_remove_checkpoint(jh);
+		jbd_unlock_bh_state(bh);
+		journal_remove_journal_head(bh);
+		__brelse(bh);
+	}
 }
 
 #define NR_BATCH	64
@@ -203,9 +211,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
 {
 	int i;
 
-	spin_unlock(&journal->j_list_lock);
 	ll_rw_block(SWRITE, *batch_count, bhs);
-	spin_lock(&journal->j_list_lock);
 	for (i = 0; i < *batch_count; i++) {
 		struct buffer_head *bh = bhs[i];
 		clear_buffer_jwrite(bh);
@@ -221,19 +227,43 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
  * Return 1 if something happened which requires us to abort the current
  * scan of the checkpoint list.  
  *
- * Called with j_list_lock held.
+ * Called with j_list_lock held and drops it if 1 is returned
  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
  */
-static int __flush_buffer(journal_t *journal, struct journal_head *jh,
-			struct buffer_head **bhs, int *batch_count,
-			int *drop_count)
+static int __process_buffer(journal_t *journal, struct journal_head *jh,
+			struct buffer_head **bhs, int *batch_count)
 {
 	struct buffer_head *bh = jh2bh(jh);
 	int ret = 0;
 
-	if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) {
-		J_ASSERT_JH(jh, jh->b_transaction == NULL);
+	if (buffer_locked(bh)) {
+		atomic_inc(&bh->b_count);
+		spin_unlock(&journal->j_list_lock);
+		jbd_unlock_bh_state(bh);
+		wait_on_buffer(bh);
+		/* the journal_head may have gone by now */
+		BUFFER_TRACE(bh, "brelse");
+		__brelse(bh);
+		ret = 1;
+	} else if (jh->b_transaction != NULL) {
+		transaction_t *t = jh->b_transaction;
+		tid_t tid = t->t_tid;
 
+		spin_unlock(&journal->j_list_lock);
+		jbd_unlock_bh_state(bh);
+		log_start_commit(journal, tid);
+		log_wait_commit(journal, tid);
+		ret = 1;
+	} else if (!buffer_dirty(bh)) {
+		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
+		BUFFER_TRACE(bh, "remove from checkpoint");
+		__journal_remove_checkpoint(jh);
+		spin_unlock(&journal->j_list_lock);
+		jbd_unlock_bh_state(bh);
+		journal_remove_journal_head(bh);
+		__brelse(bh);
+		ret = 1;
+	} else {
 		/*
 		 * Important: we are about to write the buffer, and
 		 * possibly block, while still holding the journal lock.
@@ -246,45 +276,30 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
 		J_ASSERT_BH(bh, !buffer_jwrite(bh));
 		set_buffer_jwrite(bh);
 		bhs[*batch_count] = bh;
+		__buffer_relink_io(jh);
 		jbd_unlock_bh_state(bh);
 		(*batch_count)++;
 		if (*batch_count == NR_BATCH) {
+			spin_unlock(&journal->j_list_lock);
 			__flush_batch(journal, bhs, batch_count);
 			ret = 1;
 		}
-	} else {
-		int last_buffer = 0;
-		if (jh->b_cpnext == jh) {
-			/* We may be about to drop the transaction.  Tell the
-			 * caller that the lists have changed.
-			 */
-			last_buffer = 1;
-		}
-		if (__try_to_free_cp_buf(jh)) {
-			(*drop_count)++;
-			ret = last_buffer;
-		}
 	}
 	return ret;
 }
 
 /*
- * Perform an actual checkpoint.  We don't write out only enough to
- * satisfy the current blocked requests: rather we submit a reasonably
- * sized chunk of the outstanding data to disk at once for
- * efficiency.  __log_wait_for_space() will retry if we didn't free enough.
+ * Perform an actual checkpoint. We take the first transaction on the
+ * list of transactions to be checkpointed and send all its buffers
+ * to disk. We submit larger chunks of data at once.
  * 
- * However, we _do_ take into account the amount requested so that once
- * the IO has been queued, we can return as soon as enough of it has
- * completed to disk.
- *
  * The journal should be locked before calling this function.
  */
 int log_do_checkpoint(journal_t *journal)
 {
+	transaction_t *transaction;
+	tid_t this_tid;
 	int result;
-	int batch_count = 0;
-	struct buffer_head *bhs[NR_BATCH];
 
 	jbd_debug(1, "Start checkpoint\n");
 
@@ -299,79 +314,68 @@ int log_do_checkpoint(journal_t *journal)
 		return result;
 
 	/*
-	 * OK, we need to start writing disk blocks.  Try to free up a
-	 * quarter of the log in a single checkpoint if we can.
+	 * OK, we need to start writing disk blocks.  Take one transaction
+	 * and write it.
 	 */
+	spin_lock(&journal->j_list_lock);
+	if (!journal->j_checkpoint_transactions)
+		goto out;
+	transaction = journal->j_checkpoint_transactions;
+	this_tid = transaction->t_tid;
+restart:
 	/*
-	 * AKPM: check this code.  I had a feeling a while back that it
-	 * degenerates into a busy loop at unmount time.
+	 * If someone cleaned up this transaction while we slept, we're
+	 * done (maybe it's a new transaction, but it fell at the same
+	 * address).
 	 */
-	spin_lock(&journal->j_list_lock);
-	while (journal->j_checkpoint_transactions) {
-		transaction_t *transaction;
-		struct journal_head *jh, *last_jh, *next_jh;
-		int drop_count = 0;
-		int cleanup_ret, retry = 0;
-		tid_t this_tid;
-
-		transaction = journal->j_checkpoint_transactions;
-		this_tid = transaction->t_tid;
-		jh = transaction->t_checkpoint_list;
-		last_jh = jh->b_cpprev;
-		next_jh = jh;
-		do {
+	if (journal->j_checkpoint_transactions == transaction &&
+			transaction->t_tid == this_tid) {
+		int batch_count = 0;
+		struct buffer_head *bhs[NR_BATCH];
+		struct journal_head *jh;
+		int retry = 0;
+
+		while (!retry && transaction->t_checkpoint_list) {
 			struct buffer_head *bh;
 
-			jh = next_jh;
-			next_jh = jh->b_cpnext;
+			jh = transaction->t_checkpoint_list;
 			bh = jh2bh(jh);
 			if (!jbd_trylock_bh_state(bh)) {
 				jbd_sync_bh(journal, bh);
-				spin_lock(&journal->j_list_lock);
 				retry = 1;
 				break;
 			}
-			retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
-			if (cond_resched_lock(&journal->j_list_lock)) {
+			retry = __process_buffer(journal, jh, bhs,&batch_count);
+			if (!retry && lock_need_resched(&journal->j_list_lock)){
+				spin_unlock(&journal->j_list_lock);
 				retry = 1;
 				break;
 			}
-		} while (jh != last_jh && !retry);
+		}
 
 		if (batch_count) {
+			if (!retry) {
+				spin_unlock(&journal->j_list_lock);
+				retry = 1;
+			}
 			__flush_batch(journal, bhs, &batch_count);
-			retry = 1;
 		}
 
+		if (retry) {
+			spin_lock(&journal->j_list_lock);
+			goto restart;
+		}
 		/*
-		 * If someone cleaned up this transaction while we slept, we're
-		 * done
-		 */
-		if (journal->j_checkpoint_transactions != transaction)
-			break;
-		if (retry)
-			continue;
-		/*
-		 * Maybe it's a new transaction, but it fell at the same
-		 * address
-		 */
-		if (transaction->t_tid != this_tid)
-			continue;
-		/*
-		 * We have walked the whole transaction list without
-		 * finding anything to write to disk.  We had better be
-		 * able to make some progress or we are in trouble.
+		 * Now we have cleaned up the first transaction's checkpoint
+		 * list. Wait for and clean up its checkpoint io list as well.
 		 */
-		cleanup_ret = __cleanup_transaction(journal, transaction);
-		J_ASSERT(drop_count != 0 || cleanup_ret != 0);
-		if (journal->j_checkpoint_transactions != transaction)
-			break;
+		__wait_cp_io(journal, transaction);
 	}
+out:
 	spin_unlock(&journal->j_list_lock);
 	result = cleanup_journal_tail(journal);
 	if (result < 0)
 		return result;
-
 	return 0;
 }
 
@@ -456,52 +460,98 @@ int cleanup_journal_tail(journal_t *journal)
 /* Checkpoint list management */
 
 /*
+ * journal_clean_one_cp_list
+ *
+ * Find all the written-back checkpoint buffers in the given list and release them.
+ *
+ * Called with the journal locked.
+ * Called with j_list_lock held.
+ * Returns number of buffers reaped (for debug)
+ */
+
+static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
+{
+	struct journal_head *last_jh;
+	struct journal_head *next_jh = jh;
+	int ret, freed = 0;
+
+	*released = 0;
+	if (!jh)
+		return 0;
+
+ 	last_jh = jh->b_cpprev;
+	do {
+		jh = next_jh;
+		next_jh = jh->b_cpnext;
+		/* Use trylock because of the ranking */
+		if (jbd_trylock_bh_state(jh2bh(jh))) {
+			ret = __try_to_free_cp_buf(jh);
+			if (ret) {
+				freed++;
+				if (ret == 2) {
+					*released = 1;
+					return freed;
+				}
+			}
+		}
+		/*
+		 * This function only frees up some memory
+		 * if possible so we don't have an obligation
+		 * to finish processing. Bail out if preemption
+		 * requested:
+		 */
+		if (need_resched())
+			return freed;
+	} while (jh != last_jh);
+
+	return freed;
+}
+
+/*
  * journal_clean_checkpoint_list
  *
  * Find all the written-back checkpoint buffers in the journal and release them.
  *
  * Called with the journal locked.
  * Called with j_list_lock held.
- * Returns number of bufers reaped (for debug)
+ * Returns number of buffers reaped (for debug)
  */
 
 int __journal_clean_checkpoint_list(journal_t *journal)
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
 	int ret = 0;
+	int released;
 
 	transaction = journal->j_checkpoint_transactions;
-	if (transaction == 0)
+	if (!transaction)
 		goto out;
 
 	last_transaction = transaction->t_cpprev;
 	next_transaction = transaction;
 	do {
-		struct journal_head *jh;
-
 		transaction = next_transaction;
 		next_transaction = transaction->t_cpnext;
-		jh = transaction->t_checkpoint_list;
-		if (jh) {
-			struct journal_head *last_jh = jh->b_cpprev;
-			struct journal_head *next_jh = jh;
-
-			do {
-				jh = next_jh;
-				next_jh = jh->b_cpnext;
-				/* Use trylock because of the ranknig */
-				if (jbd_trylock_bh_state(jh2bh(jh)))
-					ret += __try_to_free_cp_buf(jh);
-				/*
-				 * This function only frees up some memory
-				 * if possible so we dont have an obligation
-				 * to finish processing. Bail out if preemption
-				 * requested:
-				 */
-				if (need_resched())
-					goto out;
-			} while (jh != last_jh);
-		}
+		ret += journal_clean_one_cp_list(transaction->
+				t_checkpoint_list, &released);
+		/*
+		 * This function only frees up some memory if possible so we
+		 * don't have an obligation to finish processing. Bail out if
+		 * preemption requested:
+		 */
+		if (need_resched())
+			goto out;
+		if (released)
+			continue;
+		/*
+		 * We must be just as careful removing buffers from this list
+		 * as we are with t_checkpoint_list, because io_list may still
+		 * hold buffers that have not been submitted for IO yet.
+		 */
+		ret += journal_clean_one_cp_list(transaction->
+				t_checkpoint_io_list, &released);
+		if (need_resched())
+			goto out;
 	} while (transaction != last_transaction);
 out:
 	return ret;
@@ -516,18 +566,22 @@ out:
  * buffer updates committed in that transaction have safely been stored
  * elsewhere on disk.  To achieve this, all of the buffers in a
  * transaction need to be maintained on the transaction's checkpoint
- * list until they have been rewritten, at which point this function is
+ * lists until they have been rewritten, at which point this function is
  * called to remove the buffer from the existing transaction's
- * checkpoint list.
+ * checkpoint lists.
+ *
+ * The function returns 1 if it frees the transaction, 0 otherwise.
  *
  * This function is called with the journal locked.
  * This function is called with j_list_lock held.
+ * This function is called with jbd_lock_bh_state(jh2bh(jh))
  */
 
-void __journal_remove_checkpoint(struct journal_head *jh)
+int __journal_remove_checkpoint(struct journal_head *jh)
 {
 	transaction_t *transaction;
 	journal_t *journal;
+	int ret = 0;
 
 	JBUFFER_TRACE(jh, "entry");
 
@@ -538,8 +592,10 @@ void __journal_remove_checkpoint(struct journal_head *jh)
 	journal = transaction->t_journal;
 
 	__buffer_unlink(jh);
+	jh->b_cp_transaction = NULL;
 
-	if (transaction->t_checkpoint_list != NULL)
+	if (transaction->t_checkpoint_list != NULL ||
+	    transaction->t_checkpoint_io_list != NULL)
 		goto out;
 	JBUFFER_TRACE(jh, "transaction has no more buffers");
 
@@ -565,8 +621,10 @@ void __journal_remove_checkpoint(struct journal_head *jh)
 	/* Just in case anybody was waiting for more transactions to be
            checkpointed... */
 	wake_up(&journal->j_wait_logspace);
+	ret = 1;
 out:
 	JBUFFER_TRACE(jh, "exit");
+	return ret;
 }
 
 /*
@@ -628,6 +686,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
 	J_ASSERT(transaction->t_shadow_list == NULL);
 	J_ASSERT(transaction->t_log_list == NULL);
 	J_ASSERT(transaction->t_checkpoint_list == NULL);
+	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
 	J_ASSERT(transaction->t_updates == 0);
 	J_ASSERT(journal->j_committing_transaction != transaction);
 	J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 002ad2bbc76..0971814c38b 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -790,11 +790,22 @@ restart_loop:
 			jbd_unlock_bh_state(bh);
 		} else {
 			J_ASSERT_BH(bh, !buffer_dirty(bh));
-			J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-			__journal_unfile_buffer(jh);
-			jbd_unlock_bh_state(bh);
-			journal_remove_journal_head(bh);  /* needs a brelse */
-			release_buffer_page(bh);
+			/* A buffer that is on the BJ_Forget list and is not
+			 * jbddirty has been freed by this transaction, so it
+			 * could not have been reallocated before this
+			 * transaction committed. *BUT* it could have been
+			 * reallocated after we wrote all the data to disk
+			 * and before we process the buffer on the BJ_Forget
+			 * list. */
+			JBUFFER_TRACE(jh, "refile or unfile freed buffer");
+			__journal_refile_buffer(jh);
+			if (!jh->b_transaction) {
+				jbd_unlock_bh_state(bh);
+				 /* needs a brelse */
+				journal_remove_journal_head(bh);
+				release_buffer_page(bh);
+			} else
+				jbd_unlock_bh_state(bh);
 		}
 		cond_resched_lock(&journal->j_list_lock);
 	}
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index c609f5034fc..508b2ea91f4 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -227,7 +227,8 @@ repeat_locked:
 	spin_unlock(&transaction->t_handle_lock);
 	spin_unlock(&journal->j_state_lock);
 out:
-	kfree(new_transaction);
+	if (unlikely(new_transaction))		/* It's usually NULL */
+		kfree(new_transaction);
 	return ret;
 }
 
@@ -724,7 +725,8 @@ done:
 	journal_cancel_revoke(handle, jh);
 
 out:
-	kfree(frozen_buffer);
+	if (unlikely(frozen_buffer))	/* It's usually NULL */
+		kfree(frozen_buffer);
 
 	JBUFFER_TRACE(jh, "exit");
 	return error;
@@ -903,7 +905,8 @@ repeat:
 	jbd_unlock_bh_state(bh);
 out:
 	journal_put_journal_head(jh);
-	kfree(committed_data);
+	if (unlikely(committed_data))
+		kfree(committed_data);
 	return err;
 }
 
@@ -2038,7 +2041,8 @@ void __journal_refile_buffer(struct journal_head *jh)
 	__journal_temp_unlink_buffer(jh);
 	jh->b_transaction = jh->b_next_transaction;
 	jh->b_next_transaction = NULL;
-	__journal_file_buffer(jh, jh->b_transaction, BJ_Metadata);
+	__journal_file_buffer(jh, jh->b_transaction,
+				was_dirty ? BJ_Metadata : BJ_Reserved);
 	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
 
 	if (was_dirty)
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 020cc097c53..9e46ea6da75 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -377,9 +377,9 @@ jffs_new_inode(const struct inode * dir, struct jffs_raw_inode *raw_inode,
 
 /* Get statistics of the file system.  */
 static int
-jffs_statfs(struct super_block *sb, struct kstatfs *buf)
+jffs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	struct jffs_control *c = (struct jffs_control *) sb->s_fs_info;
+	struct jffs_control *c = (struct jffs_control *) dentry->d_sb->s_fs_info;
 	struct jffs_fmcontrol *fmc;
 
 	lock_kernel();
@@ -1785,10 +1785,11 @@ static struct super_operations jffs_ops =
 	.remount_fs	= jffs_remount,
 };
 
-static struct super_block *jffs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int jffs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, jffs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, jffs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type jffs_fs_type = {
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 7b6c24b14f8..2900ec3ec3a 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -192,9 +192,9 @@ int jffs2_setattr(struct dentry *dentry, struct iattr *iattr)
 	return rc;
 }
 
-int jffs2_statfs(struct super_block *sb, struct kstatfs *buf)
+int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
+	struct jffs2_sb_info *c = JFFS2_SB_INFO(dentry->d_sb);
 	unsigned long avail;
 
 	buf->f_type = JFFS2_SUPER_MAGIC;
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index cd4021bcb94..6b522356540 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -175,7 +175,7 @@ void jffs2_clear_inode (struct inode *);
 void jffs2_dirty_inode(struct inode *inode);
 struct inode *jffs2_new_inode (struct inode *dir_i, int mode,
 			       struct jffs2_raw_inode *ri);
-int jffs2_statfs (struct super_block *, struct kstatfs *);
+int jffs2_statfs (struct dentry *, struct kstatfs *);
 void jffs2_write_super (struct super_block *);
 int jffs2_remount_fs (struct super_block *, int *, char *);
 int jffs2_do_fill_super(struct super_block *sb, void *data, int silent);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 9d0521451f5..2378a662c25 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -111,9 +111,10 @@ static int jffs2_sb_set(struct super_block *sb, void *data)
 	return 0;
 }
 
-static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type,
-					      int flags, const char *dev_name,
-					      void *data, struct mtd_info *mtd)
+static int jffs2_get_sb_mtd(struct file_system_type *fs_type,
+			    int flags, const char *dev_name,
+			    void *data, struct mtd_info *mtd,
+			    struct vfsmount *mnt)
 {
 	struct super_block *sb;
 	struct jffs2_sb_info *c;
@@ -121,19 +122,20 @@ static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type,
 
 	c = kmalloc(sizeof(*c), GFP_KERNEL);
 	if (!c)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 	memset(c, 0, sizeof(*c));
 	c->mtd = mtd;
 
 	sb = sget(fs_type, jffs2_sb_compare, jffs2_sb_set, c);
 
 	if (IS_ERR(sb))
-		goto out_put;
+		goto out_error;
 
 	if (sb->s_root) {
 		/* New mountpoint for JFFS2 which is already mounted */
 		D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): Device %d (\"%s\") is already mounted\n",
 			  mtd->index, mtd->name));
+		ret = simple_set_mnt(mnt, sb);
 		goto out_put;
 	}
 
@@ -161,44 +163,47 @@ static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type,
 		/* Failure case... */
 		up_write(&sb->s_umount);
 		deactivate_super(sb);
-		return ERR_PTR(ret);
+		return ret;
 	}
 
 	sb->s_flags |= MS_ACTIVE;
-	return sb;
+	return simple_set_mnt(mnt, sb);
 
+out_error:
+	ret = PTR_ERR(sb);
  out_put:
 	kfree(c);
 	put_mtd_device(mtd);
 
-	return sb;
+	return ret;
 }
 
-static struct super_block *jffs2_get_sb_mtdnr(struct file_system_type *fs_type,
-					      int flags, const char *dev_name,
-					      void *data, int mtdnr)
+static int jffs2_get_sb_mtdnr(struct file_system_type *fs_type,
+			      int flags, const char *dev_name,
+			      void *data, int mtdnr,
+			      struct vfsmount *mnt)
 {
 	struct mtd_info *mtd;
 
 	mtd = get_mtd_device(NULL, mtdnr);
 	if (!mtd) {
 		D1(printk(KERN_DEBUG "jffs2: MTD device #%u doesn't appear to exist\n", mtdnr));
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
-	return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd);
+	return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt);
 }
 
-static struct super_block *jffs2_get_sb(struct file_system_type *fs_type,
-					int flags, const char *dev_name,
-					void *data)
+static int jffs2_get_sb(struct file_system_type *fs_type,
+			int flags, const char *dev_name,
+			void *data, struct vfsmount *mnt)
 {
 	int err;
 	struct nameidata nd;
 	int mtdnr;
 
 	if (!dev_name)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	D1(printk(KERN_DEBUG "jffs2_get_sb(): dev_name \"%s\"\n", dev_name));
 
@@ -220,7 +225,7 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type,
 				mtd = get_mtd_device(NULL, mtdnr);
 				if (mtd) {
 					if (!strcmp(mtd->name, dev_name+4))
-						return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd);
+						return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt);
 					put_mtd_device(mtd);
 				}
 			}
@@ -233,7 +238,7 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type,
 			if (!*endptr) {
 				/* It was a valid number */
 				D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd%%d, mtdnr %d\n", mtdnr));
-				return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr);
+				return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt);
 			}
 		}
 	}
@@ -247,7 +252,7 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type,
 		  err, nd.dentry->d_inode));
 
 	if (err)
-		return ERR_PTR(err);
+		return err;
 
 	err = -EINVAL;
 
@@ -269,11 +274,11 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type,
 	mtdnr = iminor(nd.dentry->d_inode);
 	path_release(&nd);
 
-	return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr);
+	return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt);
 
 out:
 	path_release(&nd);
-	return ERR_PTR(err);
+	return err;
 }
 
 static void jffs2_put_super (struct super_block *sb)
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 2b220dd6b4e..7f6e8803970 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -632,10 +632,9 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
 		}
 		SetPageUptodate(page);
 	} else {
-		page = read_cache_page(mapping, page_index,
-			    (filler_t *)mapping->a_ops->readpage, NULL);
+		page = read_mapping_page(mapping, page_index, NULL);
 		if (IS_ERR(page) || !PageUptodate(page)) {
-			jfs_err("read_cache_page failed!");
+			jfs_err("read_mapping_page failed!");
 			return NULL;
 		}
 		lock_page(page);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index db6f41d6dd6..73d2aba084c 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -139,9 +139,9 @@ static void jfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(jfs_inode_cachep, ji);
 }
 
-static int jfs_statfs(struct super_block *sb, struct kstatfs *buf)
+static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	struct jfs_sb_info *sbi = JFS_SBI(sb);
+	struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb);
 	s64 maxinodes;
 	struct inomap *imap = JFS_IP(sbi->ipimap)->i_imap;
 
@@ -565,10 +565,11 @@ static void jfs_unlockfs(struct super_block *sb)
 	}
 }
 
-static struct super_block *jfs_get_sb(struct file_system_type *fs_type, 
-	int flags, const char *dev_name, void *data)
+static int jfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super,
+			   mnt);
 }
 
 static int jfs_sync_fs(struct super_block *sb, int wait)
diff --git a/fs/libfs.c b/fs/libfs.c
index 7145ba7a48d..1b115638178 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -20,9 +20,9 @@ int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	return 0;
 }
 
-int simple_statfs(struct super_block *sb, struct kstatfs *buf)
+int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	buf->f_type = sb->s_magic;
+	buf->f_type = dentry->d_sb->s_magic;
 	buf->f_bsize = PAGE_CACHE_SIZE;
 	buf->f_namelen = NAME_MAX;
 	return 0;
@@ -196,9 +196,9 @@ struct inode_operations simple_dir_inode_operations = {
  * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
  * will never be mountable)
  */
-struct super_block *
-get_sb_pseudo(struct file_system_type *fs_type, char *name,
-	struct super_operations *ops, unsigned long magic)
+int get_sb_pseudo(struct file_system_type *fs_type, char *name,
+	struct super_operations *ops, unsigned long magic,
+	struct vfsmount *mnt)
 {
 	struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
 	static struct super_operations default_ops = {.statfs = simple_statfs};
@@ -207,7 +207,7 @@ get_sb_pseudo(struct file_system_type *fs_type, char *name,
 	struct qstr d_name = {.name = name, .len = strlen(name)};
 
 	if (IS_ERR(s))
-		return s;
+		return PTR_ERR(s);
 
 	s->s_flags = MS_NOUSER;
 	s->s_maxbytes = ~0ULL;
@@ -232,12 +232,12 @@ get_sb_pseudo(struct file_system_type *fs_type, char *name,
 	d_instantiate(dentry, root);
 	s->s_root = dentry;
 	s->s_flags |= MS_ACTIVE;
-	return s;
+	return simple_set_mnt(mnt, s);
 
 Enomem:
 	up_write(&s->s_umount);
 	deactivate_super(s);
-	return ERR_PTR(-ENOMEM);
+	return -ENOMEM;
 }
 
 int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
diff --git a/fs/locks.c b/fs/locks.c
index ab61a8b5482..1ad29c9b625 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -703,7 +703,7 @@ EXPORT_SYMBOL(posix_test_lock);
  * from a broken NFS client. But broken NFS clients have a lot more to
  * worry about than proper deadlock detection anyway... --okir
  */
-int posix_locks_deadlock(struct file_lock *caller_fl,
+static int posix_locks_deadlock(struct file_lock *caller_fl,
 				struct file_lock *block_fl)
 {
 	struct list_head *tmp;
@@ -722,8 +722,6 @@ next_task:
 	return 0;
 }
 
-EXPORT_SYMBOL(posix_locks_deadlock);
-
 /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
  * at the head of the list, but that's secret knowledge known only to
  * flock_lock_file and posix_lock_file.
@@ -794,7 +792,8 @@ out:
 static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
 {
 	struct file_lock *fl;
-	struct file_lock *new_fl, *new_fl2;
+	struct file_lock *new_fl = NULL;
+	struct file_lock *new_fl2 = NULL;
 	struct file_lock *left = NULL;
 	struct file_lock *right = NULL;
 	struct file_lock **before;
@@ -803,9 +802,15 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
 	/*
 	 * We may need two file_lock structures for this operation,
 	 * so we get them in advance to avoid races.
+	 *
+	 * In some cases we can be sure that no new locks will be needed.
 	 */
-	new_fl = locks_alloc_lock();
-	new_fl2 = locks_alloc_lock();
+	if (!(request->fl_flags & FL_ACCESS) &&
+	    (request->fl_type != F_UNLCK ||
+	     request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
+		new_fl = locks_alloc_lock();
+		new_fl2 = locks_alloc_lock();
+	}
 
 	lock_kernel();
 	if (request->fl_type != F_UNLCK) {
@@ -834,14 +839,7 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
 	if (request->fl_flags & FL_ACCESS)
 		goto out;
 
-	error = -ENOLCK; /* "no luck" */
-	if (!(new_fl && new_fl2))
-		goto out;
-
 	/*
-	 * We've allocated the new locks in advance, so there are no
-	 * errors possible (and no blocking operations) from here on.
-	 * 
 	 * Find the first old lock with the same owner as the new lock.
 	 */
 	
@@ -938,10 +936,25 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
 		before = &fl->fl_next;
 	}
 
+	/*
+	 * The above code only modifies existing locks in case of
+	 * merging or replacing.  If new lock(s) need to be inserted
+	 * all modifications are done below this, so it's still safe to
+	 * bail out.
+	 */
+	error = -ENOLCK; /* "no luck" */
+	if (right && left == right && !new_fl2)
+		goto out;
+
 	error = 0;
 	if (!added) {
 		if (request->fl_type == F_UNLCK)
 			goto out;
+
+		if (!new_fl) {
+			error = -ENOLCK;
+			goto out;
+		}
 		locks_copy_lock(new_fl, request);
 		locks_insert_lock(before, new_fl);
 		new_fl = NULL;
@@ -1881,19 +1894,18 @@ out:
  */
 void locks_remove_posix(struct file *filp, fl_owner_t owner)
 {
-	struct file_lock lock, **before;
+	struct file_lock lock;
 
 	/*
 	 * If there are no locks held on this file, we don't need to call
 	 * posix_lock_file().  Another process could be setting a lock on this
 	 * file at the same time, but we wouldn't remove that lock anyway.
 	 */
-	before = &filp->f_dentry->d_inode->i_flock;
-	if (*before == NULL)
+	if (!filp->f_dentry->d_inode->i_flock)
 		return;
 
 	lock.fl_type = F_UNLCK;
-	lock.fl_flags = FL_POSIX;
+	lock.fl_flags = FL_POSIX | FL_CLOSE;
 	lock.fl_start = 0;
 	lock.fl_end = OFFSET_MAX;
 	lock.fl_owner = owner;
@@ -1902,25 +1914,11 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
 	lock.fl_ops = NULL;
 	lock.fl_lmops = NULL;
 
-	if (filp->f_op && filp->f_op->lock != NULL) {
+	if (filp->f_op && filp->f_op->lock != NULL)
 		filp->f_op->lock(filp, F_SETLK, &lock);
-		goto out;
-	}
+	else
+		posix_lock_file(filp, &lock);
 
-	/* Can't use posix_lock_file here; we need to remove it no matter
-	 * which pid we have.
-	 */
-	lock_kernel();
-	while (*before != NULL) {
-		struct file_lock *fl = *before;
-		if (IS_POSIX(fl) && posix_same_owner(fl, &lock)) {
-			locks_delete_lock(before);
-			continue;
-		}
-		before = &fl->fl_next;
-	}
-	unlock_kernel();
-out:
 	if (lock.fl_ops && lock.fl_ops->fl_release_private)
 		lock.fl_ops->fl_release_private(&lock);
 }
@@ -2206,63 +2204,6 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
 
 EXPORT_SYMBOL(lock_may_write);
 
-static inline void __steal_locks(struct file *file, fl_owner_t from)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct file_lock *fl = inode->i_flock;
-
-	while (fl) {
-		if (fl->fl_file == file && fl->fl_owner == from)
-			fl->fl_owner = current->files;
-		fl = fl->fl_next;
-	}
-}
-
-/* When getting ready for executing a binary, we make sure that current
- * has a files_struct on its own. Before dropping the old files_struct,
- * we take over ownership of all locks for all file descriptors we own.
- * Note that we may accidentally steal a lock for a file that a sibling
- * has created since the unshare_files() call.
- */
-void steal_locks(fl_owner_t from)
-{
-	struct files_struct *files = current->files;
-	int i, j;
-	struct fdtable *fdt;
-
-	if (from == files)
-		return;
-
-	lock_kernel();
-	j = 0;
-
-	/*
-	 * We are not taking a ref to the file structures, so
-	 * we need to acquire ->file_lock.
-	 */
-	spin_lock(&files->file_lock);
-	fdt = files_fdtable(files);
-	for (;;) {
-		unsigned long set;
-		i = j * __NFDBITS;
-		if (i >= fdt->max_fdset || i >= fdt->max_fds)
-			break;
-		set = fdt->open_fds->fds_bits[j++];
-		while (set) {
-			if (set & 1) {
-				struct file *file = fdt->fd[i];
-				if (file)
-					__steal_locks(file, from);
-			}
-			i++;
-			set >>= 1;
-		}
-	}
-	spin_unlock(&files->file_lock);
-	unlock_kernel();
-}
-EXPORT_SYMBOL(steal_locks);
-
 static int __init filelock_init(void)
 {
 	filelock_cache = kmem_cache_create("file_lock_cache",
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 69224d1fe04..2b0a389d198 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -60,8 +60,7 @@ static int dir_commit_chunk(struct page *page, unsigned from, unsigned to)
 static struct page * dir_get_page(struct inode *dir, unsigned long n)
 {
 	struct address_space *mapping = dir->i_mapping;
-	struct page *page = read_cache_page(mapping, n,
-				(filler_t*)mapping->a_ops->readpage, NULL);
+	struct page *page = read_mapping_page(mapping, n, NULL);
 	if (!IS_ERR(page)) {
 		wait_on_page_locked(page);
 		kmap(page);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 2dcccf1d1b7..a6fb509b734 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -19,7 +19,7 @@
 
 static void minix_read_inode(struct inode * inode);
 static int minix_write_inode(struct inode * inode, int wait);
-static int minix_statfs(struct super_block *sb, struct kstatfs *buf);
+static int minix_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int minix_remount (struct super_block * sb, int * flags, char * data);
 
 static void minix_delete_inode(struct inode *inode)
@@ -296,11 +296,11 @@ out_bad_sb:
 	return -EINVAL;
 }
 
-static int minix_statfs(struct super_block *sb, struct kstatfs *buf)
+static int minix_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	struct minix_sb_info *sbi = minix_sb(sb);
-	buf->f_type = sb->s_magic;
-	buf->f_bsize = sb->s_blocksize;
+	struct minix_sb_info *sbi = minix_sb(dentry->d_sb);
+	buf->f_type = dentry->d_sb->s_magic;
+	buf->f_bsize = dentry->d_sb->s_blocksize;
 	buf->f_blocks = (sbi->s_nzones - sbi->s_firstdatazone) << sbi->s_log_zone_size;
 	buf->f_bfree = minix_count_free_blocks(sbi);
 	buf->f_bavail = buf->f_bfree;
@@ -559,10 +559,11 @@ void minix_truncate(struct inode * inode)
 		V2_minix_truncate(inode);
 }
 
-static struct super_block *minix_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int minix_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super,
+			   mnt);
 }
 
 static struct file_system_type minix_fs_type = {
diff --git a/fs/mpage.c b/fs/mpage.c
index 9bf2eb30e6f..1e4598247d0 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -707,9 +707,9 @@ mpage_writepages(struct address_space *mapping,
 	struct pagevec pvec;
 	int nr_pages;
 	pgoff_t index;
-	pgoff_t end = -1;		/* Inclusive */
+	pgoff_t end;		/* Inclusive */
 	int scanned = 0;
-	int is_range = 0;
+	int range_whole = 0;
 
 	if (wbc->nonblocking && bdi_write_congested(bdi)) {
 		wbc->encountered_congestion = 1;
@@ -721,16 +721,14 @@ mpage_writepages(struct address_space *mapping,
 		writepage = mapping->a_ops->writepage;
 
 	pagevec_init(&pvec, 0);
-	if (wbc->sync_mode == WB_SYNC_NONE) {
+	if (wbc->range_cyclic) {
 		index = mapping->writeback_index; /* Start from prev offset */
+		end = -1;
 	} else {
-		index = 0;			  /* whole-file sweep */
-		scanned = 1;
-	}
-	if (wbc->start || wbc->end) {
-		index = wbc->start >> PAGE_CACHE_SHIFT;
-		end = wbc->end >> PAGE_CACHE_SHIFT;
-		is_range = 1;
+		index = wbc->range_start >> PAGE_CACHE_SHIFT;
+		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+			range_whole = 1;
 		scanned = 1;
 	}
 retry:
@@ -759,7 +757,7 @@ retry:
 				continue;
 			}
 
-			if (unlikely(is_range) && page->index > end) {
+			if (!wbc->range_cyclic && page->index > end) {
 				done = 1;
 				unlock_page(page);
 				continue;
@@ -810,7 +808,7 @@ retry:
 		index = 0;
 		goto retry;
 	}
-	if (!is_range)
+	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
 		mapping->writeback_index = index;
 	if (bio)
 		mpage_bio_submit(WRITE, bio);
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 5b76ccd19e3..9e44158a754 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -661,11 +661,12 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent)
 	return 0;
 }
 
-static struct super_block *msdos_get_sb(struct file_system_type *fs_type,
-					int flags, const char *dev_name,
-					void *data)
+static int msdos_get_sb(struct file_system_type *fs_type,
+			int flags, const char *dev_name,
+			void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super,
+			   mnt);
 }
 
 static struct file_system_type msdos_fs_type = {
diff --git a/fs/namei.c b/fs/namei.c
index 184fe4acf82..bb4a3e40e43 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2577,8 +2577,7 @@ static char *page_getlink(struct dentry * dentry, struct page **ppage)
 {
 	struct page * page;
 	struct address_space *mapping = dentry->d_inode->i_mapping;
-	page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
-				NULL);
+	page = read_mapping_page(mapping, 0, NULL);
 	if (IS_ERR(page))
 		goto sync_fail;
 	wait_on_page_locked(page);
diff --git a/fs/namespace.c b/fs/namespace.c
index bf478addb85..c13072a5f1e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -86,6 +86,15 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 	return mnt;
 }
 
+int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
+{
+	mnt->mnt_sb = sb;
+	mnt->mnt_root = dget(sb->s_root);
+	return 0;
+}
+
+EXPORT_SYMBOL(simple_set_mnt);
+
 void free_vfsmnt(struct vfsmount *mnt)
 {
 	kfree(mnt->mnt_devname);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index a1f3e972c6e..90d2ea28f33 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -39,7 +39,7 @@
 
 static void ncp_delete_inode(struct inode *);
 static void ncp_put_super(struct super_block *);
-static int  ncp_statfs(struct super_block *, struct kstatfs *);
+static int  ncp_statfs(struct dentry *, struct kstatfs *);
 
 static kmem_cache_t * ncp_inode_cachep;
 
@@ -724,13 +724,14 @@ static void ncp_put_super(struct super_block *sb)
 	kfree(server);
 }
 
-static int ncp_statfs(struct super_block *sb, struct kstatfs *buf)
+static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct dentry* d;
 	struct inode* i;
 	struct ncp_inode_info* ni;
 	struct ncp_server* s;
 	struct ncp_volume_info vi;
+	struct super_block *sb = dentry->d_sb;
 	int err;
 	__u8 dh;
 	
@@ -957,10 +958,10 @@ out:
 	return result;
 }
 
-static struct super_block *ncp_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int ncp_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, ncp_fill_super);
+	return get_sb_nodev(fs_type, flags, data, ncp_fill_super, mnt);
 }
 
 static struct file_system_type ncp_fs_type = {
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index fade02c15e6..fa05c027ea1 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -43,7 +43,7 @@ static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
 static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
 static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t);
 static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t);
-static int  nfs_file_flush(struct file *);
+static int  nfs_file_flush(struct file *, fl_owner_t id);
 static int  nfs_fsync(struct file *, struct dentry *dentry, int datasync);
 static int nfs_check_flags(int flags);
 static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
@@ -188,7 +188,7 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
  *
  */
 static int
-nfs_file_flush(struct file *file)
+nfs_file_flush(struct file *file, fl_owner_t id)
 {
 	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
 	struct inode	*inode = file->f_dentry->d_inode;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d0b991a9232..937fbfc381b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -65,7 +65,7 @@ static int nfs_write_inode(struct inode *,int);
 static void nfs_delete_inode(struct inode *);
 static void nfs_clear_inode(struct inode *);
 static void nfs_umount_begin(struct super_block *);
-static int  nfs_statfs(struct super_block *, struct kstatfs *);
+static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
 static int  nfs_show_stats(struct seq_file *, struct vfsmount *);
 static void nfs_zap_acl_cache(struct inode *);
@@ -534,8 +534,9 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
 }
 
 static int
-nfs_statfs(struct super_block *sb, struct kstatfs *buf)
+nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct super_block *sb = dentry->d_sb;
 	struct nfs_server *server = NFS_SB(sb);
 	unsigned char blockbits;
 	unsigned long blockres;
@@ -1690,8 +1691,8 @@ static int nfs_compare_super(struct super_block *sb, void *data)
 	return !nfs_compare_fh(&old->fh, &server->fh);
 }
 
-static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *raw_data)
+static int nfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
 	int error;
 	struct nfs_server *server = NULL;
@@ -1699,14 +1700,14 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 	struct nfs_fh *root;
 	struct nfs_mount_data *data = raw_data;
 
-	s = ERR_PTR(-EINVAL);
+	error = -EINVAL;
 	if (data == NULL) {
 		dprintk("%s: missing data argument\n", __FUNCTION__);
-		goto out_err;
+		goto out_err_noserver;
 	}
 	if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
 		dprintk("%s: bad mount version\n", __FUNCTION__);
-		goto out_err;
+		goto out_err_noserver;
 	}
 	switch (data->version) {
 		case 1:
@@ -1718,7 +1719,7 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 				dprintk("%s: mount structure version %d does not support NFSv3\n",
 						__FUNCTION__,
 						data->version);
-				goto out_err;
+				goto out_err_noserver;
 			}
 			data->root.size = NFS2_FHSIZE;
 			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
@@ -1727,24 +1728,24 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 				dprintk("%s: mount structure version %d does not support strong security\n",
 						__FUNCTION__,
 						data->version);
-				goto out_err;
+				goto out_err_noserver;
 			}
 		case 5:
 			memset(data->context, 0, sizeof(data->context));
 	}
 #ifndef CONFIG_NFS_V3
 	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
-	s = ERR_PTR(-EPROTONOSUPPORT);
+	error = -EPROTONOSUPPORT;
 	if (data->flags & NFS_MOUNT_VER3) {
 		dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
-		goto out_err;
+		goto out_err_noserver;
 	}
 #endif /* CONFIG_NFS_V3 */
 
-	s = ERR_PTR(-ENOMEM);
+	error = -ENOMEM;
 	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
 	if (!server)
-		goto out_err;
+		goto out_err_noserver;
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
 	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -1754,7 +1755,7 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 		root->size = data->root.size;
 	else
 		root->size = NFS2_FHSIZE;
-	s = ERR_PTR(-EINVAL);
+	error = -EINVAL;
 	if (root->size > sizeof(root->data)) {
 		dprintk("%s: invalid root filehandle\n", __FUNCTION__);
 		goto out_err;
@@ -1770,15 +1771,20 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 	}
 
 	/* Fire up rpciod if not yet running */
-	s = ERR_PTR(rpciod_up());
-	if (IS_ERR(s)) {
-		dprintk("%s: couldn't start rpciod! Error = %ld\n",
-				__FUNCTION__, PTR_ERR(s));
+	error = rpciod_up();
+	if (error < 0) {
+		dprintk("%s: couldn't start rpciod! Error = %d\n",
+				__FUNCTION__, error);
 		goto out_err;
 	}
 
 	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
-	if (IS_ERR(s) || s->s_root)
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
+		goto out_err_rpciod;
+	}
+
+	if (s->s_root)
 		goto out_rpciod_down;
 
 	s->s_flags = flags;
@@ -1787,15 +1793,22 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 	if (error) {
 		up_write(&s->s_umount);
 		deactivate_super(s);
-		return ERR_PTR(error);
+		return error;
 	}
 	s->s_flags |= MS_ACTIVE;
-	return s;
+	return simple_set_mnt(mnt, s);
+
 out_rpciod_down:
 	rpciod_down();
+	kfree(server);
+	return simple_set_mnt(mnt, s);
+
+out_err_rpciod:
+	rpciod_down();
 out_err:
 	kfree(server);
-	return s;
+out_err_noserver:
+	return error;
 }
 
 static void nfs_kill_super(struct super_block *s)
@@ -2032,8 +2045,8 @@ nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
 	return dst;
 }
 
-static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *raw_data)
+static int nfs4_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
 	int error;
 	struct nfs_server *server;
@@ -2043,16 +2056,16 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 
 	if (data == NULL) {
 		dprintk("%s: missing data argument\n", __FUNCTION__);
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 	if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) {
 		dprintk("%s: bad mount version\n", __FUNCTION__);
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
 	if (!server)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
 	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -2074,33 +2087,41 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 
 	/* We now require that the mount process passes the remote address */
 	if (data->host_addrlen != sizeof(server->addr)) {
-		s = ERR_PTR(-EINVAL);
+		error = -EINVAL;
 		goto out_free;
 	}
 	if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) {
-		s = ERR_PTR(-EFAULT);
+		error = -EFAULT;
 		goto out_free;
 	}
 	if (server->addr.sin_family != AF_INET ||
 	    server->addr.sin_addr.s_addr == INADDR_ANY) {
 		dprintk("%s: mount program didn't pass remote IP address!\n",
 				__FUNCTION__);
-		s = ERR_PTR(-EINVAL);
+		error = -EINVAL;
 		goto out_free;
 	}
 
 	/* Fire up rpciod if not yet running */
-	s = ERR_PTR(rpciod_up());
-	if (IS_ERR(s)) {
-		dprintk("%s: couldn't start rpciod! Error = %ld\n",
-				__FUNCTION__, PTR_ERR(s));
+	error = rpciod_up();
+	if (error < 0) {
+		dprintk("%s: couldn't start rpciod! Error = %d\n",
+				__FUNCTION__, error);
 		goto out_free;
 	}
 
 	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
-
-	if (IS_ERR(s) || s->s_root)
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
 		goto out_free;
+	}
+
+	if (s->s_root) {
+		kfree(server->mnt_path);
+		kfree(server->hostname);
+		kfree(server);
+		return simple_set_mnt(mnt, s);
+	}
 
 	s->s_flags = flags;
 
@@ -2108,17 +2129,17 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 	if (error) {
 		up_write(&s->s_umount);
 		deactivate_super(s);
-		return ERR_PTR(error);
+		return error;
 	}
 	s->s_flags |= MS_ACTIVE;
-	return s;
+	return simple_set_mnt(mnt, s);
 out_err:
-	s = (struct super_block *)p;
+	error = PTR_ERR(p);
 out_free:
 	kfree(server->mnt_path);
 	kfree(server->hostname);
 	kfree(server);
-	return s;
+	return error;
 }
 
 static void nfs4_kill_super(struct super_block *sb)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index de3998f15f1..5446a0861d1 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1310,7 +1310,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 	if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL)) ||
 	    (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
 		       FATTR4_WORD1_SPACE_TOTAL))) {
-		status = vfs_statfs(dentry->d_inode->i_sb, &statfs);
+		status = vfs_statfs(dentry, &statfs);
 		if (status)
 			goto out_nfserr;
 	}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3ef017b3b5b..a1810e6a93e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -494,10 +494,10 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
 	return simple_fill_super(sb, 0x6e667364, nfsd_files);
 }
 
-static struct super_block *nfsd_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int nfsd_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, nfsd_fill_super);
+	return get_sb_single(fs_type, flags, data, nfsd_fill_super, mnt);
 }
 
 static struct file_system_type nfsd_fs_type = {
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 1d65f13f458..245eaa1fb59 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1737,7 +1737,7 @@ int
 nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat)
 {
 	int err = fh_verify(rqstp, fhp, 0, MAY_NOP);
-	if (!err && vfs_statfs(fhp->fh_dentry->d_inode->i_sb,stat))
+	if (!err && vfs_statfs(fhp->fh_dentry,stat))
 		err = nfserr_io;
 	return err;
 }
diff --git a/fs/ntfs/aops.h b/fs/ntfs/aops.h
index 3b74e66ca2f..325ce261a10 100644
--- a/fs/ntfs/aops.h
+++ b/fs/ntfs/aops.h
@@ -86,8 +86,7 @@ static inline void ntfs_unmap_page(struct page *page)
 static inline struct page *ntfs_map_page(struct address_space *mapping,
 		unsigned long index)
 {
-	struct page *page = read_cache_page(mapping, index,
-			(filler_t*)mapping->a_ops->readpage, NULL);
+	struct page *page = read_mapping_page(mapping, index, NULL);
 
 	if (!IS_ERR(page)) {
 		wait_on_page_locked(page);
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 1663f5c3c6a..6708e1d68a9 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -2529,8 +2529,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
 	end >>= PAGE_CACHE_SHIFT;
 	/* If there is a first partial page, need to do it the slow way. */
 	if (start_ofs) {
-		page = read_cache_page(mapping, idx,
-				(filler_t*)mapping->a_ops->readpage, NULL);
+		page = read_mapping_page(mapping, idx, NULL);
 		if (IS_ERR(page)) {
 			ntfs_error(vol->sb, "Failed to read first partial "
 					"page (sync error, index 0x%lx).", idx);
@@ -2600,8 +2599,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
 	}
 	/* If there is a last partial page, need to do it the slow way. */
 	if (end_ofs) {
-		page = read_cache_page(mapping, idx,
-				(filler_t*)mapping->a_ops->readpage, NULL);
+		page = read_mapping_page(mapping, idx, NULL);
 		if (IS_ERR(page)) {
 			ntfs_error(vol->sb, "Failed to read last partial page "
 					"(sync error, index 0x%lx).", idx);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index c63a83e8da9..88292f9e4b9 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -231,8 +231,7 @@ do_non_resident_extend:
 		 * Read the page.  If the page is not present, this will zero
 		 * the uninitialized regions for us.
 		 */
-		page = read_cache_page(mapping, index,
-				(filler_t*)mapping->a_ops->readpage, NULL);
+		page = read_mapping_page(mapping, index, NULL);
 		if (IS_ERR(page)) {
 			err = PTR_ERR(page);
 			goto init_err_out;
@@ -1484,14 +1483,15 @@ static inline void ntfs_flush_dcache_pages(struct page **pages,
 		unsigned nr_pages)
 {
 	BUG_ON(!nr_pages);
+	/*
+	 * Warning: Do not do the decrement at the same time as the call to
+	 * flush_dcache_page() because it is a NULL macro on i386 and hence the
+	 * decrement never happens so the loop never terminates.
+	 */
 	do {
-		/*
-		 * Warning: Do not do the decrement at the same time as the
-		 * call because flush_dcache_page() is a NULL macro on i386
-		 * and hence the decrement never happens.
-		 */
+		--nr_pages;
 		flush_dcache_page(pages[nr_pages]);
-	} while (--nr_pages > 0);
+	} while (nr_pages > 0);
 }
 
 /**
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 27833f6df49..0e14acea3f8 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2601,10 +2601,10 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
 
 /**
  * ntfs_statfs - return information about mounted NTFS volume
- * @sb:		super block of mounted volume
+ * @dentry:	dentry from mounted volume
  * @sfs:	statfs structure in which to return the information
  *
- * Return information about the mounted NTFS volume @sb in the statfs structure
+ * Return information about the mounted NTFS volume containing @dentry in the statfs structure
  * pointed to by @sfs (this is initialized with zeros before ntfs_statfs is
  * called). We interpret the values to be correct of the moment in time at
  * which we are called. Most values are variable otherwise and this isn't just
@@ -2617,8 +2617,9 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
  *
  * Return 0 on success or -errno on error.
  */
-static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs)
+static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs)
 {
+	struct super_block *sb = dentry->d_sb;
 	s64 size;
 	ntfs_volume *vol = NTFS_SB(sb);
 	ntfs_inode *mft_ni = NTFS_I(vol->mft_ino);
@@ -3093,10 +3094,11 @@ struct kmem_cache *ntfs_index_ctx_cache;
 /* Driver wide mutex. */
 DEFINE_MUTEX(ntfs_lock);
 
-static struct super_block *ntfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int ntfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type ntfs_fs_type = {
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 7e88e24b347..7273d9fa6ba 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -574,10 +574,10 @@ static struct inode_operations dlmfs_file_inode_operations = {
 	.getattr	= simple_getattr,
 };
 
-static struct super_block *dlmfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int dlmfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super);
+	return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt);
 }
 
 static struct file_system_type dlmfs_fs_type = {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 949b3dac30f..cdf73393f09 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -100,7 +100,7 @@ static int ocfs2_initialize_mem_caches(void);
 static void ocfs2_free_mem_caches(void);
 static void ocfs2_delete_osb(struct ocfs2_super *osb);
 
-static int ocfs2_statfs(struct super_block *sb, struct kstatfs *buf);
+static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf);
 
 static int ocfs2_sync_fs(struct super_block *sb, int wait);
 
@@ -672,12 +672,14 @@ read_super_error:
 	return status;
 }
 
-static struct super_block *ocfs2_get_sb(struct file_system_type *fs_type,
-					int flags,
-					const char *dev_name,
-					void *data)
+static int ocfs2_get_sb(struct file_system_type *fs_type,
+			int flags,
+			const char *dev_name,
+			void *data,
+			struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super,
+			   mnt);
 }
 
 static struct file_system_type ocfs2_fs_type = {
@@ -855,7 +857,7 @@ static void ocfs2_put_super(struct super_block *sb)
 	mlog_exit_void();
 }
 
-static int ocfs2_statfs(struct super_block *sb, struct kstatfs *buf)
+static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct ocfs2_super *osb;
 	u32 numbits, freebits;
@@ -864,9 +866,9 @@ static int ocfs2_statfs(struct super_block *sb, struct kstatfs *buf)
 	struct buffer_head *bh = NULL;
 	struct inode *inode = NULL;
 
-	mlog_entry("(%p, %p)\n", sb, buf);
+	mlog_entry("(%p, %p)\n", dentry->d_sb, buf);
 
-	osb = OCFS2_SB(sb);
+	osb = OCFS2_SB(dentry->d_sb);
 
 	inode = ocfs2_get_system_file_inode(osb,
 					    GLOBAL_BITMAP_SYSTEM_INODE,
@@ -889,7 +891,7 @@ static int ocfs2_statfs(struct super_block *sb, struct kstatfs *buf)
 	freebits = numbits - le32_to_cpu(bm_lock->id1.bitmap1.i_used);
 
 	buf->f_type = OCFS2_SUPER_MAGIC;
-	buf->f_bsize = sb->s_blocksize;
+	buf->f_bsize = dentry->d_sb->s_blocksize;
 	buf->f_namelen = OCFS2_MAX_FILENAME_LEN;
 	buf->f_blocks = ((sector_t) numbits) *
 			(osb->s_clustersize >> osb->sb->s_blocksize_bits);
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index f6986bd79e7..0c8a1294ec9 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -64,8 +64,7 @@ static char *ocfs2_page_getlink(struct dentry * dentry,
 {
 	struct page * page;
 	struct address_space *mapping = dentry->d_inode->i_mapping;
-	page = read_cache_page(mapping, 0,
-			       (filler_t *)mapping->a_ops->readpage, NULL);
+	page = read_mapping_page(mapping, 0, NULL);
 	if (IS_ERR(page))
 		goto sync_fail;
 	wait_on_page_locked(page);
diff --git a/fs/open.c b/fs/open.c
index 4f178acd4c0..5fb16e5267d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -31,18 +31,18 @@
 
 #include <asm/unistd.h>
 
-int vfs_statfs(struct super_block *sb, struct kstatfs *buf)
+int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	int retval = -ENODEV;
 
-	if (sb) {
+	if (dentry) {
 		retval = -ENOSYS;
-		if (sb->s_op->statfs) {
+		if (dentry->d_sb->s_op->statfs) {
 			memset(buf, 0, sizeof(*buf));
-			retval = security_sb_statfs(sb);
+			retval = security_sb_statfs(dentry);
 			if (retval)
 				return retval;
-			retval = sb->s_op->statfs(sb, buf);
+			retval = dentry->d_sb->s_op->statfs(dentry, buf);
 			if (retval == 0 && buf->f_frsize == 0)
 				buf->f_frsize = buf->f_bsize;
 		}
@@ -52,12 +52,12 @@ int vfs_statfs(struct super_block *sb, struct kstatfs *buf)
 
 EXPORT_SYMBOL(vfs_statfs);
 
-static int vfs_statfs_native(struct super_block *sb, struct statfs *buf)
+static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
 {
 	struct kstatfs st;
 	int retval;
 
-	retval = vfs_statfs(sb, &st);
+	retval = vfs_statfs(dentry, &st);
 	if (retval)
 		return retval;
 
@@ -95,12 +95,12 @@ static int vfs_statfs_native(struct super_block *sb, struct statfs *buf)
 	return 0;
 }
 
-static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf)
+static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
 {
 	struct kstatfs st;
 	int retval;
 
-	retval = vfs_statfs(sb, &st);
+	retval = vfs_statfs(dentry, &st);
 	if (retval)
 		return retval;
 
@@ -130,7 +130,7 @@ asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct statfs tmp;
-		error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp);
+		error = vfs_statfs_native(nd.dentry, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
@@ -149,7 +149,7 @@ asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct statfs64 tmp;
-		error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp);
+		error = vfs_statfs64(nd.dentry, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
@@ -168,7 +168,7 @@ asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf)
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp);
+	error = vfs_statfs_native(file->f_dentry, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -189,7 +189,7 @@ asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp);
+	error = vfs_statfs64(file->f_dentry, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -1152,7 +1152,7 @@ int filp_close(struct file *filp, fl_owner_t id)
 	}
 
 	if (filp->f_op && filp->f_op->flush)
-		retval = filp->f_op->flush(filp);
+		retval = filp->f_op->flush(filp, id);
 
 	dnotify_flush(filp, id);
 	locks_remove_posix(filp, id);
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 0f14276a2e5..464e2bce020 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -1054,10 +1054,10 @@ out_no_root:
 	return -ENOMEM;
 }
 
-static struct super_block *openprom_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int openprom_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, openprom_fill_super);
+	return get_sb_single(fs_type, flags, data, openprom_fill_super, mnt);
 }
 
 static struct file_system_type openprom_fs_type = {
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7ef1f094de9..2ef313a96b6 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -329,6 +329,7 @@ void delete_partition(struct gendisk *disk, int part)
 	p->ios[0] = p->ios[1] = 0;
 	p->sectors[0] = p->sectors[1] = 0;
 	devfs_remove("%s/part%d", disk->devfs_name, part);
+	sysfs_remove_link(&p->kobj, "subsystem");
 	if (p->holder_dir)
 		kobject_unregister(p->holder_dir);
 	kobject_uevent(&p->kobj, KOBJ_REMOVE);
@@ -363,6 +364,7 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
 	kobject_add(&p->kobj);
 	if (!disk->part_uevent_suppress)
 		kobject_uevent(&p->kobj, KOBJ_ADD);
+	sysfs_create_link(&p->kobj, &block_subsys.kset.kobj, "subsystem");
 	partition_sysfs_add_subdir(p);
 	disk->part[part-1] = p;
 }
@@ -398,6 +400,7 @@ static void disk_sysfs_symlinks(struct gendisk *disk)
 			kfree(disk_name);
 		}
 	}
+	sysfs_create_link(&disk->kobj, &block_subsys.kset.kobj, "subsystem");
 }
 
 /* Not exported, helper to add_disk(). */
@@ -481,6 +484,10 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 		sector_t from = state->parts[p].from;
 		if (!size)
 			continue;
+		if (from + size > get_capacity(disk)) {
+			printk(" %s: p%d exceeds device capacity\n",
+				disk->disk_name, p);
+		}
 		add_partition(disk, p, from, size);
 #ifdef CONFIG_BLK_DEV_MD
 		if (state->parts[p].flags)
@@ -496,8 +503,8 @@ unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
 	struct address_space *mapping = bdev->bd_inode->i_mapping;
 	struct page *page;
 
-	page = read_cache_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
-			(filler_t *)mapping->a_ops->readpage, NULL);
+	page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
+				 NULL);
 	if (!IS_ERR(page)) {
 		wait_on_page_locked(page);
 		if (!PageUptodate(page))
@@ -548,5 +555,6 @@ void del_gendisk(struct gendisk *disk)
 		put_device(disk->driverfs_dev);
 		disk->driverfs_dev = NULL;
 	}
+	sysfs_remove_link(&disk->kobj, "subsystem");
 	kobject_del(&disk->kobj);
 }
diff --git a/fs/pipe.c b/fs/pipe.c
index 5acd8954aaa..20352573e02 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -979,12 +979,11 @@ no_files:
  * any operations on the root directory. However, we need a non-trivial
  * d_name - pipe: will go nicely and kill the special-casing in procfs.
  */
-
-static struct super_block *
-pipefs_get_sb(struct file_system_type *fs_type, int flags,
-	      const char *dev_name, void *data)
+static int pipefs_get_sb(struct file_system_type *fs_type,
+			 int flags, const char *dev_name, void *data,
+			 struct vfsmount *mnt)
 {
-	return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
+	return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt);
 }
 
 static struct file_system_type pipe_fs_type = {
diff --git a/fs/proc/root.c b/fs/proc/root.c
index c3fd3611112..9995356ce73 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -26,10 +26,10 @@ struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc
 struct proc_dir_entry *proc_sys_root;
 #endif
 
-static struct super_block *proc_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int proc_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, proc_fill_super);
+	return get_sb_single(fs_type, flags, data, proc_fill_super, mnt);
 }
 
 static struct file_system_type proc_fs_type = {
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 2ecd46f85e9..2f24c46f72a 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -128,7 +128,7 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb);
 static void qnx4_destroy_inode(struct inode *inode);
 static void qnx4_read_inode(struct inode *);
 static int qnx4_remount(struct super_block *sb, int *flags, char *data);
-static int qnx4_statfs(struct super_block *, struct kstatfs *);
+static int qnx4_statfs(struct dentry *, struct kstatfs *);
 
 static struct super_operations qnx4_sops =
 {
@@ -282,8 +282,10 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
 	return block;
 }
 
-static int qnx4_statfs(struct super_block *sb, struct kstatfs *buf)
+static int qnx4_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct super_block *sb = dentry->d_sb;
+
 	lock_kernel();
 
 	buf->f_type    = sb->s_magic;
@@ -561,10 +563,11 @@ static void destroy_inodecache(void)
 		       "qnx4_inode_cache: not all structures were freed\n");
 }
 
-static struct super_block *qnx4_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int qnx4_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super,
+			   mnt);
 }
 
 static struct file_system_type qnx4_fs_type = {
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 14bd2246fb6..b9677335cc8 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -185,16 +185,17 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent)
 	return 0;
 }
 
-struct super_block *ramfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+int ramfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, ramfs_fill_super);
+	return get_sb_nodev(fs_type, flags, data, ramfs_fill_super, mnt);
 }
 
-static struct super_block *rootfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int rootfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super);
+	return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super,
+			    mnt);
 }
 
 static struct file_system_type ramfs_fs_type = {
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index cae2abbc0c7..00f1321e920 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -60,7 +60,7 @@ static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs)
 }
 
 static int reiserfs_remount(struct super_block *s, int *flags, char *data);
-static int reiserfs_statfs(struct super_block *s, struct kstatfs *buf);
+static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf);
 
 static int reiserfs_sync_fs(struct super_block *s, int wait)
 {
@@ -1938,15 +1938,15 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 	return errval;
 }
 
-static int reiserfs_statfs(struct super_block *s, struct kstatfs *buf)
+static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s);
+	struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(dentry->d_sb);
 
-	buf->f_namelen = (REISERFS_MAX_NAME(s->s_blocksize));
+	buf->f_namelen = (REISERFS_MAX_NAME(dentry->d_sb->s_blocksize));
 	buf->f_bfree = sb_free_blocks(rs);
 	buf->f_bavail = buf->f_bfree;
 	buf->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1;
-	buf->f_bsize = s->s_blocksize;
+	buf->f_bsize = dentry->d_sb->s_blocksize;
 	/* changed to accommodate gcc folks. */
 	buf->f_type = REISERFS_SUPER_MAGIC;
 	return 0;
@@ -2249,11 +2249,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
 
 #endif
 
-static struct super_block *get_super_block(struct file_system_type *fs_type,
-					   int flags, const char *dev_name,
-					   void *data)
+static int get_super_block(struct file_system_type *fs_type,
+			   int flags, const char *dev_name,
+			   void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super,
+			   mnt);
 }
 
 static int __init init_reiserfs_fs(void)
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index ffb79c48c5b..39fedaa88a0 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -452,8 +452,7 @@ static struct page *reiserfs_get_page(struct inode *dir, unsigned long n)
 	/* We can deadlock if we try to free dentries,
 	   and an unlink/rmdir has just occured - GFP_NOFS avoids this */
 	mapping_set_gfp_mask(mapping, GFP_NOFS);
-	page = read_cache_page(mapping, n,
-			       (filler_t *) mapping->a_ops->readpage, NULL);
+	page = read_mapping_page(mapping, n, NULL);
 	if (!IS_ERR(page)) {
 		wait_on_page_locked(page);
 		kmap(page);
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 9b9eda7b335..283fbc6b8ee 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -179,12 +179,12 @@ outnobh:
 /* That's simple too. */
 
 static int
-romfs_statfs(struct super_block *sb, struct kstatfs *buf)
+romfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	buf->f_type = ROMFS_MAGIC;
 	buf->f_bsize = ROMBSIZE;
 	buf->f_bfree = buf->f_bavail = buf->f_ffree;
-	buf->f_blocks = (romfs_maxsize(sb)+ROMBSIZE-1)>>ROMBSBITS;
+	buf->f_blocks = (romfs_maxsize(dentry->d_sb)+ROMBSIZE-1)>>ROMBSBITS;
 	buf->f_namelen = ROMFS_MAXFN;
 	return 0;
 }
@@ -607,10 +607,11 @@ static struct super_operations romfs_ops = {
 	.remount_fs	= romfs_remount,
 };
 
-static struct super_block *romfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int romfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, romfs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, romfs_fill_super,
+			   mnt);
 }
 
 static struct file_system_type romfs_fs_type = {
diff --git a/fs/select.c b/fs/select.c
index a8109baa5e4..9c4f0f2604f 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -546,37 +546,38 @@ struct poll_list {
 
 #define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
 
-static void do_pollfd(unsigned int num, struct pollfd * fdpage,
-	poll_table ** pwait, int *count)
+/*
+ * Fish for pollable events on the pollfd->fd file descriptor. We're only
+ * interested in events matching the pollfd->events mask, and the result
+ * matching that mask is both recorded in pollfd->revents and returned. The
+ * pwait poll_table will be used by the fd-provided poll handler for waiting,
+ * if non-NULL.
+ */
+static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
 {
-	int i;
-
-	for (i = 0; i < num; i++) {
-		int fd;
-		unsigned int mask;
-		struct pollfd *fdp;
-
-		mask = 0;
-		fdp = fdpage+i;
-		fd = fdp->fd;
-		if (fd >= 0) {
-			int fput_needed;
-			struct file * file = fget_light(fd, &fput_needed);
-			mask = POLLNVAL;
-			if (file != NULL) {
-				mask = DEFAULT_POLLMASK;
-				if (file->f_op && file->f_op->poll)
-					mask = file->f_op->poll(file, *pwait);
-				mask &= fdp->events | POLLERR | POLLHUP;
-				fput_light(file, fput_needed);
-			}
-			if (mask) {
-				*pwait = NULL;
-				(*count)++;
-			}
+	unsigned int mask;
+	int fd;
+
+	mask = 0;
+	fd = pollfd->fd;
+	if (fd >= 0) {
+		int fput_needed;
+		struct file * file;
+
+		file = fget_light(fd, &fput_needed);
+		mask = POLLNVAL;
+		if (file != NULL) {
+			mask = DEFAULT_POLLMASK;
+			if (file->f_op && file->f_op->poll)
+				mask = file->f_op->poll(file, pwait);
+			/* Mask out unneeded events. */
+			mask &= pollfd->events | POLLERR | POLLHUP;
+			fput_light(file, fput_needed);
 		}
-		fdp->revents = mask;
 	}
+	pollfd->revents = mask;
+
+	return mask;
 }
 
 static int do_poll(unsigned int nfds,  struct poll_list *list,
@@ -594,11 +595,29 @@ static int do_poll(unsigned int nfds,  struct poll_list *list,
 		long __timeout;
 
 		set_current_state(TASK_INTERRUPTIBLE);
-		walk = list;
-		while(walk != NULL) {
-			do_pollfd( walk->len, walk->entries, &pt, &count);
-			walk = walk->next;
+		for (walk = list; walk != NULL; walk = walk->next) {
+			struct pollfd * pfd, * pfd_end;
+
+			pfd = walk->entries;
+			pfd_end = pfd + walk->len;
+			for (; pfd != pfd_end; pfd++) {
+				/*
+				 * Fish for events. If we found one, record it
+				 * and kill the poll_table, so we don't
+				 * needlessly register any other waiters after
+				 * this. They'll get immediately deregistered
+				 * when we break out and return.
+				 */
+				if (do_pollfd(pfd, pt)) {
+					count++;
+					pt = NULL;
+				}
+			}
 		}
+		/*
+		 * All waiters have already been registered, so don't provide
+		 * a poll_table to them on the next loop iteration.
+		 */
 		pt = NULL;
 		if (count || !*timeout || signal_pending(current))
 			break;
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index fdeabc0a34f..506ff87c1d4 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -48,7 +48,7 @@
 
 static void smb_delete_inode(struct inode *);
 static void smb_put_super(struct super_block *);
-static int  smb_statfs(struct super_block *, struct kstatfs *);
+static int  smb_statfs(struct dentry *, struct kstatfs *);
 static int  smb_show_options(struct seq_file *, struct vfsmount *);
 
 static kmem_cache_t *smb_inode_cachep;
@@ -641,13 +641,13 @@ out_no_server:
 }
 
 static int
-smb_statfs(struct super_block *sb, struct kstatfs *buf)
+smb_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	int result;
 	
 	lock_kernel();
 
-	result = smb_proc_dskattr(sb, buf);
+	result = smb_proc_dskattr(dentry, buf);
 
 	unlock_kernel();
 
@@ -782,10 +782,10 @@ out:
 	return error;
 }
 
-static struct super_block *smb_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int smb_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_nodev(fs_type, flags, data, smb_fill_super);
+	return get_sb_nodev(fs_type, flags, data, smb_fill_super, mnt);
 }
 
 static struct file_system_type smb_fs_type = {
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index b1b878b8173..c3495059889 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -3226,9 +3226,9 @@ smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr)
 }
 
 int
-smb_proc_dskattr(struct super_block *sb, struct kstatfs *attr)
+smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr)
 {
-	struct smb_sb_info *server = SMB_SB(sb);
+	struct smb_sb_info *server = SMB_SB(dentry->d_sb);
 	int result;
 	char *p;
 	long unit;
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h
index 47664597e6b..972ed7dad38 100644
--- a/fs/smbfs/proto.h
+++ b/fs/smbfs/proto.h
@@ -29,7 +29,7 @@ extern int smb_proc_getattr(struct dentry *dir, struct smb_fattr *fattr);
 extern int smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr);
 extern int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, unsigned int major, unsigned int minor);
 extern int smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr);
-extern int smb_proc_dskattr(struct super_block *sb, struct kstatfs *attr);
+extern int smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr);
 extern int smb_proc_read_link(struct smb_sb_info *server, struct dentry *d, char *buffer, int len);
 extern int smb_proc_symlink(struct smb_sb_info *server, struct dentry *d, const char *oldpath);
 extern int smb_proc_link(struct smb_sb_info *server, struct dentry *dentry, struct dentry *new_dentry);
diff --git a/fs/splice.c b/fs/splice.c
index a285fd746dc..05fd2787be9 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -55,31 +55,43 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
 				     struct pipe_buffer *buf)
 {
 	struct page *page = buf->page;
-	struct address_space *mapping = page_mapping(page);
+	struct address_space *mapping;
 
 	lock_page(page);
 
-	WARN_ON(!PageUptodate(page));
+	mapping = page_mapping(page);
+	if (mapping) {
+		WARN_ON(!PageUptodate(page));
 
-	/*
-	 * At least for ext2 with nobh option, we need to wait on writeback
-	 * completing on this page, since we'll remove it from the pagecache.
-	 * Otherwise truncate wont wait on the page, allowing the disk
-	 * blocks to be reused by someone else before we actually wrote our
-	 * data to them. fs corruption ensues.
-	 */
-	wait_on_page_writeback(page);
+		/*
+		 * At least for ext2 with nobh option, we need to wait on
+		 * writeback completing on this page, since we'll remove it
+		 * from the pagecache.  Otherwise truncate won't wait on the
+		 * page, allowing the disk blocks to be reused by someone else
+		 * before we actually wrote our data to them. fs corruption
+		 * ensues.
+		 */
+		wait_on_page_writeback(page);
 
-	if (PagePrivate(page))
-		try_to_release_page(page, mapping_gfp_mask(mapping));
+		if (PagePrivate(page))
+			try_to_release_page(page, mapping_gfp_mask(mapping));
 
-	if (!remove_mapping(mapping, page)) {
-		unlock_page(page);
-		return 1;
+		/*
+		 * If we succeeded in removing the mapping, set LRU flag
+		 * and return good.
+		 */
+		if (remove_mapping(mapping, page)) {
+			buf->flags |= PIPE_BUF_FLAG_LRU;
+			return 0;
+		}
 	}
 
-	buf->flags |= PIPE_BUF_FLAG_LRU;
-	return 0;
+	/*
+	 * Raced with truncate or failed to remove page from current
+	 * address space, unlock and return failure.
+	 */
+	unlock_page(page);
+	return 1;
 }
 
 static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
diff --git a/fs/super.c b/fs/super.c
index a66f66bb804..057b5325b7e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -231,7 +231,7 @@ void generic_shutdown_super(struct super_block *sb)
 	if (root) {
 		sb->s_root = NULL;
 		shrink_dcache_parent(root);
-		shrink_dcache_anon(&sb->s_anon);
+		shrink_dcache_sb(sb);
 		dput(root);
 		fsync_super(sb);
 		lock_super(sb);
@@ -486,7 +486,7 @@ asmlinkage long sys_ustat(unsigned dev, struct ustat __user * ubuf)
         s = user_get_super(new_decode_dev(dev));
         if (s == NULL)
                 goto out;
-	err = vfs_statfs(s, &sbuf);
+	err = vfs_statfs(s->s_root, &sbuf);
 	drop_super(s);
 	if (err)
 		goto out;
@@ -676,9 +676,10 @@ static void bdev_uevent(struct block_device *bdev, enum kobject_action action)
 	}
 }
 
-struct super_block *get_sb_bdev(struct file_system_type *fs_type,
+int get_sb_bdev(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data,
-	int (*fill_super)(struct super_block *, void *, int))
+	int (*fill_super)(struct super_block *, void *, int),
+	struct vfsmount *mnt)
 {
 	struct block_device *bdev;
 	struct super_block *s;
@@ -686,7 +687,7 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 
 	bdev = open_bdev_excl(dev_name, flags, fs_type);
 	if (IS_ERR(bdev))
-		return (struct super_block *)bdev;
+		return PTR_ERR(bdev);
 
 	/*
 	 * once the super is inserted into the list by sget, s_umount
@@ -697,15 +698,17 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 	s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
 	mutex_unlock(&bdev->bd_mount_mutex);
 	if (IS_ERR(s))
-		goto out;
+		goto error_s;
 
 	if (s->s_root) {
 		if ((flags ^ s->s_flags) & MS_RDONLY) {
 			up_write(&s->s_umount);
 			deactivate_super(s);
-			s = ERR_PTR(-EBUSY);
+			error = -EBUSY;
+			goto error_bdev;
 		}
-		goto out;
+
+		close_bdev_excl(bdev);
 	} else {
 		char b[BDEVNAME_SIZE];
 
@@ -716,18 +719,21 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 		if (error) {
 			up_write(&s->s_umount);
 			deactivate_super(s);
-			s = ERR_PTR(error);
-		} else {
-			s->s_flags |= MS_ACTIVE;
-			bdev_uevent(bdev, KOBJ_MOUNT);
+			goto error;
 		}
+
+		s->s_flags |= MS_ACTIVE;
+		bdev_uevent(bdev, KOBJ_MOUNT);
 	}
 
-	return s;
+	return simple_set_mnt(mnt, s);
 
-out:
+error_s:
+	error = PTR_ERR(s);
+error_bdev:
 	close_bdev_excl(bdev);
-	return s;
+error:
+	return error;
 }
 
 EXPORT_SYMBOL(get_sb_bdev);
@@ -744,15 +750,16 @@ void kill_block_super(struct super_block *sb)
 
 EXPORT_SYMBOL(kill_block_super);
 
-struct super_block *get_sb_nodev(struct file_system_type *fs_type,
+int get_sb_nodev(struct file_system_type *fs_type,
 	int flags, void *data,
-	int (*fill_super)(struct super_block *, void *, int))
+	int (*fill_super)(struct super_block *, void *, int),
+	struct vfsmount *mnt)
 {
 	int error;
 	struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
 
 	if (IS_ERR(s))
-		return s;
+		return PTR_ERR(s);
 
 	s->s_flags = flags;
 
@@ -760,10 +767,10 @@ struct super_block *get_sb_nodev(struct file_system_type *fs_type,
 	if (error) {
 		up_write(&s->s_umount);
 		deactivate_super(s);
-		return ERR_PTR(error);
+		return error;
 	}
 	s->s_flags |= MS_ACTIVE;
-	return s;
+	return simple_set_mnt(mnt, s);
 }
 
 EXPORT_SYMBOL(get_sb_nodev);
@@ -773,94 +780,102 @@ static int compare_single(struct super_block *s, void *p)
 	return 1;
 }
 
-struct super_block *get_sb_single(struct file_system_type *fs_type,
+int get_sb_single(struct file_system_type *fs_type,
 	int flags, void *data,
-	int (*fill_super)(struct super_block *, void *, int))
+	int (*fill_super)(struct super_block *, void *, int),
+	struct vfsmount *mnt)
 {
 	struct super_block *s;
 	int error;
 
 	s = sget(fs_type, compare_single, set_anon_super, NULL);
 	if (IS_ERR(s))
-		return s;
+		return PTR_ERR(s);
 	if (!s->s_root) {
 		s->s_flags = flags;
 		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
 		if (error) {
 			up_write(&s->s_umount);
 			deactivate_super(s);
-			return ERR_PTR(error);
+			return error;
 		}
 		s->s_flags |= MS_ACTIVE;
 	}
 	do_remount_sb(s, flags, data, 0);
-	return s;
+	return simple_set_mnt(mnt, s);
 }
 
 EXPORT_SYMBOL(get_sb_single);
 
 struct vfsmount *
-do_kern_mount(const char *fstype, int flags, const char *name, void *data)
+vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
 {
-	struct file_system_type *type = get_fs_type(fstype);
-	struct super_block *sb = ERR_PTR(-ENOMEM);
 	struct vfsmount *mnt;
-	int error;
 	char *secdata = NULL;
+	int error;
 
 	if (!type)
 		return ERR_PTR(-ENODEV);
 
+	error = -ENOMEM;	/* result if either allocation below fails */
 	mnt = alloc_vfsmnt(name);
 	if (!mnt)
 		goto out;
 
 	if (data) {
 		secdata = alloc_secdata();
-		if (!secdata) {
-			sb = ERR_PTR(-ENOMEM);
+		if (!secdata)
 			goto out_mnt;
-		}
 
 		error = security_sb_copy_data(type, data, secdata);
-		if (error) {
-			sb = ERR_PTR(error);
+		if (error)
 			goto out_free_secdata;
-		}
 	}
 
-	sb = type->get_sb(type, flags, name, data);
-	if (IS_ERR(sb))
+	error = type->get_sb(type, flags, name, data, mnt);
+	if (error < 0)
 		goto out_free_secdata;
- 	error = security_sb_kern_mount(sb, secdata);
+
+ 	error = security_sb_kern_mount(mnt->mnt_sb, secdata);
  	if (error)
  		goto out_sb;
-	mnt->mnt_sb = sb;
-	mnt->mnt_root = dget(sb->s_root);
-	mnt->mnt_mountpoint = sb->s_root;
+
+	mnt->mnt_mountpoint = mnt->mnt_root;
 	mnt->mnt_parent = mnt;
-	up_write(&sb->s_umount);
+	up_write(&mnt->mnt_sb->s_umount);
 	free_secdata(secdata);
-	put_filesystem(type);
 	return mnt;
 out_sb:
-	up_write(&sb->s_umount);
-	deactivate_super(sb);
-	sb = ERR_PTR(error);
+	dput(mnt->mnt_root);	/* NOTE(review): ref presumably from get_sb */
+	up_write(&mnt->mnt_sb->s_umount);
+	deactivate_super(mnt->mnt_sb);
 out_free_secdata:
 	free_secdata(secdata);
 out_mnt:
 	free_vfsmnt(mnt);
 out:
+	return ERR_PTR(error);
+}
+
+EXPORT_SYMBOL_GPL(vfs_kern_mount);
+
+struct vfsmount *
+do_kern_mount(const char *fstype, int flags, const char *name, void *data)
+{
+	struct file_system_type *type = get_fs_type(fstype);
+	struct vfsmount *mnt;
+	if (!type)
+		return ERR_PTR(-ENODEV);
+	mnt = vfs_kern_mount(type, flags, name, data);	/* may be ERR_PTR() */
 	put_filesystem(type);
-	return (struct vfsmount *)sb;
+	return mnt;
 }
 
 EXPORT_SYMBOL_GPL(do_kern_mount);
 
 struct vfsmount *kern_mount(struct file_system_type *type)
 {
-	return do_kern_mount(type->name, 0, type->name, NULL);
+	return vfs_kern_mount(type, 0, type->name, NULL);
 }
 
 EXPORT_SYMBOL(kern_mount);
diff --git a/fs/sync.c b/fs/sync.c
index aab5ffe77e9..955aef04da2 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -100,7 +100,7 @@ asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
 	}
 
 	if (nbytes == 0)
-		endbyte = -1;
+		endbyte = LLONG_MAX;
 	else
 		endbyte--;		/* inclusive */
 
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f1117e885bd..40190c48927 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -66,10 +66,10 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 	return 0;
 }
 
-static struct super_block *sysfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int sysfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, sysfs_fill_super);
+	return get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt);
 }
 
 static struct file_system_type sysfs_fs_type = {
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index d7074341ee8..f2bef962d30 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -53,8 +53,7 @@ static int dir_commit_chunk(struct page *page, unsigned from, unsigned to)
 static struct page * dir_get_page(struct inode *dir, unsigned long n)
 {
 	struct address_space *mapping = dir->i_mapping;
-	struct page *page = read_cache_page(mapping, n,
-				(filler_t*)mapping->a_ops->readpage, NULL);
+	struct page *page = read_mapping_page(mapping, n, NULL);
 	if (!IS_ERR(page)) {
 		wait_on_page_locked(page);
 		kmap(page);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 3ff89cc5833..58b2d22142b 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -85,8 +85,9 @@ static void sysv_put_super(struct super_block *sb)
 	kfree(sbi);
 }
 
-static int sysv_statfs(struct super_block *sb, struct kstatfs *buf)
+static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct super_block *sb = dentry->d_sb;
 	struct sysv_sb_info *sbi = SYSV_SB(sb);
 
 	buf->f_type = sb->s_magic;
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index e92b991e6dd..876639b9332 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -506,16 +506,17 @@ failed:
 
 /* Every kernel module contains stuff like this. */
 
-static struct super_block *sysv_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int sysv_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super,
+			   mnt);
 }
 
-static struct super_block *v7_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int v7_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super, mnt);
 }
 
 static struct file_system_type sysv_fs_type = {
diff --git a/fs/udf/super.c b/fs/udf/super.c
index e45789fe38e..44fe2cb0bbb 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -91,13 +91,13 @@ static void udf_load_partdesc(struct super_block *, struct buffer_head *);
 static void udf_open_lvid(struct super_block *);
 static void udf_close_lvid(struct super_block *);
 static unsigned int udf_count_free(struct super_block *);
-static int udf_statfs(struct super_block *, struct kstatfs *);
+static int udf_statfs(struct dentry *, struct kstatfs *);
 
 /* UDF filesystem type */
-static struct super_block *udf_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int udf_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super, mnt);
 }
 
 static struct file_system_type udf_fstype = {
@@ -1779,8 +1779,10 @@ udf_put_super(struct super_block *sb)
  *	Written, tested, and released.
  */
 static int
-udf_statfs(struct super_block *sb, struct kstatfs *buf)
+udf_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct super_block *sb = dentry->d_sb;
+
 	buf->f_type = UDF_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = UDF_SB_PARTLEN(sb, UDF_SB_PARTITION(sb));
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index db98a4c71e6..fe5ab2aa289 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1113,8 +1113,9 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 	return 0;
 }
 
-static int ufs_statfs (struct super_block *sb, struct kstatfs *buf)
+static int ufs_statfs (struct dentry *dentry, struct kstatfs *buf)
 {
+	struct super_block *sb = dentry->d_sb;
 	struct ufs_sb_private_info * uspi;
 	struct ufs_super_block_first * usb1;
 	struct ufs_super_block * usb;
@@ -1311,10 +1312,10 @@ out:
 
 #endif
 
-static struct super_block *ufs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int ufs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super, mnt);
 }
 
 static struct file_system_type ufs_fs_type = {
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index a56cec3be5f..9a8f48bae95 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -1023,11 +1023,12 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
 	return 0;
 }
 
-static struct super_block *vfat_get_sb(struct file_system_type *fs_type,
-				       int flags, const char *dev_name,
-				       void *data)
+static int vfat_get_sb(struct file_system_type *fs_type,
+		       int flags, const char *dev_name,
+		       void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super,
+			   mnt);
 }
 
 static struct file_system_type vfat_fs_type = {
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index bac27d66151..26b364c9d62 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -1,6 +1,5 @@
 config XFS_FS
 	tristate "XFS filesystem support"
-	select EXPORTFS if NFSD!=n
 	help
 	  XFS is a high performance journaling filesystem which originated
 	  on the SGI IRIX platform.  It is completely multi-threaded, can
@@ -18,11 +17,6 @@ config XFS_FS
 	  system of your root partition is compiled as a module, you'll need
 	  to use an initial ramdisk (initrd) to boot.
 
-config XFS_EXPORT
-	bool
-	depends on XFS_FS && EXPORTFS
-	default y
-
 config XFS_QUOTA
 	bool "XFS Quota support"
 	depends on XFS_FS
@@ -65,18 +59,19 @@ config XFS_POSIX_ACL
 	  If you don't know what Access Control Lists are, say N.
 
 config XFS_RT
-	bool "XFS Realtime support (EXPERIMENTAL)"
-	depends on XFS_FS && EXPERIMENTAL
+	bool "XFS Realtime subvolume support"
+	depends on XFS_FS
 	help
 	  If you say Y here you will be able to mount and use XFS filesystems
-	  which contain a realtime subvolume. The realtime subvolume is a
-	  separate area of disk space where only file data is stored. The
-	  realtime subvolume is designed to provide very deterministic
-	  data rates suitable for media streaming applications.
-
-	  See the xfs man page in section 5 for a bit more information.
+	  which contain a realtime subvolume.  The realtime subvolume is a
+	  separate area of disk space where only file data is stored.  It was
+	  originally designed to provide deterministic data rates suitable
+	  for media streaming applications, but is also useful as a generic
+	  mechanism for ensuring data and metadata/log I/Os are completely
+	  separated.  Regular file I/Os are isolated to a separate device
+	  from all other requests, and this can be done quite transparently
+	  to applications via the inherit-realtime directory inode flag.
 
-	  This feature is unsupported at this time, is not yet fully
-	  functional, and may cause serious problems.
+	  See the xfs man page in section 5 for additional information.
 
 	  If unsure, say N.
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
index 5d73eaa1971..9e7f85986d0 100644
--- a/fs/xfs/Makefile-linux-2.6
+++ b/fs/xfs/Makefile-linux-2.6
@@ -59,7 +59,6 @@ xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
 xfs-$(CONFIG_PROC_FS)		+= $(XFS_LINUX)/xfs_stats.o
 xfs-$(CONFIG_SYSCTL)		+= $(XFS_LINUX)/xfs_sysctl.o
 xfs-$(CONFIG_COMPAT)		+= $(XFS_LINUX)/xfs_ioctl32.o
-xfs-$(CONFIG_XFS_EXPORT)	+= $(XFS_LINUX)/xfs_export.o
 
 
 xfs-y				+= xfs_alloc.o \
@@ -73,14 +72,12 @@ xfs-y				+= xfs_alloc.o \
 				   xfs_btree.o \
 				   xfs_buf_item.o \
 				   xfs_da_btree.o \
-				   xfs_dir.o \
 				   xfs_dir2.o \
 				   xfs_dir2_block.o \
 				   xfs_dir2_data.o \
 				   xfs_dir2_leaf.o \
 				   xfs_dir2_node.o \
 				   xfs_dir2_sf.o \
-				   xfs_dir_leaf.o \
 				   xfs_error.o \
 				   xfs_extfree_item.o \
 				   xfs_fsops.o \
@@ -117,6 +114,7 @@ xfs-y				+= $(addprefix $(XFS_LINUX)/, \
 				   kmem.o \
 				   xfs_aops.o \
 				   xfs_buf.o \
+				   xfs_export.o \
 				   xfs_file.o \
 				   xfs_fs_subr.o \
 				   xfs_globals.o \
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 2cfd33d4d8a..939bd84bc7e 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -23,42 +23,6 @@
 #include <linux/mm.h>
 
 /*
- * Process flags handling
- */
-
-#define PFLAGS_TEST_NOIO()              (current->flags & PF_NOIO)
-#define PFLAGS_TEST_FSTRANS()           (current->flags & PF_FSTRANS)
-
-#define PFLAGS_SET_NOIO() do {		\
-	current->flags |= PF_NOIO;	\
-} while (0)
-
-#define PFLAGS_CLEAR_NOIO() do {	\
-	current->flags &= ~PF_NOIO;	\
-} while (0)
-
-/* these could be nested, so we save state */
-#define PFLAGS_SET_FSTRANS(STATEP) do {	\
-	*(STATEP) = current->flags;	\
-	current->flags |= PF_FSTRANS;	\
-} while (0)
-
-#define PFLAGS_CLEAR_FSTRANS(STATEP) do { \
-	*(STATEP) = current->flags;	\
-	current->flags &= ~PF_FSTRANS;	\
-} while (0)
-
-/* Restore the PF_FSTRANS state to what was saved in STATEP */
-#define PFLAGS_RESTORE_FSTRANS(STATEP) do {     		\
-	current->flags = ((current->flags & ~PF_FSTRANS) |	\
-			  (*(STATEP) & PF_FSTRANS));		\
-} while (0)
-
-#define PFLAGS_DUP(OSTATEP, NSTATEP) do { \
-	*(NSTATEP) = *(OSTATEP);	\
-} while (0)
-
-/*
  * General memory allocation interfaces
  */
 
@@ -83,7 +47,7 @@ kmem_flags_convert(unsigned int __nocast flags)
 		lflags = GFP_ATOMIC | __GFP_NOWARN;
 	} else {
 		lflags = GFP_KERNEL | __GFP_NOWARN;
-		if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS))
+		if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
 			lflags &= ~__GFP_FS;
 	}
 	return lflags;
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
index 1b262b790d9..32e1ce0f04c 100644
--- a/fs/xfs/linux-2.6/mrlock.h
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -28,7 +28,7 @@ typedef struct {
 } mrlock_t;
 
 #define mrinit(mrp, name)	\
-	( (mrp)->mr_writer = 0, init_rwsem(&(mrp)->mr_lock) )
+	do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
 #define mrlock_init(mrp, t,n,s)	mrinit(mrp, n)
 #define mrfree(mrp)		do { } while (0)
 #define mraccess(mrp)		mraccessf(mrp, 0)
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
index 194a84490bd..b25090094cc 100644
--- a/fs/xfs/linux-2.6/sema.h
+++ b/fs/xfs/linux-2.6/sema.h
@@ -34,20 +34,21 @@ typedef struct semaphore sema_t;
 #define initnsema(sp, val, name)	sema_init(sp, val)
 #define psema(sp, b)			down(sp)
 #define vsema(sp)			up(sp)
-#define valusema(sp)			(atomic_read(&(sp)->count))
-#define freesema(sema)
+#define freesema(sema)			do { } while (0)
+
+static inline int issemalocked(sema_t *sp)
+{
+	return down_trylock(sp) || (up(sp), 0);	/* 1 if busy; else undo, 0 */
+}
 
 /*
  * Map cpsema (try to get the sema) to down_trylock. We need to switch
  * the return values since cpsema returns 1 (acquired) 0 (failed) and
  * down_trylock returns the reverse 0 (acquired) 1 (failed).
  */
-
-#define cpsema(sp)			(down_trylock(sp) ? 0 : 1)
-
-/*
- * Didn't do cvsema(sp). Not sure how to map this to up/down/...
- * It does a vsema if the values is < 0 other wise nothing.
- */
+static inline int cpsema(sema_t *sp)
+{
+	return down_trylock(sp) ? 0 : 1;	/* 1 = acquired, 0 = not acquired */
+}
 
 #endif /* __XFS_SUPPORT_SEMA_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 4d191ef39b6..3e807b828e2 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -21,7 +21,6 @@
 #include "xfs_inum.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_trans.h"
 #include "xfs_dmapi.h"
@@ -29,7 +28,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -76,7 +74,7 @@ xfs_page_trace(
 	int		mask)
 {
 	xfs_inode_t	*ip;
-	vnode_t		*vp = vn_from_inode(inode);
+	bhv_vnode_t	*vp = vn_from_inode(inode);
 	loff_t		isize = i_size_read(inode);
 	loff_t		offset = page_offset(page);
 	int		delalloc = -1, unmapped = -1, unwritten = -1;
@@ -136,9 +134,10 @@ xfs_destroy_ioend(
 
 	for (bh = ioend->io_buffer_head; bh; bh = next) {
 		next = bh->b_private;
-		bh->b_end_io(bh, ioend->io_uptodate);
+		bh->b_end_io(bh, !ioend->io_error);
 	}
-
+	if (unlikely(ioend->io_error))
+		vn_ioerror(ioend->io_vnode, ioend->io_error, __FILE__,__LINE__);
 	vn_iowake(ioend->io_vnode);
 	mempool_free(ioend, xfs_ioend_pool);
 }
@@ -180,13 +179,12 @@ xfs_end_bio_unwritten(
 	void			*data)
 {
 	xfs_ioend_t		*ioend = data;
-	vnode_t			*vp = ioend->io_vnode;
+	bhv_vnode_t		*vp = ioend->io_vnode;
 	xfs_off_t		offset = ioend->io_offset;
 	size_t			size = ioend->io_size;
-	int			error;
 
-	if (ioend->io_uptodate)
-		VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
+	if (likely(!ioend->io_error))
+		bhv_vop_bmap(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL);
 	xfs_destroy_ioend(ioend);
 }
 
@@ -211,7 +209,7 @@ xfs_alloc_ioend(
 	 * all the I/O from calling the completion routine too early.
 	 */
 	atomic_set(&ioend->io_remaining, 1);
-	ioend->io_uptodate = 1; /* cleared if any I/O fails */
+	ioend->io_error = 0;
 	ioend->io_list = NULL;
 	ioend->io_type = type;
 	ioend->io_vnode = vn_from_inode(inode);
@@ -239,10 +237,10 @@ xfs_map_blocks(
 	xfs_iomap_t		*mapp,
 	int			flags)
 {
-	vnode_t			*vp = vn_from_inode(inode);
+	bhv_vnode_t		*vp = vn_from_inode(inode);
 	int			error, nmaps = 1;
 
-	VOP_BMAP(vp, offset, count, flags, mapp, &nmaps, error);
+	error = bhv_vop_bmap(vp, offset, count, flags, mapp, &nmaps);
 	if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
 		VMODIFY(vp);
 	return -error;
@@ -271,16 +269,14 @@ xfs_end_bio(
 	if (bio->bi_size)
 		return 1;
 
-	ASSERT(ioend);
 	ASSERT(atomic_read(&bio->bi_cnt) >= 1);
+	ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
 
 	/* Toss bio and pass work off to an xfsdatad thread */
-	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-		ioend->io_uptodate = 0;
 	bio->bi_private = NULL;
 	bio->bi_end_io = NULL;
-
 	bio_put(bio);
+
 	xfs_finish_ioend(ioend);
 	return 0;
 }
@@ -1127,7 +1123,7 @@ xfs_vm_writepage(
 	 * then mark the page dirty again and leave the page
 	 * as is.
 	 */
-	if (PFLAGS_TEST_FSTRANS() && need_trans)
+	if (current_test_flags(PF_FSTRANS) && need_trans)
 		goto out_fail;
 
 	/*
@@ -1158,6 +1154,18 @@ out_unlock:
 	return error;
 }
 
+STATIC int
+xfs_vm_writepages(
+	struct address_space	*mapping,
+	struct writeback_control *wbc)
+{
+	struct bhv_vnode	*vp = vn_from_inode(mapping->host);
+
+	if (VN_TRUNC(vp))
+		VUNTRUNCATE(vp);	/* NOTE(review): presumably clears VN_TRUNC */
+	return generic_writepages(mapping, wbc);	/* hand off to generic code */
+}
+
 /*
  * Called to move a page into cleanable state - and from there
  * to be released. Possibly the page is already clean. We always
@@ -1204,7 +1212,7 @@ xfs_vm_releasepage(
 	/* If we are already inside a transaction or the thread cannot
 	 * do I/O, we cannot release this page.
 	 */
-	if (PFLAGS_TEST_FSTRANS())
+	if (current_test_flags(PF_FSTRANS))
 		return 0;
 
 	/*
@@ -1231,7 +1239,7 @@ __xfs_get_blocks(
 	int			direct,
 	bmapi_flags_t		flags)
 {
-	vnode_t			*vp = vn_from_inode(inode);
+	bhv_vnode_t		*vp = vn_from_inode(inode);
 	xfs_iomap_t		iomap;
 	xfs_off_t		offset;
 	ssize_t			size;
@@ -1241,8 +1249,8 @@ __xfs_get_blocks(
 	offset = (xfs_off_t)iblock << inode->i_blkbits;
 	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
 	size = bh_result->b_size;
-	VOP_BMAP(vp, offset, size,
-		create ? flags : BMAPI_READ, &iomap, &niomap, error);
+	error = bhv_vop_bmap(vp, offset, size,
+			     create ? flags : BMAPI_READ, &iomap, &niomap);
 	if (error)
 		return -error;
 	if (niomap == 0)
@@ -1370,13 +1378,13 @@ xfs_vm_direct_IO(
 {
 	struct file	*file = iocb->ki_filp;
 	struct inode	*inode = file->f_mapping->host;
-	vnode_t		*vp = vn_from_inode(inode);
+	bhv_vnode_t	*vp = vn_from_inode(inode);
 	xfs_iomap_t	iomap;
 	int		maps = 1;
 	int		error;
 	ssize_t		ret;
 
-	VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error);
+	error = bhv_vop_bmap(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps);
 	if (error)
 		return -error;
 
@@ -1409,14 +1417,12 @@ xfs_vm_bmap(
 	sector_t		block)
 {
 	struct inode		*inode = (struct inode *)mapping->host;
-	vnode_t			*vp = vn_from_inode(inode);
-	int			error;
+	bhv_vnode_t		*vp = vn_from_inode(inode);
 
 	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
-
-	VOP_RWLOCK(vp, VRWLOCK_READ);
-	VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error);
-	VOP_RWUNLOCK(vp, VRWLOCK_READ);
+	bhv_vop_rwlock(vp, VRWLOCK_READ);
+	bhv_vop_flush_pages(vp, (xfs_off_t)0, -1, 0, FI_REMAPF);
+	bhv_vop_rwunlock(vp, VRWLOCK_READ);
 	return generic_block_bmap(mapping, block, xfs_get_blocks);
 }
 
@@ -1452,6 +1458,7 @@ struct address_space_operations xfs_address_space_operations = {
 	.readpage		= xfs_vm_readpage,
 	.readpages		= xfs_vm_readpages,
 	.writepage		= xfs_vm_writepage,
+	.writepages		= xfs_vm_writepages,
 	.sync_page		= block_sync_page,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 60716543c68..706d8c781b8 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005 Silicon Graphics, Inc.
+ * Copyright (c) 2005-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -30,9 +30,9 @@ typedef void (*xfs_ioend_func_t)(void *);
 typedef struct xfs_ioend {
 	struct xfs_ioend	*io_list;	/* next ioend in chain */
 	unsigned int		io_type;	/* delalloc / unwritten */
-	unsigned int		io_uptodate;	/* I/O status register */
+	int			io_error;	/* I/O error code */
 	atomic_t		io_remaining;	/* hold count */
-	struct vnode		*io_vnode;	/* file being written to */
+	struct bhv_vnode	*io_vnode;	/* file being written to */
 	struct buffer_head	*io_buffer_head;/* buffer linked list head */
 	struct buffer_head	*io_buffer_tail;/* buffer linked list tail */
 	size_t			io_size;	/* size of the extent */
@@ -43,4 +43,4 @@ typedef struct xfs_ioend {
 extern struct address_space_operations xfs_address_space_operations;
 extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
 
-#endif /* __XFS_IOPS_H__ */
+#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index b768ea910bb..5fb75d9151f 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -21,7 +21,6 @@
 #include "xfs_log.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
-#include "xfs_dir.h"
 #include "xfs_mount.h"
 #include "xfs_export.h"
 
@@ -97,7 +96,7 @@ xfs_fs_encode_fh(
 	int			len;
 	int			is64 = 0;
 #if XFS_BIG_INUMS
-	vfs_t			*vfs = vfs_from_sb(inode->i_sb);
+	bhv_vfs_t		*vfs = vfs_from_sb(inode->i_sb);
 
 	if (!(vfs->vfs_flag & VFS_32BITINODES)) {
 		/* filesystem may contain 64bit inode numbers */
@@ -136,13 +135,13 @@ xfs_fs_get_dentry(
 	struct super_block	*sb,
 	void			*data)
 {
-	vnode_t			*vp;
+	bhv_vnode_t		*vp;
 	struct inode		*inode;
 	struct dentry		*result;
-	vfs_t			*vfsp = vfs_from_sb(sb);
+	bhv_vfs_t		*vfsp = vfs_from_sb(sb);
 	int			error;
 
-	VFS_VGET(vfsp, &vp, (fid_t *)data, error);
+	error = bhv_vfs_vget(vfsp, &vp, (fid_t *)data);
 	if (error || vp == NULL)
 		return ERR_PTR(-ESTALE) ;
 
@@ -160,12 +159,12 @@ xfs_fs_get_parent(
 	struct dentry		*child)
 {
 	int			error;
-	vnode_t			*vp, *cvp;
+	bhv_vnode_t		*vp, *cvp;
 	struct dentry		*parent;
 
 	cvp = NULL;
 	vp = vn_from_inode(child->d_inode);
-	VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error);
+	error = bhv_vop_lookup(vp, &dotdot, &cvp, 0, NULL, NULL);
 	if (unlikely(error))
 		return ERR_PTR(-error);
 
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index c847416f6d1..70662371bb1 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -21,7 +21,6 @@
 #include "xfs_inum.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_trans.h"
 #include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
 #include "xfs_alloc.h"
 #include "xfs_btree.h"
 #include "xfs_attr_sf.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
@@ -58,15 +56,12 @@ __xfs_file_read(
 {
 	struct iovec		iov = {buf, count};
 	struct file		*file = iocb->ki_filp;
-	vnode_t			*vp = vn_from_inode(file->f_dentry->d_inode);
-	ssize_t			rval;
+	bhv_vnode_t		*vp = vn_from_inode(file->f_dentry->d_inode);
 
 	BUG_ON(iocb->ki_pos != pos);
-
 	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= IO_ISDIRECT;
-	VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
-	return rval;
+	return bhv_vop_read(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL);
 }
 
 STATIC ssize_t
@@ -100,15 +95,12 @@ __xfs_file_write(
 	struct iovec	iov = {(void __user *)buf, count};
 	struct file	*file = iocb->ki_filp;
 	struct inode	*inode = file->f_mapping->host;
-	vnode_t		*vp = vn_from_inode(inode);
-	ssize_t		rval;
+	bhv_vnode_t	*vp = vn_from_inode(inode);
 
 	BUG_ON(iocb->ki_pos != pos);
 	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= IO_ISDIRECT;
-
-	VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
-	return rval;
+	return bhv_vop_write(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL);
 }
 
 STATIC ssize_t
@@ -140,7 +132,7 @@ __xfs_file_readv(
 	loff_t			*ppos)
 {
 	struct inode	*inode = file->f_mapping->host;
-	vnode_t		*vp = vn_from_inode(inode);
+	bhv_vnode_t	*vp = vn_from_inode(inode);
 	struct kiocb	kiocb;
 	ssize_t		rval;
 
@@ -149,7 +141,8 @@ __xfs_file_readv(
 
 	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= IO_ISDIRECT;
-	VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
+	rval = bhv_vop_read(vp, &kiocb, iov, nr_segs,
+				&kiocb.ki_pos, ioflags, NULL);
 
 	*ppos = kiocb.ki_pos;
 	return rval;
@@ -184,7 +177,7 @@ __xfs_file_writev(
 	loff_t			*ppos)
 {
 	struct inode	*inode = file->f_mapping->host;
-	vnode_t		*vp = vn_from_inode(inode);
+	bhv_vnode_t	*vp = vn_from_inode(inode);
 	struct kiocb	kiocb;
 	ssize_t		rval;
 
@@ -193,7 +186,8 @@ __xfs_file_writev(
 	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= IO_ISDIRECT;
 
-	VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
+	rval = bhv_vop_write(vp, &kiocb, iov, nr_segs,
+				 &kiocb.ki_pos, ioflags, NULL);
 
 	*ppos = kiocb.ki_pos;
 	return rval;
@@ -227,11 +221,8 @@ xfs_file_sendfile(
 	read_actor_t		actor,
 	void			*target)
 {
-	vnode_t			*vp = vn_from_inode(filp->f_dentry->d_inode);
-	ssize_t			rval;
-
-	VOP_SENDFILE(vp, filp, pos, 0, count, actor, target, NULL, rval);
-	return rval;
+	return bhv_vop_sendfile(vn_from_inode(filp->f_dentry->d_inode),
+				filp, pos, 0, count, actor, target, NULL);
 }
 
 STATIC ssize_t
@@ -242,11 +233,8 @@ xfs_file_sendfile_invis(
 	read_actor_t		actor,
 	void			*target)
 {
-	vnode_t			*vp = vn_from_inode(filp->f_dentry->d_inode);
-	ssize_t			rval;
-
-	VOP_SENDFILE(vp, filp, pos, IO_INVIS, count, actor, target, NULL, rval);
-	return rval;
+	return bhv_vop_sendfile(vn_from_inode(filp->f_dentry->d_inode),
+				filp, pos, IO_INVIS, count, actor, target, NULL);
 }
 
 STATIC ssize_t
@@ -257,11 +245,8 @@ xfs_file_splice_read(
 	size_t			len,
 	unsigned int		flags)
 {
-	vnode_t			*vp = vn_from_inode(infilp->f_dentry->d_inode);
-	ssize_t			rval;
-
-	VOP_SPLICE_READ(vp, infilp, ppos, pipe, len, flags, 0, NULL, rval);
-	return rval;
+	return bhv_vop_splice_read(vn_from_inode(infilp->f_dentry->d_inode),
+				   infilp, ppos, pipe, len, flags, 0, NULL);
 }
 
 STATIC ssize_t
@@ -272,11 +257,9 @@ xfs_file_splice_read_invis(
 	size_t			len,
 	unsigned int		flags)
 {
-	vnode_t			*vp = vn_from_inode(infilp->f_dentry->d_inode);
-	ssize_t			rval;
-
-	VOP_SPLICE_READ(vp, infilp, ppos, pipe, len, flags, IO_INVIS, NULL, rval);
-	return rval;
+	return bhv_vop_splice_read(vn_from_inode(infilp->f_dentry->d_inode),
+				   infilp, ppos, pipe, len, flags, IO_INVIS,
+				   NULL);
 }
 
 STATIC ssize_t
@@ -287,11 +270,8 @@ xfs_file_splice_write(
 	size_t			len,
 	unsigned int		flags)
 {
-	vnode_t			*vp = vn_from_inode(outfilp->f_dentry->d_inode);
-	ssize_t			rval;
-
-	VOP_SPLICE_WRITE(vp, pipe, outfilp, ppos, len, flags, 0, NULL, rval);
-	return rval;
+	return bhv_vop_splice_write(vn_from_inode(outfilp->f_dentry->d_inode),
+				    pipe, outfilp, ppos, len, flags, 0, NULL);
 }
 
 STATIC ssize_t
@@ -302,11 +282,9 @@ xfs_file_splice_write_invis(
 	size_t			len,
 	unsigned int		flags)
 {
-	vnode_t			*vp = vn_from_inode(outfilp->f_dentry->d_inode);
-	ssize_t			rval;
-
-	VOP_SPLICE_WRITE(vp, pipe, outfilp, ppos, len, flags, IO_INVIS, NULL, rval);
-	return rval;
+	return bhv_vop_splice_write(vn_from_inode(outfilp->f_dentry->d_inode),
+				    pipe, outfilp, ppos, len, flags, IO_INVIS,
+				    NULL);
 }
 
 STATIC int
@@ -314,13 +292,17 @@ xfs_file_open(
 	struct inode	*inode,
 	struct file	*filp)
 {
-	vnode_t		*vp = vn_from_inode(inode);
-	int		error;
-
 	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
 		return -EFBIG;
-	VOP_OPEN(vp, NULL, error);
-	return -error;
+	return -bhv_vop_open(vn_from_inode(inode), NULL);
+}
+
+STATIC int
+xfs_file_close(
+	struct file	*filp)
+{
+	return -bhv_vop_close(vn_from_inode(filp->f_dentry->d_inode), 0,
+				file_count(filp) > 1 ? L_FALSE : L_TRUE, NULL);
 }
 
 STATIC int
@@ -328,12 +310,11 @@ xfs_file_release(
 	struct inode	*inode,
 	struct file	*filp)
 {
-	vnode_t		*vp = vn_from_inode(inode);
-	int		error = 0;
+	bhv_vnode_t	*vp = vn_from_inode(inode);
 
 	if (vp)
-		VOP_RELEASE(vp, error);
-	return -error;
+		return -bhv_vop_release(vp);
+	return 0;
 }
 
 STATIC int
@@ -342,15 +323,14 @@ xfs_file_fsync(
 	struct dentry	*dentry,
 	int		datasync)
 {
-	struct inode	*inode = dentry->d_inode;
-	vnode_t		*vp = vn_from_inode(inode);
-	int		error;
+	bhv_vnode_t	*vp = vn_from_inode(dentry->d_inode);
 	int		flags = FSYNC_WAIT;
 
 	if (datasync)
 		flags |= FSYNC_DATA;
-	VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error);
-	return -error;
+	if (VN_TRUNC(vp))
+		VUNTRUNCATE(vp);
+	return -bhv_vop_fsync(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1);
 }
 
 #ifdef CONFIG_XFS_DMAPI
@@ -361,16 +341,11 @@ xfs_vm_nopage(
 	int			*type)
 {
 	struct inode	*inode = area->vm_file->f_dentry->d_inode;
-	vnode_t		*vp = vn_from_inode(inode);
-	xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
-	int		error;
+	bhv_vnode_t	*vp = vn_from_inode(inode);
 
 	ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
-
-	error = XFS_SEND_MMAP(mp, area, 0);
-	if (error)
+	if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), area, 0))
 		return NULL;
-
 	return filemap_nopage(area, address, type);
 }
 #endif /* CONFIG_XFS_DMAPI */
@@ -382,7 +357,7 @@ xfs_file_readdir(
 	filldir_t	filldir)
 {
 	int		error = 0;
-	vnode_t		*vp = vn_from_inode(filp->f_dentry->d_inode);
+	bhv_vnode_t	*vp = vn_from_inode(filp->f_dentry->d_inode);
 	uio_t		uio;
 	iovec_t		iov;
 	int		eof = 0;
@@ -417,7 +392,7 @@ xfs_file_readdir(
 
 		start_offset = uio.uio_offset;
 
-		VOP_READDIR(vp, &uio, NULL, &eof, error);
+		error = bhv_vop_readdir(vp, &uio, NULL, &eof);
 		if ((uio.uio_offset == start_offset) || error) {
 			size = 0;
 			break;
@@ -456,38 +431,28 @@ xfs_file_mmap(
 	struct file	*filp,
 	struct vm_area_struct *vma)
 {
-	struct inode	*ip = filp->f_dentry->d_inode;
-	vnode_t		*vp = vn_from_inode(ip);
-	vattr_t		vattr;
-	int		error;
-
 	vma->vm_ops = &xfs_file_vm_ops;
 
 #ifdef CONFIG_XFS_DMAPI
-	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
+	if (vn_from_inode(filp->f_dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI)
 		vma->vm_ops = &xfs_dmapi_file_vm_ops;
-	}
 #endif /* CONFIG_XFS_DMAPI */
 
-	vattr.va_mask = XFS_AT_UPDATIME;
-	VOP_SETATTR(vp, &vattr, XFS_AT_UPDATIME, NULL, error);
-	if (likely(!error))
-		__vn_revalidate(vp, &vattr);	/* update flags */
+	file_accessed(filp);
 	return 0;
 }
 
-
 STATIC long
 xfs_file_ioctl(
 	struct file	*filp,
 	unsigned int	cmd,
-	unsigned long	arg)
+	unsigned long	p)
 {
 	int		error;
 	struct inode	*inode = filp->f_dentry->d_inode;
-	vnode_t		*vp = vn_from_inode(inode);
+	bhv_vnode_t	*vp = vn_from_inode(inode);
 
-	VOP_IOCTL(vp, inode, filp, 0, cmd, (void __user *)arg, error);
+	error = bhv_vop_ioctl(vp, inode, filp, 0, cmd, (void __user *)p);
 	VMODIFY(vp);
 
 	/* NOTE:  some of the ioctl's return positive #'s as a
@@ -503,13 +468,13 @@ STATIC long
 xfs_file_ioctl_invis(
 	struct file	*filp,
 	unsigned int	cmd,
-	unsigned long	arg)
+	unsigned long	p)
 {
-	struct inode	*inode = filp->f_dentry->d_inode;
-	vnode_t		*vp = vn_from_inode(inode);
 	int		error;
+	struct inode	*inode = filp->f_dentry->d_inode;
+	bhv_vnode_t	*vp = vn_from_inode(inode);
 
-	VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, (void __user *)arg, error);
+	error = bhv_vop_ioctl(vp, inode, filp, IO_INVIS, cmd, (void __user *)p);
 	VMODIFY(vp);
 
 	/* NOTE:  some of the ioctl's return positive #'s as a
@@ -528,7 +493,7 @@ xfs_vm_mprotect(
 	struct vm_area_struct *vma,
 	unsigned int	newflags)
 {
-	vnode_t		*vp = vn_from_inode(vma->vm_file->f_dentry->d_inode);
+	bhv_vnode_t	*vp = vn_from_inode(vma->vm_file->f_dentry->d_inode);
 	int		error = 0;
 
 	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
@@ -554,24 +519,19 @@ STATIC int
 xfs_file_open_exec(
 	struct inode	*inode)
 {
-	vnode_t		*vp = vn_from_inode(inode);
-	xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
-	int		error = 0;
-	xfs_inode_t	*ip;
+	bhv_vnode_t	*vp = vn_from_inode(inode);
 
-	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
-		ip = xfs_vtoi(vp);
-		if (!ip) {
-			error = -EINVAL;
-			goto open_exec_out;
-		}
-		if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) {
-			error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp,
+	if (unlikely(vp->v_vfsp->vfs_flag & VFS_DMI)) {
+		xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
+		xfs_inode_t	*ip = xfs_vtoi(vp);
+
+		if (!ip)
+			return -EINVAL;
+		if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ))
+			return -XFS_SEND_DATA(mp, DM_EVENT_READ, vp,
 					       0, 0, 0, NULL);
-		}
 	}
-open_exec_out:
-	return error;
+	return 0;
 }
 #endif /* HAVE_FOP_OPEN_EXEC */
 
@@ -592,6 +552,7 @@ const struct file_operations xfs_file_operations = {
 #endif
 	.mmap		= xfs_file_mmap,
 	.open		= xfs_file_open,
+	.flush		= xfs_file_close,
 	.release	= xfs_file_release,
 	.fsync		= xfs_file_fsync,
 #ifdef HAVE_FOP_OPEN_EXEC
@@ -616,6 +577,7 @@ const struct file_operations xfs_invis_file_operations = {
 #endif
 	.mmap		= xfs_file_mmap,
 	.open		= xfs_file_open,
+	.flush		= xfs_file_close,
 	.release	= xfs_file_release,
 	.fsync		= xfs_file_fsync,
 };
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 575f2a790f3..dc0562828e7 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -15,40 +15,12 @@
  * along with this program; if not, write the Free Software Foundation,
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-
 #include "xfs.h"
 
-/*
- * Stub for no-op vnode operations that return error status.
- */
-int
-fs_noerr(void)
-{
-	return 0;
-}
+int  fs_noerr(void) { return 0; }
+int  fs_nosys(void) { return ENOSYS; }
+void fs_noval(void) { return; }
 
-/*
- * Operation unsupported under this file system.
- */
-int
-fs_nosys(void)
-{
-	return ENOSYS;
-}
-
-/*
- * Stub for inactive, strategy, and read/write lock/unlock.  Does nothing.
- */
-/* ARGSUSED */
-void
-fs_noval(void)
-{
-}
-
-/*
- * vnode pcache layer for vnode_tosspages.
- * 'last' parameter unused but left in for IRIX compatibility
- */
 void
 fs_tosspages(
 	bhv_desc_t	*bdp,
@@ -56,18 +28,13 @@ fs_tosspages(
 	xfs_off_t	last,
 	int		fiopt)
 {
-	vnode_t		*vp = BHV_TO_VNODE(bdp);
+	bhv_vnode_t	*vp = BHV_TO_VNODE(bdp);
 	struct inode	*ip = vn_to_inode(vp);
 
 	if (VN_CACHED(vp))
 		truncate_inode_pages(ip->i_mapping, first);
 }
 
-
-/*
- * vnode pcache layer for vnode_flushinval_pages.
- * 'last' parameter unused but left in for IRIX compatibility
- */
 void
 fs_flushinval_pages(
 	bhv_desc_t	*bdp,
@@ -75,20 +42,17 @@ fs_flushinval_pages(
 	xfs_off_t	last,
 	int		fiopt)
 {
-	vnode_t		*vp = BHV_TO_VNODE(bdp);
+	bhv_vnode_t	*vp = BHV_TO_VNODE(bdp);
 	struct inode	*ip = vn_to_inode(vp);
 
 	if (VN_CACHED(vp)) {
+		if (VN_TRUNC(vp))
+			VUNTRUNCATE(vp);
 		filemap_write_and_wait(ip->i_mapping);
-
 		truncate_inode_pages(ip->i_mapping, first);
 	}
 }
 
-/*
- * vnode pcache layer for vnode_flush_pages.
- * 'last' parameter unused but left in for IRIX compatibility
- */
 int
 fs_flush_pages(
 	bhv_desc_t	*bdp,
@@ -97,15 +61,16 @@ fs_flush_pages(
 	uint64_t	flags,
 	int		fiopt)
 {
-	vnode_t		*vp = BHV_TO_VNODE(bdp);
+	bhv_vnode_t	*vp = BHV_TO_VNODE(bdp);
 	struct inode	*ip = vn_to_inode(vp);
 
-	if (VN_CACHED(vp)) {
+	if (VN_DIRTY(vp)) {
+		if (VN_TRUNC(vp))
+			VUNTRUNCATE(vp);
 		filemap_fdatawrite(ip->i_mapping);
 		if (flags & XFS_B_ASYNC)
 			return 0;
 		filemap_fdatawait(ip->i_mapping);
 	}
-
 	return 0;
 }
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index 6e8085f3463..6c162c3dde7 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -45,6 +45,7 @@ xfs_param_t xfs_params = {
 	.xfs_buf_age	= {	1*100,		15*100,		7200*100},
 	.inherit_nosym	= {	0,		0,		1	},
 	.rotorstep	= {	1,		1,		255	},
+	.inherit_nodfrg	= {	0,		1,		1	},
 };
 
 /*
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 84478491609..6e52a5dd38d 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -23,7 +23,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
@@ -31,7 +30,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
@@ -78,7 +76,7 @@ xfs_find_handle(
 	xfs_handle_t		handle;
 	xfs_fsop_handlereq_t	hreq;
 	struct inode		*inode;
-	struct vnode		*vp;
+	bhv_vnode_t		*vp;
 
 	if (copy_from_user(&hreq, arg, sizeof(hreq)))
 		return -XFS_ERROR(EFAULT);
@@ -192,7 +190,7 @@ xfs_vget_fsop_handlereq(
 	xfs_mount_t		*mp,
 	struct inode		*parinode,	/* parent inode pointer    */
 	xfs_fsop_handlereq_t	*hreq,
-	vnode_t			**vp,
+	bhv_vnode_t		**vp,
 	struct inode		**inode)
 {
 	void			__user *hanp;
@@ -202,7 +200,7 @@ xfs_vget_fsop_handlereq(
 	xfs_handle_t		handle;
 	xfs_inode_t		*ip;
 	struct inode		*inodep;
-	vnode_t			*vpp;
+	bhv_vnode_t		*vpp;
 	xfs_ino_t		ino;
 	__u32			igen;
 	int			error;
@@ -277,7 +275,7 @@ xfs_open_by_handle(
 	struct file		*filp;
 	struct inode		*inode;
 	struct dentry		*dentry;
-	vnode_t			*vp;
+	bhv_vnode_t		*vp;
 	xfs_fsop_handlereq_t	hreq;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -362,7 +360,7 @@ xfs_readlink_by_handle(
 	struct uio		auio;
 	struct inode		*inode;
 	xfs_fsop_handlereq_t	hreq;
-	vnode_t			*vp;
+	bhv_vnode_t		*vp;
 	__u32			olen;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -393,9 +391,11 @@ xfs_readlink_by_handle(
 	auio.uio_segflg	= UIO_USERSPACE;
 	auio.uio_resid	= olen;
 
-	VOP_READLINK(vp, &auio, IO_INVIS, NULL, error);
-
+	error = bhv_vop_readlink(vp, &auio, IO_INVIS, NULL);
 	VN_RELE(vp);
+	if (error)
+		return -error;
+
 	return (olen - auio.uio_resid);
 }
 
@@ -411,7 +411,7 @@ xfs_fssetdm_by_handle(
 	xfs_fsop_setdm_handlereq_t dmhreq;
 	struct inode		*inode;
 	bhv_desc_t		*bdp;
-	vnode_t			*vp;
+	bhv_vnode_t		*vp;
 
 	if (!capable(CAP_MKNOD))
 		return -XFS_ERROR(EPERM);
@@ -452,7 +452,7 @@ xfs_attrlist_by_handle(
 	attrlist_cursor_kern_t	*cursor;
 	xfs_fsop_attrlist_handlereq_t al_hreq;
 	struct inode		*inode;
-	vnode_t			*vp;
+	bhv_vnode_t		*vp;
 	char			*kbuf;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -472,8 +472,8 @@ xfs_attrlist_by_handle(
 		goto out_vn_rele;
 
 	cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
-	VOP_ATTR_LIST(vp, kbuf, al_hreq.buflen, al_hreq.flags,
-			cursor, NULL, error);
+	error = bhv_vop_attr_list(vp, kbuf, al_hreq.buflen, al_hreq.flags,
+					cursor, NULL);
 	if (error)
 		goto out_kfree;
 
@@ -490,7 +490,7 @@ xfs_attrlist_by_handle(
 
 STATIC int
 xfs_attrmulti_attr_get(
-	struct vnode		*vp,
+	bhv_vnode_t		*vp,
 	char			*name,
 	char			__user *ubuf,
 	__uint32_t		*len,
@@ -505,7 +505,7 @@ xfs_attrmulti_attr_get(
 	if (!kbuf)
 		return ENOMEM;
 
-	VOP_ATTR_GET(vp, name, kbuf, len, flags, NULL, error);
+	error = bhv_vop_attr_get(vp, name, kbuf, len, flags, NULL);
 	if (error)
 		goto out_kfree;
 
@@ -519,7 +519,7 @@ xfs_attrmulti_attr_get(
 
 STATIC int
 xfs_attrmulti_attr_set(
-	struct vnode		*vp,
+	bhv_vnode_t		*vp,
 	char			*name,
 	const char		__user *ubuf,
 	__uint32_t		len,
@@ -542,7 +542,7 @@ xfs_attrmulti_attr_set(
 	if (copy_from_user(kbuf, ubuf, len))
 		goto out_kfree;
 			
-	VOP_ATTR_SET(vp, name, kbuf, len, flags, NULL, error);
+	error = bhv_vop_attr_set(vp, name, kbuf, len, flags, NULL);
 
  out_kfree:
 	kfree(kbuf);
@@ -551,20 +551,15 @@ xfs_attrmulti_attr_set(
 
 STATIC int
 xfs_attrmulti_attr_remove(
-	struct vnode		*vp,
+	bhv_vnode_t		*vp,
 	char			*name,
 	__uint32_t		flags)
 {
-	int			error;
-
-
 	if (IS_RDONLY(&vp->v_inode))
 		return -EROFS;
 	if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
 		return EPERM;
-
-	VOP_ATTR_REMOVE(vp, name, flags, NULL, error);
-	return error;
+	return bhv_vop_attr_remove(vp, name, flags, NULL);
 }
 
 STATIC int
@@ -578,7 +573,7 @@ xfs_attrmulti_by_handle(
 	xfs_attr_multiop_t	*ops;
 	xfs_fsop_attrmulti_handlereq_t am_hreq;
 	struct inode		*inode;
-	vnode_t			*vp;
+	bhv_vnode_t		*vp;
 	unsigned int		i, size;
 	char			*attr_name;
 
@@ -658,7 +653,7 @@ xfs_attrmulti_by_handle(
 STATIC int
 xfs_ioc_space(
 	bhv_desc_t		*bdp,
-	vnode_t			*vp,
+	bhv_vnode_t		*vp,
 	struct file		*filp,
 	int			flags,
 	unsigned int		cmd,
@@ -682,7 +677,7 @@ xfs_ioc_fsgeometry(
 
 STATIC int
 xfs_ioc_xattr(
-	vnode_t			*vp,
+	bhv_vnode_t		*vp,
 	xfs_inode_t		*ip,
 	struct file		*filp,
 	unsigned int		cmd,
@@ -711,7 +706,7 @@ xfs_ioctl(
 	void			__user *arg)
 {
 	int			error;
-	vnode_t			*vp;
+	bhv_vnode_t		*vp;
 	xfs_inode_t		*ip;
 	xfs_mount_t		*mp;
 
@@ -962,7 +957,7 @@ xfs_ioctl(
 STATIC int
 xfs_ioc_space(
 	bhv_desc_t		*bdp,
-	vnode_t			*vp,
+	bhv_vnode_t		*vp,
 	struct file		*filp,
 	int			ioflags,
 	unsigned int		cmd,
@@ -1153,14 +1148,14 @@ xfs_di2lxflags(
 
 STATIC int
 xfs_ioc_xattr(
-	vnode_t			*vp,
+	bhv_vnode_t		*vp,
 	xfs_inode_t		*ip,
 	struct file		*filp,
 	unsigned int		cmd,
 	void			__user *arg)
 {
 	struct fsxattr		fa;
-	struct vattr		*vattr;
+	struct bhv_vattr	*vattr;
 	int			error = 0;
 	int			attr_flags;
 	unsigned int		flags;
@@ -1173,7 +1168,7 @@ xfs_ioc_xattr(
 	case XFS_IOC_FSGETXATTR: {
 		vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \
 				 XFS_AT_NEXTENTS | XFS_AT_PROJID;
-		VOP_GETATTR(vp, vattr, 0, NULL, error);
+		error = bhv_vop_getattr(vp, vattr, 0, NULL);
 		if (unlikely(error)) {
 			error = -error;
 			break;
@@ -1206,7 +1201,7 @@ xfs_ioc_xattr(
 		vattr->va_extsize = fa.fsx_extsize;
 		vattr->va_projid  = fa.fsx_projid;
 
-		VOP_SETATTR(vp, vattr, attr_flags, NULL, error);
+		error = bhv_vop_setattr(vp, vattr, attr_flags, NULL);
 		if (likely(!error))
 			__vn_revalidate(vp, vattr);	/* update flags */
 		error = -error;
@@ -1216,7 +1211,7 @@ xfs_ioc_xattr(
 	case XFS_IOC_FSGETXATTRA: {
 		vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \
 				 XFS_AT_ANEXTENTS | XFS_AT_PROJID;
-		VOP_GETATTR(vp, vattr, 0, NULL, error);
+		error = bhv_vop_getattr(vp, vattr, 0, NULL);
 		if (unlikely(error)) {
 			error = -error;
 			break;
@@ -1262,7 +1257,7 @@ xfs_ioc_xattr(
 		vattr->va_xflags = xfs_merge_ioc_xflags(flags,
 							xfs_ip2xflags(ip));
 
-		VOP_SETATTR(vp, vattr, attr_flags, NULL, error);
+		error = bhv_vop_setattr(vp, vattr, attr_flags, NULL);
 		if (likely(!error))
 			__vn_revalidate(vp, vattr);	/* update flags */
 		error = -error;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 251bfe451a3..601f01c92f7 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -114,7 +114,7 @@ xfs_compat_ioctl(
 	unsigned long	arg)
 {
 	struct inode	*inode = file->f_dentry->d_inode;
-	vnode_t		*vp = vn_from_inode(inode);
+	bhv_vnode_t	*vp = vn_from_inode(inode);
 	int		error;
 
 	switch (cmd) {
@@ -193,7 +193,7 @@ xfs_compat_ioctl(
 		return -ENOIOCTLCMD;
 	}
 
-	VOP_IOCTL(vp, inode, file, mode, cmd, (void __user *)arg, error);
+	error = bhv_vop_ioctl(vp, inode, file, mode, cmd, (void __user *)arg);
 	VMODIFY(vp);
 
 	return error;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 2e2e275c786..12810baeb5d 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -23,7 +23,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -61,7 +59,7 @@
  */
 xfs_inode_t *
 xfs_vtoi(
-	struct vnode	*vp)
+	bhv_vnode_t	*vp)
 {
 	bhv_desc_t      *bdp;
 
@@ -80,7 +78,7 @@ void
 xfs_synchronize_atime(
 	xfs_inode_t	*ip)
 {
-	vnode_t		*vp;
+	bhv_vnode_t	*vp;
 
 	vp = XFS_ITOV_NULL(ip);
 	if (vp) {
@@ -200,14 +198,10 @@ xfs_ichgtime_fast(
 STATIC void
 xfs_validate_fields(
 	struct inode	*ip,
-	struct vattr	*vattr)
+	bhv_vattr_t	*vattr)
 {
-	vnode_t		*vp = vn_from_inode(ip);
-	int		error;
-
 	vattr->va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS;
-	VOP_GETATTR(vp, vattr, ATTR_LAZY, NULL, error);
-  	if (likely(!error)) {
+	if (!bhv_vop_getattr(vn_from_inode(ip), vattr, ATTR_LAZY, NULL)) {
 		ip->i_nlink = vattr->va_nlink;
 		ip->i_blocks = vattr->va_nblocks;
 
@@ -225,7 +219,7 @@ xfs_validate_fields(
  */
 STATIC int
 xfs_init_security(
-	struct vnode	*vp,
+	bhv_vnode_t	*vp,
 	struct inode	*dir)
 {
 	struct inode	*ip = vn_to_inode(vp);
@@ -241,7 +235,7 @@ xfs_init_security(
 		return -error;
 	}
 
-	VOP_ATTR_SET(vp, name, value, length, ATTR_SECURE, NULL, error);
+	error = bhv_vop_attr_set(vp, name, value, length, ATTR_SECURE, NULL);
 	if (!error)
 		VMODIFY(vp);
 
@@ -264,13 +258,12 @@ xfs_has_fs_struct(struct task_struct *task)
 
 STATIC inline void
 xfs_cleanup_inode(
-	vnode_t		*dvp,
-	vnode_t		*vp,
+	bhv_vnode_t	*dvp,
+	bhv_vnode_t	*vp,
 	struct dentry	*dentry,
 	int		mode)
 {
 	struct dentry   teardown = {};
-	int             error;
 
 	/* Oh, the horror.
 	 * If we can't add the ACL or we fail in
@@ -281,9 +274,9 @@ xfs_cleanup_inode(
 	teardown.d_name = dentry->d_name;
 
 	if (S_ISDIR(mode))
-	  	VOP_RMDIR(dvp, &teardown, NULL, error);
+	  	bhv_vop_rmdir(dvp, &teardown, NULL);
 	else
-		VOP_REMOVE(dvp, &teardown, NULL, error);
+		bhv_vop_remove(dvp, &teardown, NULL);
 	VN_RELE(vp);
 }
 
@@ -295,8 +288,8 @@ xfs_vn_mknod(
 	dev_t		rdev)
 {
 	struct inode	*ip;
-	vattr_t		vattr = { 0 };
-	vnode_t		*vp = NULL, *dvp = vn_from_inode(dir);
+	bhv_vattr_t	vattr = { 0 };
+	bhv_vnode_t	*vp = NULL, *dvp = vn_from_inode(dir);
 	xfs_acl_t	*default_acl = NULL;
 	attrexists_t	test_default_acl = _ACL_DEFAULT_EXISTS;
 	int		error;
@@ -330,10 +323,10 @@ xfs_vn_mknod(
 		vattr.va_mask |= XFS_AT_RDEV;
 		/*FALLTHROUGH*/
 	case S_IFREG:
-		VOP_CREATE(dvp, dentry, &vattr, &vp, NULL, error);
+		error = bhv_vop_create(dvp, dentry, &vattr, &vp, NULL);
 		break;
 	case S_IFDIR:
-		VOP_MKDIR(dvp, dentry, &vattr, &vp, NULL, error);
+		error = bhv_vop_mkdir(dvp, dentry, &vattr, &vp, NULL);
 		break;
 	default:
 		error = EINVAL;
@@ -396,14 +389,14 @@ xfs_vn_lookup(
 	struct dentry	*dentry,
 	struct nameidata *nd)
 {
-	struct vnode	*vp = vn_from_inode(dir), *cvp;
+	bhv_vnode_t	*vp = vn_from_inode(dir), *cvp;
 	int		error;
 
 	if (dentry->d_name.len >= MAXNAMELEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	VOP_LOOKUP(vp, dentry, &cvp, 0, NULL, NULL, error);
-	if (error) {
+	error = bhv_vop_lookup(vp, dentry, &cvp, 0, NULL, NULL);
+	if (unlikely(error)) {
 		if (unlikely(error != ENOENT))
 			return ERR_PTR(-error);
 		d_add(dentry, NULL);
@@ -420,9 +413,9 @@ xfs_vn_link(
 	struct dentry	*dentry)
 {
 	struct inode	*ip;	/* inode of guy being linked to */
-	vnode_t		*tdvp;	/* target directory for new name/link */
-	vnode_t		*vp;	/* vp of name being linked */
-	vattr_t		vattr;
+	bhv_vnode_t	*tdvp;	/* target directory for new name/link */
+	bhv_vnode_t	*vp;	/* vp of name being linked */
+	bhv_vattr_t	vattr;
 	int		error;
 
 	ip = old_dentry->d_inode;	/* inode being linked to */
@@ -432,7 +425,7 @@ xfs_vn_link(
 	tdvp = vn_from_inode(dir);
 	vp = vn_from_inode(ip);
 
-	VOP_LINK(tdvp, vp, dentry, NULL, error);
+	error = bhv_vop_link(tdvp, vp, dentry, NULL);
 	if (likely(!error)) {
 		VMODIFY(tdvp);
 		VN_HOLD(vp);
@@ -448,14 +441,14 @@ xfs_vn_unlink(
 	struct dentry	*dentry)
 {
 	struct inode	*inode;
-	vnode_t		*dvp;	/* directory containing name to remove */
-	vattr_t		vattr;
+	bhv_vnode_t	*dvp;	/* directory containing name to remove */
+	bhv_vattr_t	vattr;
 	int		error;
 
 	inode = dentry->d_inode;
 	dvp = vn_from_inode(dir);
 
-	VOP_REMOVE(dvp, dentry, NULL, error);
+	error = bhv_vop_remove(dvp, dentry, NULL);
 	if (likely(!error)) {
 		xfs_validate_fields(dir, &vattr);	/* size needs update */
 		xfs_validate_fields(inode, &vattr);
@@ -470,27 +463,26 @@ xfs_vn_symlink(
 	const char	*symname)
 {
 	struct inode	*ip;
-	vattr_t		vattr = { 0 };
-	vnode_t		*dvp;	/* directory containing name of symlink */
-	vnode_t		*cvp;	/* used to lookup symlink to put in dentry */
+	bhv_vattr_t	va = { 0 };
+	bhv_vnode_t	*dvp;	/* directory containing name of symlink */
+	bhv_vnode_t	*cvp;	/* used to lookup symlink to put in dentry */
 	int		error;
 
 	dvp = vn_from_inode(dir);
 	cvp = NULL;
 
-	vattr.va_mode = S_IFLNK |
+	va.va_mode = S_IFLNK |
 		(irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
-	vattr.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
+	va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
 
-	error = 0;
-	VOP_SYMLINK(dvp, dentry, &vattr, (char *)symname, &cvp, NULL, error);
+	error = bhv_vop_symlink(dvp, dentry, &va, (char *)symname, &cvp, NULL);
 	if (likely(!error && cvp)) {
 		error = xfs_init_security(cvp, dir);
 		if (likely(!error)) {
 			ip = vn_to_inode(cvp);
 			d_instantiate(dentry, ip);
-			xfs_validate_fields(dir, &vattr);
-			xfs_validate_fields(ip, &vattr);
+			xfs_validate_fields(dir, &va);
+			xfs_validate_fields(ip, &va);
 		} else {
 			xfs_cleanup_inode(dvp, cvp, dentry, 0);
 		}
@@ -504,11 +496,11 @@ xfs_vn_rmdir(
 	struct dentry	*dentry)
 {
 	struct inode	*inode = dentry->d_inode;
-	vnode_t		*dvp = vn_from_inode(dir);
-	vattr_t		vattr;
+	bhv_vnode_t	*dvp = vn_from_inode(dir);
+	bhv_vattr_t	vattr;
 	int		error;
 
-	VOP_RMDIR(dvp, dentry, NULL, error);
+	error = bhv_vop_rmdir(dvp, dentry, NULL);
 	if (likely(!error)) {
 		xfs_validate_fields(inode, &vattr);
 		xfs_validate_fields(dir, &vattr);
@@ -524,15 +516,15 @@ xfs_vn_rename(
 	struct dentry	*ndentry)
 {
 	struct inode	*new_inode = ndentry->d_inode;
-	vnode_t		*fvp;	/* from directory */
-	vnode_t		*tvp;	/* target directory */
-	vattr_t		vattr;
+	bhv_vnode_t	*fvp;	/* from directory */
+	bhv_vnode_t	*tvp;	/* target directory */
+	bhv_vattr_t	vattr;
 	int		error;
 
 	fvp = vn_from_inode(odir);
 	tvp = vn_from_inode(ndir);
 
-	VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error);
+	error = bhv_vop_rename(fvp, odentry, tvp, ndentry, NULL);
 	if (likely(!error)) {
 		if (new_inode)
 			xfs_validate_fields(new_inode, &vattr);
@@ -553,7 +545,7 @@ xfs_vn_follow_link(
 	struct dentry		*dentry,
 	struct nameidata	*nd)
 {
-	vnode_t			*vp;
+	bhv_vnode_t		*vp;
 	uio_t			*uio;
 	iovec_t			iov;
 	int			error;
@@ -586,8 +578,8 @@ xfs_vn_follow_link(
 	uio->uio_resid = MAXPATHLEN;
 	uio->uio_iovcnt = 1;
 
-	VOP_READLINK(vp, uio, 0, NULL, error);
-	if (error) {
+	error = bhv_vop_readlink(vp, uio, 0, NULL);
+	if (unlikely(error)) {
 		kfree(link);
 		link = ERR_PTR(-error);
 	} else {
@@ -618,12 +610,7 @@ xfs_vn_permission(
 	int		mode,
 	struct nameidata *nd)
 {
-	vnode_t		*vp = vn_from_inode(inode);
-	int		error;
-
-	mode <<= 6;		/* convert from linux to vnode access bits */
-	VOP_ACCESS(vp, mode, NULL, error);
-	return -error;
+	return -bhv_vop_access(vn_from_inode(inode), mode << 6, NULL);
 }
 #else
 #define xfs_vn_permission NULL
@@ -636,14 +623,14 @@ xfs_vn_getattr(
 	struct kstat	*stat)
 {
 	struct inode	*inode = dentry->d_inode;
-	vnode_t		*vp = vn_from_inode(inode);
+	bhv_vnode_t	*vp = vn_from_inode(inode);
 	int		error = 0;
 
 	if (unlikely(vp->v_flag & VMODIFIED))
 		error = vn_revalidate(vp);
 	if (!error)
 		generic_fillattr(inode, stat);
-	return 0;
+	return -error;
 }
 
 STATIC int
@@ -653,8 +640,8 @@ xfs_vn_setattr(
 {
 	struct inode	*inode = dentry->d_inode;
 	unsigned int	ia_valid = attr->ia_valid;
-	vnode_t		*vp = vn_from_inode(inode);
-	vattr_t		vattr = { 0 };
+	bhv_vnode_t	*vp = vn_from_inode(inode);
+	bhv_vattr_t	vattr = { 0 };
 	int		flags = 0;
 	int		error;
 
@@ -697,7 +684,7 @@ xfs_vn_setattr(
 		flags |= ATTR_NONBLOCK;
 #endif
 
-	VOP_SETATTR(vp, &vattr, flags, NULL, error);
+	error = bhv_vop_setattr(vp, &vattr, flags, NULL);
 	if (likely(!error))
 		__vn_revalidate(vp, &vattr);
 	return -error;
@@ -718,7 +705,7 @@ xfs_vn_setxattr(
 	size_t		size,
 	int		flags)
 {
-	vnode_t		*vp = vn_from_inode(dentry->d_inode);
+	bhv_vnode_t	*vp = vn_from_inode(dentry->d_inode);
 	char		*attr = (char *)name;
 	attrnames_t	*namesp;
 	int		xflags = 0;
@@ -748,7 +735,7 @@ xfs_vn_getxattr(
 	void		*data,
 	size_t		size)
 {
-	vnode_t		*vp = vn_from_inode(dentry->d_inode);
+	bhv_vnode_t	*vp = vn_from_inode(dentry->d_inode);
 	char		*attr = (char *)name;
 	attrnames_t	*namesp;
 	int		xflags = 0;
@@ -777,7 +764,7 @@ xfs_vn_listxattr(
 	char			*data,
 	size_t			size)
 {
-	vnode_t			*vp = vn_from_inode(dentry->d_inode);
+	bhv_vnode_t		*vp = vn_from_inode(dentry->d_inode);
 	int			error, xflags = ATTR_KERNAMELS;
 	ssize_t			result;
 
@@ -796,7 +783,7 @@ xfs_vn_removexattr(
 	struct dentry	*dentry,
 	const char	*name)
 {
-	vnode_t		*vp = vn_from_inode(dentry->d_inode);
+	bhv_vnode_t	*vp = vn_from_inode(dentry->d_inode);
 	char		*attr = (char *)name;
 	attrnames_t	*namesp;
 	int		xflags = 0;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index e9fe43d7476..aa26ab906c8 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -134,14 +134,21 @@ BUFFER_FNS(PrivateStart, unwritten);
 #define xfs_buf_age_centisecs	xfs_params.xfs_buf_age.val
 #define xfs_inherit_nosymlinks	xfs_params.inherit_nosym.val
 #define xfs_rotorstep		xfs_params.rotorstep.val
+#define xfs_inherit_nodefrag	xfs_params.inherit_nodfrg.val
 
-#ifndef raw_smp_processor_id
-#define raw_smp_processor_id()	smp_processor_id()
-#endif
-#define current_cpu()		raw_smp_processor_id()
+#define current_cpu()		(raw_smp_processor_id())
 #define current_pid()		(current->pid)
 #define current_fsuid(cred)	(current->fsuid)
 #define current_fsgid(cred)	(current->fsgid)
+#define current_set_flags(f)	(current->flags |= (f))
+#define current_test_flags(f)	(current->flags & (f))
+#define current_clear_flags(f)	(current->flags &= ~(f))
+#define current_set_flags_nested(sp, f)		\
+		(*(sp) = current->flags, current->flags |= (f))
+#define current_clear_flags_nested(sp, f)	\
+		(*(sp) = current->flags, current->flags &= ~(f))
+#define current_restore_flags_nested(sp, f)	\
+		(current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
 
 #define NBPP		PAGE_SIZE
 #define DPPSHFT		(PAGE_SHIFT - 9)
@@ -187,25 +194,9 @@ BUFFER_FNS(PrivateStart, unwritten);
 /* bytes to clicks */
 #define btoc(x)         (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
 
-#ifndef ENOATTR
 #define ENOATTR		ENODATA		/* Attribute not found */
-#endif
-
-/* Note: EWRONGFS never visible outside the kernel */
-#define	EWRONGFS	EINVAL		/* Mount with wrong filesystem type */
-
-/*
- * XXX EFSCORRUPTED needs a real value in errno.h. asm-i386/errno.h won't
- *     return codes out of its known range in errno.
- * XXX Also note: needs to be < 1000 and fairly unique on Linux (mustn't
- *     conflict with any code we use already or any code a driver may use)
- * XXX Some options (currently we do #2):
- *	1/ New error code ["Filesystem is corrupted", _after_ glibc updated]
- *	2/ 990 ["Unknown error 990"]
- *	3/ EUCLEAN ["Structure needs cleaning"]
- *	4/ Convert EFSCORRUPTED to EIO [just prior to return into userspace]
- */
-#define EFSCORRUPTED    990		/* Filesystem is corrupted */
+#define EWRONGFS	EINVAL		/* Mount with wrong filesystem type */
+#define EFSCORRUPTED	EUCLEAN		/* Filesystem is corrupted */
 
 #define SYNCHRONIZE()	barrier()
 #define __return_address __builtin_return_address(0)
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 67efe330898..5d9cfd91ad0 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -23,7 +23,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -206,7 +204,7 @@ xfs_read(
 	xfs_fsize_t		n;
 	xfs_inode_t		*ip;
 	xfs_mount_t		*mp;
-	vnode_t			*vp;
+	bhv_vnode_t		*vp;
 	unsigned long		seg;
 
 	ip = XFS_BHVTOI(bdp);
@@ -258,7 +256,7 @@ xfs_read(
 
 	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
 	    !(ioflags & IO_INVIS)) {
-		vrwlock_t locktype = VRWLOCK_READ;
+		bhv_vrwlock_t locktype = VRWLOCK_READ;
 		int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
 
 		ret = -XFS_SEND_DATA(mp, DM_EVENT_READ,
@@ -271,7 +269,7 @@ xfs_read(
 	}
 
 	if (unlikely((ioflags & IO_ISDIRECT) && VN_CACHED(vp)))
-		VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(*offset)),
+		bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)),
 						-1, FI_REMAPF_LOCKED);
 
 	xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
@@ -313,7 +311,7 @@ xfs_sendfile(
 
 	if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
 	    (!(ioflags & IO_INVIS))) {
-		vrwlock_t locktype = VRWLOCK_READ;
+		bhv_vrwlock_t locktype = VRWLOCK_READ;
 		int error;
 
 		error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
@@ -357,7 +355,7 @@ xfs_splice_read(
 
 	if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
 	    (!(ioflags & IO_INVIS))) {
-		vrwlock_t locktype = VRWLOCK_READ;
+		bhv_vrwlock_t locktype = VRWLOCK_READ;
 		int error;
 
 		error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
@@ -401,7 +399,7 @@ xfs_splice_write(
 
 	if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_WRITE) &&
 	    (!(ioflags & IO_INVIS))) {
-		vrwlock_t locktype = VRWLOCK_WRITE;
+		bhv_vrwlock_t locktype = VRWLOCK_WRITE;
 		int error;
 
 		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, BHV_TO_VNODE(bdp),
@@ -458,7 +456,7 @@ xfs_zero_last_block(
 	last_fsb = XFS_B_TO_FSBT(mp, isize);
 	nimaps = 1;
 	error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap,
-			  &nimaps, NULL);
+			  &nimaps, NULL, NULL);
 	if (error) {
 		return error;
 	}
@@ -499,7 +497,7 @@ xfs_zero_last_block(
 
 int					/* error (positive) */
 xfs_zero_eof(
-	vnode_t		*vp,
+	bhv_vnode_t	*vp,
 	xfs_iocore_t	*io,
 	xfs_off_t	offset,		/* starting I/O offset */
 	xfs_fsize_t	isize,		/* current inode size */
@@ -510,7 +508,6 @@ xfs_zero_eof(
 	xfs_fileoff_t	end_zero_fsb;
 	xfs_fileoff_t	zero_count_fsb;
 	xfs_fileoff_t	last_fsb;
-	xfs_extlen_t	buf_len_fsb;
 	xfs_mount_t	*mp = io->io_mount;
 	int		nimaps;
 	int		error = 0;
@@ -556,7 +553,7 @@ xfs_zero_eof(
 		nimaps = 1;
 		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
 		error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb,
-				  0, NULL, 0, &imap, &nimaps, NULL);
+				  0, NULL, 0, &imap, &nimaps, NULL, NULL);
 		if (error) {
 			ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
 			ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
@@ -579,16 +576,7 @@ xfs_zero_eof(
 		}
 
 		/*
-		 * There are blocks in the range requested.
-		 * Zero them a single write at a time.  We actually
-		 * don't zero the entire range returned if it is
-		 * too big and simply loop around to get the rest.
-		 * That is not the most efficient thing to do, but it
-		 * is simple and this path should not be exercised often.
-		 */
-		buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount,
-					      mp->m_writeio_blocks << 8);
-		/*
+		 * There are blocks we need to zero.
 		 * Drop the inode lock while we're doing the I/O.
 		 * We'll still have the iolock to protect us.
 		 */
@@ -596,14 +584,13 @@ xfs_zero_eof(
 
 		error = xfs_iozero(ip,
 				   XFS_FSB_TO_B(mp, start_zero_fsb),
-				   XFS_FSB_TO_B(mp, buf_len_fsb),
+				   XFS_FSB_TO_B(mp, imap.br_blockcount),
 				   end_size);
-
 		if (error) {
 			goto out_lock;
 		}
 
-		start_zero_fsb = imap.br_startoff + buf_len_fsb;
+		start_zero_fsb = imap.br_startoff + imap.br_blockcount;
 		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
 
 		XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
@@ -637,11 +624,11 @@ xfs_write(
 	ssize_t			ret = 0, error = 0;
 	xfs_fsize_t		isize, new_size;
 	xfs_iocore_t		*io;
-	vnode_t			*vp;
+	bhv_vnode_t		*vp;
 	unsigned long		seg;
 	int			iolock;
 	int			eventsent = 0;
-	vrwlock_t		locktype;
+	bhv_vrwlock_t		locktype;
 	size_t			ocount = 0, count;
 	loff_t			pos;
 	int			need_i_mutex = 1, need_flush = 0;
@@ -679,11 +666,11 @@ xfs_write(
 	io = &xip->i_iocore;
 	mp = io->io_mount;
 
+	vfs_wait_for_freeze(vp->v_vfsp, SB_FREEZE_WRITE);
+
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	fs_check_frozen(vp->v_vfsp, SB_FREEZE_WRITE);
-
 	if (ioflags & IO_ISDIRECT) {
 		xfs_buftarg_t	*target =
 			(xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
@@ -814,7 +801,7 @@ retry:
 		if (need_flush) {
 			xfs_inval_cached_trace(io, pos, -1,
 					ctooff(offtoct(pos)), -1);
-			VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)),
+			bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)),
 					-1, FI_REMAPF_LOCKED);
 		}
 
@@ -903,79 +890,9 @@ retry:
 
 	/* Handle various SYNC-type writes */
 	if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
-		/*
-		 * If we're treating this as O_DSYNC and we have not updated the
-		 * size, force the log.
-		 */
-		if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
-		    !(xip->i_update_size)) {
-			xfs_inode_log_item_t	*iip = xip->i_itemp;
-
-			/*
-			 * If an allocation transaction occurred
-			 * without extending the size, then we have to force
-			 * the log up the proper point to ensure that the
-			 * allocation is permanent.  We can't count on
-			 * the fact that buffered writes lock out direct I/O
-			 * writes - the direct I/O write could have extended
-			 * the size nontransactionally, then finished before
-			 * we started.  xfs_write_file will think that the file
-			 * didn't grow but the update isn't safe unless the
-			 * size change is logged.
-			 *
-			 * Force the log if we've committed a transaction
-			 * against the inode or if someone else has and
-			 * the commit record hasn't gone to disk (e.g.
-			 * the inode is pinned).  This guarantees that
-			 * all changes affecting the inode are permanent
-			 * when we return.
-			 */
-			if (iip && iip->ili_last_lsn) {
-				xfs_log_force(mp, iip->ili_last_lsn,
-						XFS_LOG_FORCE | XFS_LOG_SYNC);
-			} else if (xfs_ipincount(xip) > 0) {
-				xfs_log_force(mp, (xfs_lsn_t)0,
-						XFS_LOG_FORCE | XFS_LOG_SYNC);
-			}
-
-		} else {
-			xfs_trans_t	*tp;
-
-			/*
-			 * O_SYNC or O_DSYNC _with_ a size update are handled
-			 * the same way.
-			 *
-			 * If the write was synchronous then we need to make
-			 * sure that the inode modification time is permanent.
-			 * We'll have updated the timestamp above, so here
-			 * we use a synchronous transaction to log the inode.
-			 * It's not fast, but it's necessary.
-			 *
-			 * If this a dsync write and the size got changed
-			 * non-transactionally, then we need to ensure that
-			 * the size change gets logged in a synchronous
-			 * transaction.
-			 */
-
-			tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
-			if ((error = xfs_trans_reserve(tp, 0,
-						      XFS_SWRITE_LOG_RES(mp),
-						      0, 0, 0))) {
-				/* Transaction reserve failed */
-				xfs_trans_cancel(tp, 0);
-			} else {
-				/* Transaction reserve successful */
-				xfs_ilock(xip, XFS_ILOCK_EXCL);
-				xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL);
-				xfs_trans_ihold(tp, xip);
-				xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE);
-				xfs_trans_set_sync(tp);
-				error = xfs_trans_commit(tp, 0, NULL);
-				xfs_iunlock(xip, XFS_ILOCK_EXCL);
-			}
-			if (error)
-				goto out_unlock_internal;
-		}
+		error = xfs_write_sync_logforce(mp, xip);
+		if (error)
+			goto out_unlock_internal;
 
 		xfs_rwunlock(bdp, locktype);
 		if (need_i_mutex)
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index 8f453995235..c77e62efb74 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -18,8 +18,8 @@
 #ifndef __XFS_LRW_H__
 #define __XFS_LRW_H__
 
-struct vnode;
 struct bhv_desc;
+struct bhv_vnode;
 struct xfs_mount;
 struct xfs_iocore;
 struct xfs_inode;
@@ -49,7 +49,7 @@ struct xfs_iomap;
 #define	XFS_CTRUNC4		14
 #define	XFS_CTRUNC5		15
 #define	XFS_CTRUNC6		16
-#define	XFS_BUNMAPI		17
+#define	XFS_BUNMAP		17
 #define	XFS_INVAL_CACHED	18
 #define	XFS_DIORD_ENTER		19
 #define	XFS_DIOWR_ENTER		20
@@ -82,7 +82,7 @@ extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
 extern int xfs_bdstrat_cb(struct xfs_buf *);
 extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
 
-extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t,
+extern int xfs_zero_eof(struct bhv_vnode *, struct xfs_iocore *, xfs_off_t,
 				xfs_fsize_t, xfs_fsize_t);
 extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
 				const struct iovec *, unsigned int,
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index 1f0589a05ec..e480b610205 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -62,7 +62,7 @@ xfs_read_xfsstats(
 		while (j < xstats[i].endpoint) {
 			val = 0;
 			/* sum over all cpus */
-			for_each_cpu(c)
+			for_each_possible_cpu(c)
 				val += *(((__u32*)&per_cpu(xfsstats, c) + j));
 			len += sprintf(buffer + len, " %u", val);
 			j++;
@@ -70,7 +70,7 @@ xfs_read_xfsstats(
 		buffer[len++] = '\n';
 	}
 	/* extra precision counters */
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
 		xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
 		xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 68f4793e8a1..9bdef9d5190 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -23,7 +23,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -151,7 +149,7 @@ xfs_set_inodeops(
 STATIC __inline__ void
 xfs_revalidate_inode(
 	xfs_mount_t		*mp,
-	vnode_t			*vp,
+	bhv_vnode_t		*vp,
 	xfs_inode_t		*ip)
 {
 	struct inode		*inode = vn_to_inode(vp);
@@ -206,7 +204,7 @@ xfs_revalidate_inode(
 void
 xfs_initialize_vnode(
 	bhv_desc_t		*bdp,
-	vnode_t			*vp,
+	bhv_vnode_t		*vp,
 	bhv_desc_t		*inode_bhv,
 	int			unlock)
 {
@@ -336,7 +334,7 @@ STATIC struct inode *
 xfs_fs_alloc_inode(
 	struct super_block	*sb)
 {
-	vnode_t			*vp;
+	bhv_vnode_t		*vp;
 
 	vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
 	if (unlikely(!vp))
@@ -359,13 +357,13 @@ xfs_fs_inode_init_once(
 {
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 		      SLAB_CTOR_CONSTRUCTOR)
-		inode_init_once(vn_to_inode((vnode_t *)vnode));
+		inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
 }
 
 STATIC int
 xfs_init_zones(void)
 {
-	xfs_vnode_zone = kmem_zone_init_flags(sizeof(vnode_t), "xfs_vnode_t",
+	xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
 					KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
 					KM_ZONE_SPREAD,
 					xfs_fs_inode_init_once);
@@ -409,22 +407,17 @@ xfs_fs_write_inode(
 	struct inode		*inode,
 	int			sync)
 {
-	vnode_t			*vp = vn_from_inode(inode);
+	bhv_vnode_t		*vp = vn_from_inode(inode);
 	int			error = 0, flags = FLUSH_INODE;
 
 	if (vp) {
 		vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
 		if (sync)
 			flags |= FLUSH_SYNC;
-		VOP_IFLUSH(vp, flags, error);
-		if (error == EAGAIN) {
-			if (sync)
-				VOP_IFLUSH(vp, flags | FLUSH_LOG, error);
-			else
-				error = 0;
-		}
+		error = bhv_vop_iflush(vp, flags);
+		if (error == EAGAIN)
+			error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0;
 	}
-
 	return -error;
 }
 
@@ -432,8 +425,7 @@ STATIC void
 xfs_fs_clear_inode(
 	struct inode		*inode)
 {
-	vnode_t			*vp = vn_from_inode(inode);
-	int			error, cache;
+	bhv_vnode_t		*vp = vn_from_inode(inode);
 
 	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
 
@@ -446,20 +438,18 @@ xfs_fs_clear_inode(
 	 * This can happen because xfs_iget_core calls xfs_idestroy if we
 	 * find an inode with di_mode == 0 but without IGET_CREATE set.
 	 */
-	if (vp->v_fbhv)
-		VOP_INACTIVE(vp, NULL, cache);
+	if (VNHEAD(vp))
+		bhv_vop_inactive(vp, NULL);
 
 	VN_LOCK(vp);
 	vp->v_flag &= ~VMODIFIED;
 	VN_UNLOCK(vp, 0);
 
-	if (vp->v_fbhv) {
-		VOP_RECLAIM(vp, error);
-		if (error)
-			panic("vn_purge: cannot reclaim");
-	}
+	if (VNHEAD(vp))
+		if (bhv_vop_reclaim(vp))
+			panic("%s: cannot reclaim 0x%p\n", __FUNCTION__, vp);
 
-	ASSERT(vp->v_fbhv == NULL);
+	ASSERT(VNHEAD(vp) == NULL);
 
 #ifdef XFS_VNODE_TRACE
 	ktrace_free(vp->v_trace);
@@ -475,13 +465,13 @@ xfs_fs_clear_inode(
  */
 STATIC void
 xfs_syncd_queue_work(
-	struct vfs	*vfs,
+	struct bhv_vfs	*vfs,
 	void		*data,
-	void		(*syncer)(vfs_t *, void *))
+	void		(*syncer)(bhv_vfs_t *, void *))
 {
-	vfs_sync_work_t	*work;
+	struct bhv_vfs_sync_work *work;
 
-	work = kmem_alloc(sizeof(struct vfs_sync_work), KM_SLEEP);
+	work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
 	INIT_LIST_HEAD(&work->w_list);
 	work->w_syncer = syncer;
 	work->w_data = data;
@@ -500,7 +490,7 @@ xfs_syncd_queue_work(
  */
 STATIC void
 xfs_flush_inode_work(
-	vfs_t		*vfs,
+	bhv_vfs_t	*vfs,
 	void		*inode)
 {
 	filemap_flush(((struct inode *)inode)->i_mapping);
@@ -512,7 +502,7 @@ xfs_flush_inode(
 	xfs_inode_t	*ip)
 {
 	struct inode	*inode = vn_to_inode(XFS_ITOV(ip));
-	struct vfs	*vfs = XFS_MTOVFS(ip->i_mount);
+	struct bhv_vfs	*vfs = XFS_MTOVFS(ip->i_mount);
 
 	igrab(inode);
 	xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work);
@@ -525,7 +515,7 @@ xfs_flush_inode(
  */
 STATIC void
 xfs_flush_device_work(
-	vfs_t		*vfs,
+	bhv_vfs_t	*vfs,
 	void		*inode)
 {
 	sync_blockdev(vfs->vfs_super->s_bdev);
@@ -537,7 +527,7 @@ xfs_flush_device(
 	xfs_inode_t	*ip)
 {
 	struct inode	*inode = vn_to_inode(XFS_ITOV(ip));
-	struct vfs	*vfs = XFS_MTOVFS(ip->i_mount);
+	struct bhv_vfs	*vfs = XFS_MTOVFS(ip->i_mount);
 
 	igrab(inode);
 	xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work);
@@ -545,16 +535,16 @@ xfs_flush_device(
 	xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
 }
 
-#define SYNCD_FLAGS	(SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR|SYNC_REFCACHE)
 STATIC void
 vfs_sync_worker(
-	vfs_t		*vfsp,
+	bhv_vfs_t	*vfsp,
 	void		*unused)
 {
 	int		error;
 
 	if (!(vfsp->vfs_flag & VFS_RDONLY))
-		VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error);
+		error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \
+					SYNC_ATTR | SYNC_REFCACHE, NULL);
 	vfsp->vfs_sync_seq++;
 	wmb();
 	wake_up(&vfsp->vfs_wait_single_sync_task);
@@ -565,8 +555,8 @@ xfssyncd(
 	void			*arg)
 {
 	long			timeleft;
-	vfs_t			*vfsp = (vfs_t *) arg;
-	struct vfs_sync_work	*work, *n;
+	bhv_vfs_t		*vfsp = (bhv_vfs_t *) arg;
+	bhv_vfs_sync_work_t	*work, *n;
 	LIST_HEAD		(tmp);
 
 	timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
@@ -600,7 +590,7 @@ xfssyncd(
 			list_del(&work->w_list);
 			if (work == &vfsp->vfs_sync_work)
 				continue;
-			kmem_free(work, sizeof(struct vfs_sync_work));
+			kmem_free(work, sizeof(struct bhv_vfs_sync_work));
 		}
 	}
 
@@ -609,7 +599,7 @@ xfssyncd(
 
 STATIC int
 xfs_fs_start_syncd(
-	vfs_t			*vfsp)
+	bhv_vfs_t		*vfsp)
 {
 	vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
 	vfsp->vfs_sync_work.w_vfs = vfsp;
@@ -621,7 +611,7 @@ xfs_fs_start_syncd(
 
 STATIC void
 xfs_fs_stop_syncd(
-	vfs_t			*vfsp)
+	bhv_vfs_t		*vfsp)
 {
 	kthread_stop(vfsp->vfs_sync_task);
 }
@@ -630,35 +620,26 @@ STATIC void
 xfs_fs_put_super(
 	struct super_block	*sb)
 {
-	vfs_t			*vfsp = vfs_from_sb(sb);
+	bhv_vfs_t		*vfsp = vfs_from_sb(sb);
 	int			error;
 
 	xfs_fs_stop_syncd(vfsp);
-	VFS_SYNC(vfsp, SYNC_ATTR|SYNC_DELWRI, NULL, error);
-	if (!error)
-		VFS_UNMOUNT(vfsp, 0, NULL, error);
+	bhv_vfs_sync(vfsp, SYNC_ATTR | SYNC_DELWRI, NULL);
+	error = bhv_vfs_unmount(vfsp, 0, NULL);
 	if (error) {
-		printk("XFS unmount got error %d\n", error);
-		printk("%s: vfsp/0x%p left dangling!\n", __FUNCTION__, vfsp);
-		return;
+		printk("XFS: unmount got error=%d\n", error);
+		printk("%s: vfs=0x%p left dangling!\n", __FUNCTION__, vfsp);
+	} else {
+		vfs_deallocate(vfsp);
 	}
-
-	vfs_deallocate(vfsp);
 }
 
 STATIC void
 xfs_fs_write_super(
 	struct super_block	*sb)
 {
-	vfs_t			*vfsp = vfs_from_sb(sb);
-	int			error;
-
-	if (sb->s_flags & MS_RDONLY) {
-		sb->s_dirt = 0; /* paranoia */
-		return;
-	}
-	/* Push the log and superblock a little */
-	VFS_SYNC(vfsp, SYNC_FSDATA, NULL, error);
+	if (!(sb->s_flags & MS_RDONLY))
+		bhv_vfs_sync(vfs_from_sb(sb), SYNC_FSDATA, NULL);
 	sb->s_dirt = 0;
 }
 
@@ -667,16 +648,16 @@ xfs_fs_sync_super(
 	struct super_block	*sb,
 	int			wait)
 {
-	vfs_t		*vfsp = vfs_from_sb(sb);
-	int		error;
-	int		flags = SYNC_FSDATA;
+	bhv_vfs_t		*vfsp = vfs_from_sb(sb);
+	int			error;
+	int			flags;
 
 	if (unlikely(sb->s_frozen == SB_FREEZE_WRITE))
 		flags = SYNC_QUIESCE;
 	else
 		flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0);
 
-	VFS_SYNC(vfsp, flags, NULL, error);
+	error = bhv_vfs_sync(vfsp, flags, NULL);
 	sb->s_dirt = 0;
 
 	if (unlikely(laptop_mode)) {
@@ -703,14 +684,11 @@ xfs_fs_sync_super(
 
 STATIC int
 xfs_fs_statfs(
-	struct super_block	*sb,
+	struct dentry		*dentry,
 	struct kstatfs		*statp)
 {
-	vfs_t			*vfsp = vfs_from_sb(sb);
-	int			error;
-
-	VFS_STATVFS(vfsp, statp, NULL, error);
-	return -error;
+	return -bhv_vfs_statvfs(vfs_from_sb(dentry->d_sb), statp,
+				vn_from_inode(dentry->d_inode));
 }
 
 STATIC int
@@ -719,13 +697,13 @@ xfs_fs_remount(
 	int			*flags,
 	char			*options)
 {
-	vfs_t			*vfsp = vfs_from_sb(sb);
+	bhv_vfs_t		*vfsp = vfs_from_sb(sb);
 	struct xfs_mount_args	*args = xfs_args_allocate(sb, 0);
 	int			error;
 
-	VFS_PARSEARGS(vfsp, options, args, 1, error);
+	error = bhv_vfs_parseargs(vfsp, options, args, 1);
 	if (!error)
-		VFS_MNTUPDATE(vfsp, flags, args, error);
+		error = bhv_vfs_mntupdate(vfsp, flags, args);
 	kmem_free(args, sizeof(*args));
 	return -error;
 }
@@ -734,7 +712,7 @@ STATIC void
 xfs_fs_lockfs(
 	struct super_block	*sb)
 {
-	VFS_FREEZE(vfs_from_sb(sb));
+	bhv_vfs_freeze(vfs_from_sb(sb));
 }
 
 STATIC int
@@ -742,11 +720,7 @@ xfs_fs_show_options(
 	struct seq_file		*m,
 	struct vfsmount		*mnt)
 {
-	struct vfs		*vfsp = vfs_from_sb(mnt->mnt_sb);
-	int			error;
-
-	VFS_SHOWARGS(vfsp, m, error);
-	return error;
+	return -bhv_vfs_showargs(vfs_from_sb(mnt->mnt_sb), m);
 }
 
 STATIC int
@@ -754,11 +728,7 @@ xfs_fs_quotasync(
 	struct super_block	*sb,
 	int			type)
 {
-	struct vfs		*vfsp = vfs_from_sb(sb);
-	int			error;
-
-	VFS_QUOTACTL(vfsp, Q_XQUOTASYNC, 0, (caddr_t)NULL, error);
-	return -error;
+	return -bhv_vfs_quotactl(vfs_from_sb(sb), Q_XQUOTASYNC, 0, NULL);
 }
 
 STATIC int
@@ -766,11 +736,7 @@ xfs_fs_getxstate(
 	struct super_block	*sb,
 	struct fs_quota_stat	*fqs)
 {
-	struct vfs		*vfsp = vfs_from_sb(sb);
-	int			error;
-
-	VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error);
-	return -error;
+	return -bhv_vfs_quotactl(vfs_from_sb(sb), Q_XGETQSTAT, 0, (caddr_t)fqs);
 }
 
 STATIC int
@@ -779,11 +745,7 @@ xfs_fs_setxstate(
 	unsigned int		flags,
 	int			op)
 {
-	struct vfs		*vfsp = vfs_from_sb(sb);
-	int			error;
-
-	VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error);
-	return -error;
+	return -bhv_vfs_quotactl(vfs_from_sb(sb), op, 0, (caddr_t)&flags);
 }
 
 STATIC int
@@ -793,13 +755,10 @@ xfs_fs_getxquota(
 	qid_t			id,
 	struct fs_disk_quota	*fdq)
 {
-	struct vfs		*vfsp = vfs_from_sb(sb);
-	int			error, getmode;
-
-	getmode = (type == USRQUOTA) ? Q_XGETQUOTA :
-		 ((type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETPQUOTA);
-	VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error);
-	return -error;
+	return -bhv_vfs_quotactl(vfs_from_sb(sb),
+				 (type == USRQUOTA) ? Q_XGETQUOTA :
+				  ((type == GRPQUOTA) ? Q_XGETGQUOTA :
+				   Q_XGETPQUOTA), id, (caddr_t)fdq);
 }
 
 STATIC int
@@ -809,13 +768,10 @@ xfs_fs_setxquota(
 	qid_t			id,
 	struct fs_disk_quota	*fdq)
 {
-	struct vfs		*vfsp = vfs_from_sb(sb);
-	int			error, setmode;
-
-	setmode = (type == USRQUOTA) ? Q_XSETQLIM :
-		 ((type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETPQLIM);
-	VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error);
-	return -error;
+	return -bhv_vfs_quotactl(vfs_from_sb(sb),
+				 (type == USRQUOTA) ? Q_XSETQLIM :
+				  ((type == GRPQUOTA) ? Q_XSETGQLIM :
+				   Q_XSETPQLIM), id, (caddr_t)fdq);
 }
 
 STATIC int
@@ -824,34 +780,32 @@ xfs_fs_fill_super(
 	void			*data,
 	int			silent)
 {
-	vnode_t			*rootvp;
-	struct vfs		*vfsp = vfs_allocate(sb);
+	struct bhv_vnode	*rootvp;
+	struct bhv_vfs		*vfsp = vfs_allocate(sb);
 	struct xfs_mount_args	*args = xfs_args_allocate(sb, silent);
 	struct kstatfs		statvfs;
-	int			error, error2;
+	int			error;
 
 	bhv_insert_all_vfsops(vfsp);
 
-	VFS_PARSEARGS(vfsp, (char *)data, args, 0, error);
+	error = bhv_vfs_parseargs(vfsp, (char *)data, args, 0);
 	if (error) {
 		bhv_remove_all_vfsops(vfsp, 1);
 		goto fail_vfsop;
 	}
 
 	sb_min_blocksize(sb, BBSIZE);
-#ifdef CONFIG_XFS_EXPORT
 	sb->s_export_op = &xfs_export_operations;
-#endif
 	sb->s_qcop = &xfs_quotactl_operations;
 	sb->s_op = &xfs_super_operations;
 
-	VFS_MOUNT(vfsp, args, NULL, error);
+	error = bhv_vfs_mount(vfsp, args, NULL);
 	if (error) {
 		bhv_remove_all_vfsops(vfsp, 1);
 		goto fail_vfsop;
 	}
 
-	VFS_STATVFS(vfsp, &statvfs, NULL, error);
+	error = bhv_vfs_statvfs(vfsp, &statvfs, NULL);
 	if (error)
 		goto fail_unmount;
 
@@ -863,7 +817,7 @@ xfs_fs_fill_super(
 	sb->s_time_gran = 1;
 	set_posix_acl_flag(sb);
 
-	VFS_ROOT(vfsp, &rootvp, error);
+	error = bhv_vfs_root(vfsp, &rootvp);
 	if (error)
 		goto fail_unmount;
 
@@ -892,7 +846,7 @@ fail_vnrele:
 	}
 
 fail_unmount:
-	VFS_UNMOUNT(vfsp, 0, NULL, error2);
+	bhv_vfs_unmount(vfsp, 0, NULL);
 
 fail_vfsop:
 	vfs_deallocate(vfsp);
@@ -900,14 +854,16 @@ fail_vfsop:
 	return -error;
 }
 
-STATIC struct super_block *
+STATIC int
 xfs_fs_get_sb(
 	struct file_system_type	*fs_type,
 	int			flags,
 	const char		*dev_name,
-	void			*data)
+	void			*data,
+	struct vfsmount		*mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
+	return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super,
+			   mnt);
 }
 
 STATIC struct super_operations xfs_super_operations = {
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 376b96cb513..33dd1ca1324 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -105,7 +105,7 @@ struct block_device;
 
 extern __uint64_t xfs_max_file_offset(unsigned int);
 
-extern void xfs_initialize_vnode(bhv_desc_t *, vnode_t *, bhv_desc_t *, int);
+extern void xfs_initialize_vnode(bhv_desc_t *, bhv_vnode_t *, bhv_desc_t *, int);
 
 extern void xfs_flush_inode(struct xfs_inode *);
 extern void xfs_flush_device(struct xfs_inode *);
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index 7079cc83721..af246532fbf 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -38,7 +38,7 @@ xfs_stats_clear_proc_handler(
 
 	if (!ret && write && *valp) {
 		printk("XFS Clearing xfsstats\n");
-		for_each_cpu(c) {
+		for_each_possible_cpu(c) {
 			preempt_disable();
 			/* save vn_active, it's a universal truth! */
 			vn_active = per_cpu(xfsstats, c).vn_active;
@@ -120,6 +120,11 @@ STATIC ctl_table xfs_table[] = {
 	&sysctl_intvec, NULL,
 	&xfs_params.rotorstep.min, &xfs_params.rotorstep.max},
 
+	{XFS_INHERIT_NODFRG, "inherit_nodefrag", &xfs_params.inherit_nodfrg.val,
+	sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+	&sysctl_intvec, NULL,
+	&xfs_params.inherit_nodfrg.min, &xfs_params.inherit_nodfrg.max},
+
 	/* please keep this the last entry */
 #ifdef CONFIG_PROC_FS
 	{XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val,
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index bc8c11f1372..a631fb8cc5a 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -46,6 +46,7 @@ typedef struct xfs_param {
 	xfs_sysctl_val_t xfs_buf_age;	/* Metadata buffer age before flush. */
 	xfs_sysctl_val_t inherit_nosym;	/* Inherit the "nosymlinks" flag. */
 	xfs_sysctl_val_t rotorstep;	/* inode32 AG rotoring control knob */
+	xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
 } xfs_param_t;
 
 /*
@@ -84,6 +85,7 @@ enum {
 	/* XFS_IO_BYPASS = 18 */
 	XFS_INHERIT_NOSYM = 19,
 	XFS_ROTORSTEP = 20,
+	XFS_INHERIT_NODFRG = 21,
 };
 
 extern xfs_param_t	xfs_params;
diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c
index 6f7c9f7a862..6145e8bd0be 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.c
+++ b/fs/xfs/linux-2.6/xfs_vfs.c
@@ -23,7 +23,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_imap.h"
 #include "xfs_alloc.h"
@@ -104,7 +103,7 @@ vfs_mntupdate(
 int
 vfs_root(
 	struct bhv_desc		*bdp,
-	struct vnode		**vpp)
+	struct bhv_vnode	**vpp)
 {
 	struct bhv_desc		*next = bdp;
 
@@ -117,15 +116,15 @@ vfs_root(
 int
 vfs_statvfs(
 	struct bhv_desc		*bdp,
-	xfs_statfs_t		*sp,
-	struct vnode		*vp)
+	bhv_statvfs_t		*statp,
+	struct bhv_vnode	*vp)
 {
 	struct bhv_desc		*next = bdp;
 
 	ASSERT(next);
 	while (! (bhvtovfsops(next))->vfs_statvfs)
 		next = BHV_NEXT(next);
-	return ((*bhvtovfsops(next)->vfs_statvfs)(next, sp, vp));
+	return ((*bhvtovfsops(next)->vfs_statvfs)(next, statp, vp));
 }
 
 int
@@ -145,7 +144,7 @@ vfs_sync(
 int
 vfs_vget(
 	struct bhv_desc		*bdp,
-	struct vnode		**vpp,
+	struct bhv_vnode	**vpp,
 	struct fid		*fidp)
 {
 	struct bhv_desc		*next = bdp;
@@ -187,7 +186,7 @@ vfs_quotactl(
 void
 vfs_init_vnode(
 	struct bhv_desc		*bdp,
-	struct vnode		*vp,
+	struct bhv_vnode	*vp,
 	struct bhv_desc		*bp,
 	int			unlock)
 {
@@ -226,13 +225,13 @@ vfs_freeze(
 	((*bhvtovfsops(next)->vfs_freeze)(next));
 }
 
-vfs_t *
+bhv_vfs_t *
 vfs_allocate(
 	struct super_block	*sb)
 {
-	struct vfs		*vfsp;
+	struct bhv_vfs		*vfsp;
 
-	vfsp = kmem_zalloc(sizeof(vfs_t), KM_SLEEP);
+	vfsp = kmem_zalloc(sizeof(bhv_vfs_t), KM_SLEEP);
 	bhv_head_init(VFS_BHVHEAD(vfsp), "vfs");
 	INIT_LIST_HEAD(&vfsp->vfs_sync_list);
 	spin_lock_init(&vfsp->vfs_sync_lock);
@@ -247,25 +246,25 @@ vfs_allocate(
 	return vfsp;
 }
 
-vfs_t *
+bhv_vfs_t *
 vfs_from_sb(
 	struct super_block	*sb)
 {
-	return (vfs_t *)sb->s_fs_info;
+	return (bhv_vfs_t *)sb->s_fs_info;
 }
 
 void
 vfs_deallocate(
-	struct vfs		*vfsp)
+	struct bhv_vfs		*vfsp)
 {
 	bhv_head_destroy(VFS_BHVHEAD(vfsp));
-	kmem_free(vfsp, sizeof(vfs_t));
+	kmem_free(vfsp, sizeof(bhv_vfs_t));
 }
 
 void
 vfs_insertops(
-	struct vfs		*vfsp,
-	struct bhv_vfsops	*vfsops)
+	struct bhv_vfs		*vfsp,
+	struct bhv_module_vfsops *vfsops)
 {
 	struct bhv_desc		*bdp;
 
@@ -276,9 +275,9 @@ vfs_insertops(
 
 void
 vfs_insertbhv(
-	struct vfs		*vfsp,
+	struct bhv_vfs		*vfsp,
 	struct bhv_desc		*bdp,
-	struct vfsops		*vfsops,
+	struct bhv_vfsops	*vfsops,
 	void			*mount)
 {
 	bhv_desc_init(bdp, mount, vfsp, vfsops);
@@ -287,7 +286,7 @@ vfs_insertbhv(
 
 void
 bhv_remove_vfsops(
-	struct vfs		*vfsp,
+	struct bhv_vfs		*vfsp,
 	int			pos)
 {
 	struct bhv_desc		*bhv;
@@ -301,7 +300,7 @@ bhv_remove_vfsops(
 
 void
 bhv_remove_all_vfsops(
-	struct vfs		*vfsp,
+	struct bhv_vfs		*vfsp,
 	int			freebase)
 {
 	struct xfs_mount	*mp;
@@ -317,7 +316,7 @@ bhv_remove_all_vfsops(
 
 void
 bhv_insert_all_vfsops(
-	struct vfs		*vfsp)
+	struct bhv_vfs		*vfsp)
 {
 	struct xfs_mount	*mp;
 
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 841200c0309..91fc2c4b335 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -21,42 +21,40 @@
 #include <linux/vfs.h>
 #include "xfs_fs.h"
 
+struct bhv_vfs;
+struct bhv_vnode;
+
 struct fid;
-struct vfs;
 struct cred;
-struct vnode;
-struct kstatfs;
 struct seq_file;
 struct super_block;
 struct xfs_mount_args;
 
-typedef struct kstatfs xfs_statfs_t;
+typedef struct kstatfs	bhv_statvfs_t;
 
-typedef struct vfs_sync_work {
+typedef struct bhv_vfs_sync_work {
 	struct list_head	w_list;
-	struct vfs		*w_vfs;
+	struct bhv_vfs		*w_vfs;
 	void			*w_data;	/* syncer routine argument */
-	void			(*w_syncer)(struct vfs *, void *);
-} vfs_sync_work_t;
+	void			(*w_syncer)(struct bhv_vfs *, void *);
+} bhv_vfs_sync_work_t;
 
-typedef struct vfs {
+typedef struct bhv_vfs {
 	u_int			vfs_flag;	/* flags */
 	xfs_fsid_t		vfs_fsid;	/* file system ID */
 	xfs_fsid_t		*vfs_altfsid;	/* An ID fixed for life of FS */
 	bhv_head_t		vfs_bh;		/* head of vfs behavior chain */
 	struct super_block	*vfs_super;	/* generic superblock pointer */
 	struct task_struct	*vfs_sync_task;	/* generalised sync thread */
-	vfs_sync_work_t		vfs_sync_work;	/* work item for VFS_SYNC */
+	bhv_vfs_sync_work_t	vfs_sync_work;	/* work item for bhv_vfs_sync */
 	struct list_head	vfs_sync_list;	/* sync thread work item list */
 	spinlock_t		vfs_sync_lock;	/* work item list lock */
-	int 			vfs_sync_seq;	/* sync thread generation no. */
+	int			vfs_sync_seq;	/* sync thread generation no. */
 	wait_queue_head_t	vfs_wait_single_sync_task;
-} vfs_t;
-
-#define vfs_fbhv		vfs_bh.bh_first	/* 1st on vfs behavior chain */
+} bhv_vfs_t;
 
-#define bhvtovfs(bdp)		( (struct vfs *)BHV_VOBJ(bdp) )
-#define bhvtovfsops(bdp)	( (struct vfsops *)BHV_OPS(bdp) )
+#define bhvtovfs(bdp)		( (struct bhv_vfs *)BHV_VOBJ(bdp) )
+#define bhvtovfsops(bdp)	( (struct bhv_vfsops *)BHV_OPS(bdp) )
 #define VFS_BHVHEAD(vfs)	( &(vfs)->vfs_bh )
 #define VFS_REMOVEBHV(vfs, bdp)	( bhv_remove(VFS_BHVHEAD(vfs), bdp) )
 
@@ -71,7 +69,7 @@ typedef enum {
 	VFS_BHV_QM,		/* quota manager */
 	VFS_BHV_IO,		/* IO path */
 	VFS_BHV_END		/* housekeeping end-of-range */
-} vfs_bhv_t;
+} bhv_vfs_type_t;
 
 #define VFS_POSITION_XFS	(BHV_POSITION_BASE)
 #define VFS_POSITION_DM		(VFS_POSITION_BASE+10)
@@ -81,8 +79,9 @@ typedef enum {
 #define VFS_RDONLY		0x0001	/* read-only vfs */
 #define VFS_GRPID		0x0002	/* group-ID assigned from directory */
 #define VFS_DMI			0x0004	/* filesystem has the DMI enabled */
-#define VFS_32BITINODES		0x0008	/* do not use inums above 32 bits */
-#define VFS_END			0x0008	/* max flag */
+#define VFS_UMOUNT		0x0008	/* unmount in progress */
+#define VFS_32BITINODES		0x0010	/* do not use inums above 32 bits */
+#define VFS_END			0x0010	/* max flag */
 
 #define SYNC_ATTR		0x0001	/* sync attributes */
 #define SYNC_CLOSE		0x0002	/* close file system down */
@@ -92,7 +91,14 @@ typedef enum {
 #define SYNC_FSDATA		0x0020	/* flush fs data (e.g. superblocks) */
 #define SYNC_REFCACHE		0x0040  /* prune some of the nfs ref cache */
 #define SYNC_REMOUNT		0x0080  /* remount readonly, no dummy LRs */
-#define SYNC_QUIESCE		0x0100  /* quiesce filesystem for a snapshot */
+#define SYNC_QUIESCE		0x0100  /* quiesce filesystem for a snapshot */
+
+#define SHUTDOWN_META_IO_ERROR	0x0001	/* write attempt to metadata failed */
+#define SHUTDOWN_LOG_IO_ERROR	0x0002	/* write attempt to the log failed */
+#define SHUTDOWN_FORCE_UMOUNT	0x0004	/* shutdown from a forced unmount */
+#define SHUTDOWN_CORRUPT_INCORE	0x0008	/* corrupt in-memory data structures */
+#define SHUTDOWN_REMOTE_REQ	0x0010	/* shutdown came from remote cell */
+#define SHUTDOWN_DEVICE_REQ	0x0020	/* failed all paths to the device */
 
 typedef int	(*vfs_mount_t)(bhv_desc_t *,
 				struct xfs_mount_args *, struct cred *);
@@ -102,18 +108,19 @@ typedef	int	(*vfs_showargs_t)(bhv_desc_t *, struct seq_file *);
 typedef int	(*vfs_unmount_t)(bhv_desc_t *, int, struct cred *);
 typedef int	(*vfs_mntupdate_t)(bhv_desc_t *, int *,
 				struct xfs_mount_args *);
-typedef int	(*vfs_root_t)(bhv_desc_t *, struct vnode **);
-typedef int	(*vfs_statvfs_t)(bhv_desc_t *, xfs_statfs_t *, struct vnode *);
+typedef int	(*vfs_root_t)(bhv_desc_t *, struct bhv_vnode **);
+typedef int	(*vfs_statvfs_t)(bhv_desc_t *, bhv_statvfs_t *,
+				struct bhv_vnode *);
 typedef int	(*vfs_sync_t)(bhv_desc_t *, int, struct cred *);
-typedef int	(*vfs_vget_t)(bhv_desc_t *, struct vnode **, struct fid *);
+typedef int	(*vfs_vget_t)(bhv_desc_t *, struct bhv_vnode **, struct fid *);
 typedef int	(*vfs_dmapiops_t)(bhv_desc_t *, caddr_t);
 typedef int	(*vfs_quotactl_t)(bhv_desc_t *, int, int, caddr_t);
 typedef void	(*vfs_init_vnode_t)(bhv_desc_t *,
-				struct vnode *, bhv_desc_t *, int);
+				struct bhv_vnode *, bhv_desc_t *, int);
 typedef void	(*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int);
 typedef void	(*vfs_freeze_t)(bhv_desc_t *);
 
-typedef struct vfsops {
+typedef struct bhv_vfsops {
 	bhv_position_t		vf_position;	/* behavior chain position */
 	vfs_mount_t		vfs_mount;	/* mount file system */
 	vfs_parseargs_t		vfs_parseargs;	/* parse mount options */
@@ -129,82 +136,82 @@ typedef struct vfsops {
 	vfs_init_vnode_t	vfs_init_vnode;	/* initialize a new vnode */
 	vfs_force_shutdown_t	vfs_force_shutdown;	/* crash and burn */
 	vfs_freeze_t		vfs_freeze;	/* freeze fs for snapshot */
-} vfsops_t;
+} bhv_vfsops_t;
 
 /*
- * VFS's.  Operates on vfs structure pointers (starts at bhv head).
+ * Virtual filesystem operations, operating from head bhv.
  */
-#define VHEAD(v)			((v)->vfs_fbhv)
-#define VFS_MOUNT(v, ma,cr, rv)		((rv) = vfs_mount(VHEAD(v), ma,cr))
-#define VFS_PARSEARGS(v, o,ma,f, rv)	((rv) = vfs_parseargs(VHEAD(v), o,ma,f))
-#define VFS_SHOWARGS(v, m, rv)		((rv) = vfs_showargs(VHEAD(v), m))
-#define VFS_UNMOUNT(v, f, cr, rv)	((rv) = vfs_unmount(VHEAD(v), f,cr))
-#define VFS_MNTUPDATE(v, fl, args, rv)	((rv) = vfs_mntupdate(VHEAD(v), fl, args))
-#define VFS_ROOT(v, vpp, rv)		((rv) = vfs_root(VHEAD(v), vpp))
-#define VFS_STATVFS(v, sp,vp, rv)	((rv) = vfs_statvfs(VHEAD(v), sp,vp))
-#define VFS_SYNC(v, flag,cr, rv)	((rv) = vfs_sync(VHEAD(v), flag,cr))
-#define VFS_VGET(v, vpp,fidp, rv)	((rv) = vfs_vget(VHEAD(v), vpp,fidp))
-#define VFS_DMAPIOPS(v, p, rv)		((rv) = vfs_dmapiops(VHEAD(v), p))
-#define VFS_QUOTACTL(v, c,id,p, rv)	((rv) = vfs_quotactl(VHEAD(v), c,id,p))
-#define VFS_INIT_VNODE(v, vp,b,ul)	( vfs_init_vnode(VHEAD(v), vp,b,ul) )
-#define VFS_FORCE_SHUTDOWN(v, fl,f,l)	( vfs_force_shutdown(VHEAD(v), fl,f,l) )
-#define VFS_FREEZE(v)			( vfs_freeze(VHEAD(v)) )
+#define VFSHEAD(v)			((v)->vfs_bh.bh_first)
+#define bhv_vfs_mount(v, ma,cr)		vfs_mount(VFSHEAD(v), ma,cr)
+#define bhv_vfs_parseargs(v, o,ma,f)	vfs_parseargs(VFSHEAD(v), o,ma,f)
+#define bhv_vfs_showargs(v, m)		vfs_showargs(VFSHEAD(v), m)
+#define bhv_vfs_unmount(v, f,cr)	vfs_unmount(VFSHEAD(v), f,cr)
+#define bhv_vfs_mntupdate(v, fl,args)	vfs_mntupdate(VFSHEAD(v), fl,args)
+#define bhv_vfs_root(v, vpp)		vfs_root(VFSHEAD(v), vpp)
+#define bhv_vfs_statvfs(v, sp,vp)	vfs_statvfs(VFSHEAD(v), sp,vp)
+#define bhv_vfs_sync(v, flag,cr)	vfs_sync(VFSHEAD(v), flag,cr)
+#define bhv_vfs_vget(v, vpp,fidp)	vfs_vget(VFSHEAD(v), vpp,fidp)
+#define bhv_vfs_dmapiops(v, p)		vfs_dmapiops(VFSHEAD(v), p)
+#define bhv_vfs_quotactl(v, c,id,p)	vfs_quotactl(VFSHEAD(v), c,id,p)
+#define bhv_vfs_init_vnode(v, vp,b,ul)	vfs_init_vnode(VFSHEAD(v), vp,b,ul)
+#define bhv_vfs_force_shutdown(v,u,f,l)	vfs_force_shutdown(VFSHEAD(v), u,f,l)
+#define bhv_vfs_freeze(v)		vfs_freeze(VFSHEAD(v))
 
 /*
- * PVFS's.  Operates on behavior descriptor pointers.
+ * Virtual filesystem operations, operating from next bhv.
  */
-#define PVFS_MOUNT(b, ma,cr, rv)	((rv) = vfs_mount(b, ma,cr))
-#define PVFS_PARSEARGS(b, o,ma,f, rv)	((rv) = vfs_parseargs(b, o,ma,f))
-#define PVFS_SHOWARGS(b, m, rv)		((rv) = vfs_showargs(b, m))
-#define PVFS_UNMOUNT(b, f,cr, rv)	((rv) = vfs_unmount(b, f,cr))
-#define PVFS_MNTUPDATE(b, fl, args, rv)	((rv) = vfs_mntupdate(b, fl, args))
-#define PVFS_ROOT(b, vpp, rv)		((rv) = vfs_root(b, vpp))
-#define PVFS_STATVFS(b, sp,vp, rv)	((rv) = vfs_statvfs(b, sp,vp))
-#define PVFS_SYNC(b, flag,cr, rv)	((rv) = vfs_sync(b, flag,cr))
-#define PVFS_VGET(b, vpp,fidp, rv)	((rv) = vfs_vget(b, vpp,fidp))
-#define PVFS_DMAPIOPS(b, p, rv)		((rv) = vfs_dmapiops(b, p))
-#define PVFS_QUOTACTL(b, c,id,p, rv)	((rv) = vfs_quotactl(b, c,id,p))
-#define PVFS_INIT_VNODE(b, vp,b2,ul)	( vfs_init_vnode(b, vp,b2,ul) )
-#define PVFS_FORCE_SHUTDOWN(b, fl,f,l)	( vfs_force_shutdown(b, fl,f,l) )
-#define PVFS_FREEZE(b)			( vfs_freeze(b) )
+#define bhv_next_vfs_mount(b, ma,cr)		vfs_mount(b, ma,cr)
+#define bhv_next_vfs_parseargs(b, o,ma,f)	vfs_parseargs(b, o,ma,f)
+#define bhv_next_vfs_showargs(b, m)		vfs_showargs(b, m)
+#define bhv_next_vfs_unmount(b, f,cr)		vfs_unmount(b, f,cr)
+#define bhv_next_vfs_mntupdate(b, fl,args)	vfs_mntupdate(b, fl, args)
+#define bhv_next_vfs_root(b, vpp)		vfs_root(b, vpp)
+#define bhv_next_vfs_statvfs(b, sp,vp)		vfs_statvfs(b, sp,vp)
+#define bhv_next_vfs_sync(b, flag,cr)		vfs_sync(b, flag,cr)
+#define bhv_next_vfs_vget(b, vpp,fidp)		vfs_vget(b, vpp,fidp)
+#define bhv_next_vfs_dmapiops(b, p)		vfs_dmapiops(b, p)
+#define bhv_next_vfs_quotactl(b, c,id,p)	vfs_quotactl(b, c,id,p)
+#define bhv_next_vfs_init_vnode(b, vp,b2,ul)	vfs_init_vnode(b, vp,b2,ul)
+#define bhv_next_force_shutdown(b, fl,f,l)	vfs_force_shutdown(b, fl,f,l)
+#define bhv_next_vfs_freeze(b)			vfs_freeze(b)
 
 extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *);
 extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int);
 extern int vfs_showargs(bhv_desc_t *, struct seq_file *);
 extern int vfs_unmount(bhv_desc_t *, int, struct cred *);
 extern int vfs_mntupdate(bhv_desc_t *, int *, struct xfs_mount_args *);
-extern int vfs_root(bhv_desc_t *, struct vnode **);
-extern int vfs_statvfs(bhv_desc_t *, xfs_statfs_t *, struct vnode *);
+extern int vfs_root(bhv_desc_t *, struct bhv_vnode **);
+extern int vfs_statvfs(bhv_desc_t *, bhv_statvfs_t *, struct bhv_vnode *);
 extern int vfs_sync(bhv_desc_t *, int, struct cred *);
-extern int vfs_vget(bhv_desc_t *, struct vnode **, struct fid *);
+extern int vfs_vget(bhv_desc_t *, struct bhv_vnode **, struct fid *);
 extern int vfs_dmapiops(bhv_desc_t *, caddr_t);
 extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t);
-extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int);
+extern void vfs_init_vnode(bhv_desc_t *, struct bhv_vnode *, bhv_desc_t *, int);
 extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int);
 extern void vfs_freeze(bhv_desc_t *);
 
-typedef struct bhv_vfsops {
-	struct vfsops		bhv_common;
+#define vfs_test_for_freeze(vfs)	((vfs)->vfs_super->s_frozen)
+#define vfs_wait_for_freeze(vfs,l)	vfs_check_frozen((vfs)->vfs_super, (l))
+ 
+typedef struct bhv_module_vfsops {
+	struct bhv_vfsops	bhv_common;
 	void *			bhv_custom;
-} bhv_vfsops_t;
+} bhv_module_vfsops_t;
 
-#define vfs_bhv_lookup(v, id)	( bhv_lookup_range(&(v)->vfs_bh, (id), (id)) )
-#define vfs_bhv_custom(b)	( ((bhv_vfsops_t *)BHV_OPS(b))->bhv_custom )
-#define vfs_bhv_set_custom(b,o)	( (b)->bhv_custom = (void *)(o))
-#define vfs_bhv_clr_custom(b)	( (b)->bhv_custom = NULL )
+#define vfs_bhv_lookup(v, id)	(bhv_lookup_range(&(v)->vfs_bh, (id), (id)))
+#define vfs_bhv_custom(b)	(((bhv_module_vfsops_t*)BHV_OPS(b))->bhv_custom)
+#define vfs_bhv_set_custom(b,o)	((b)->bhv_custom = (void *)(o))
+#define vfs_bhv_clr_custom(b)	((b)->bhv_custom = NULL)
 
-extern vfs_t *vfs_allocate(struct super_block *);
-extern vfs_t *vfs_from_sb(struct super_block *);
-extern void vfs_deallocate(vfs_t *);
-extern void vfs_insertops(vfs_t *, bhv_vfsops_t *);
-extern void vfs_insertbhv(vfs_t *, bhv_desc_t *, vfsops_t *, void *);
+extern bhv_vfs_t *vfs_allocate(struct super_block *);
+extern bhv_vfs_t *vfs_from_sb(struct super_block *);
+extern void vfs_deallocate(bhv_vfs_t *);
+extern void vfs_insertbhv(bhv_vfs_t *, bhv_desc_t *, bhv_vfsops_t *, void *);
 
-extern void bhv_insert_all_vfsops(struct vfs *);
-extern void bhv_remove_all_vfsops(struct vfs *, int);
-extern void bhv_remove_vfsops(struct vfs *, int);
+extern void vfs_insertops(bhv_vfs_t *, bhv_module_vfsops_t *);
 
-#define fs_frozen(vfsp)		((vfsp)->vfs_super->s_frozen)
-#define fs_check_frozen(vfsp, level) \
-	vfs_check_frozen(vfsp->vfs_super, level);
+extern void bhv_insert_all_vfsops(struct bhv_vfs *);
+extern void bhv_remove_all_vfsops(struct bhv_vfs *, int);
+extern void bhv_remove_vfsops(struct bhv_vfs *, int);
 
 #endif	/* __XFS_VFS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index d27c25b27cc..6628d96b6fd 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -39,7 +39,7 @@ vn_init(void)
 
 void
 vn_iowait(
-	struct vnode	*vp)
+	bhv_vnode_t	*vp)
 {
 	wait_queue_head_t *wq = vptosync(vp);
 
@@ -48,17 +48,33 @@ vn_iowait(
 
 void
 vn_iowake(
-	struct vnode	*vp)
+	bhv_vnode_t	*vp)
 {
 	if (atomic_dec_and_test(&vp->v_iocount))
 		wake_up(vptosync(vp));
 }
 
-struct vnode *
+/*
+ * Volume managers supporting multiple paths can send back ENODEV when the
+ * final path disappears.  In this case continuing to fill the page cache
+ * with dirty data which cannot be written out is evil, so prevent that.
+ */
+void
+vn_ioerror(
+	bhv_vnode_t	*vp,
+	int		error,
+	char		*f,
+	int		l)
+{
+	if (unlikely(error == -ENODEV))
+		bhv_vfs_force_shutdown(vp->v_vfsp, SHUTDOWN_DEVICE_REQ, f, l);
+}
+
+bhv_vnode_t *
 vn_initialize(
 	struct inode	*inode)
 {
-	struct vnode	*vp = vn_from_inode(inode);
+	bhv_vnode_t	*vp = vn_from_inode(inode);
 
 	XFS_STATS_INC(vn_active);
 	XFS_STATS_INC(vn_alloc);
@@ -94,8 +110,8 @@ vn_initialize(
  */
 void
 vn_revalidate_core(
-	struct vnode	*vp,
-	vattr_t		*vap)
+	bhv_vnode_t	*vp,
+	bhv_vattr_t	*vap)
 {
 	struct inode	*inode = vn_to_inode(vp);
 
@@ -130,14 +146,14 @@ vn_revalidate_core(
  */
 int
 __vn_revalidate(
-	struct vnode	*vp,
-	struct vattr	*vattr)
+	bhv_vnode_t	*vp,
+	bhv_vattr_t	*vattr)
 {
 	int		error;
 
 	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
 	vattr->va_mask = XFS_AT_STAT | XFS_AT_XFLAGS;
-	VOP_GETATTR(vp, vattr, 0, NULL, error);
+	error = bhv_vop_getattr(vp, vattr, 0, NULL);
 	if (likely(!error)) {
 		vn_revalidate_core(vp, vattr);
 		VUNMODIFY(vp);
@@ -147,9 +163,9 @@ __vn_revalidate(
 
 int
 vn_revalidate(
-	struct vnode	*vp)
+	bhv_vnode_t	*vp)
 {
-	vattr_t		vattr;
+	bhv_vattr_t	vattr;
 
 	return __vn_revalidate(vp, &vattr);
 }
@@ -157,9 +173,9 @@ vn_revalidate(
 /*
  * Add a reference to a referenced vnode.
  */
-struct vnode *
+bhv_vnode_t *
 vn_hold(
-	struct vnode	*vp)
+	bhv_vnode_t	*vp)
 {
 	struct inode	*inode;
 
@@ -192,31 +208,31 @@ vn_hold(
  * Vnode tracing code.
  */
 void
-vn_trace_entry(vnode_t *vp, const char *func, inst_t *ra)
+vn_trace_entry(bhv_vnode_t *vp, const char *func, inst_t *ra)
 {
 	KTRACE_ENTER(vp, VNODE_KTRACE_ENTRY, func, 0, ra);
 }
 
 void
-vn_trace_exit(vnode_t *vp, const char *func, inst_t *ra)
+vn_trace_exit(bhv_vnode_t *vp, const char *func, inst_t *ra)
 {
 	KTRACE_ENTER(vp, VNODE_KTRACE_EXIT, func, 0, ra);
 }
 
 void
-vn_trace_hold(vnode_t *vp, char *file, int line, inst_t *ra)
+vn_trace_hold(bhv_vnode_t *vp, char *file, int line, inst_t *ra)
 {
 	KTRACE_ENTER(vp, VNODE_KTRACE_HOLD, file, line, ra);
 }
 
 void
-vn_trace_ref(vnode_t *vp, char *file, int line, inst_t *ra)
+vn_trace_ref(bhv_vnode_t *vp, char *file, int line, inst_t *ra)
 {
 	KTRACE_ENTER(vp, VNODE_KTRACE_REF, file, line, ra);
 }
 
 void
-vn_trace_rele(vnode_t *vp, char *file, int line, inst_t *ra)
+vn_trace_rele(bhv_vnode_t *vp, char *file, int line, inst_t *ra)
 {
 	KTRACE_ENTER(vp, VNODE_KTRACE_RELE, file, line, ra);
 }
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 2a8e16c2235..35c6a01963a 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -14,57 +14,35 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write the Free Software Foundation,
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- *
- * Portions Copyright (c) 1989, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
  */
 #ifndef __XFS_VNODE_H__
 #define __XFS_VNODE_H__
 
 struct uio;
 struct file;
-struct vattr;
+struct bhv_vfs;
+struct bhv_vattr;
 struct xfs_iomap;
 struct attrlist_cursor_kern;
 
+typedef struct dentry	bhv_vname_t;
+typedef __u64		bhv_vnumber_t;
 
-typedef xfs_ino_t vnumber_t;
-typedef struct dentry vname_t;
-typedef bhv_head_t vn_bhv_head_t;
+typedef enum bhv_vflags {
+	VMODIFIED	= 0x08,	/* XFS inode state possibly differs */
+				/* from the Linux inode state. */
+	VTRUNCATED	= 0x40,	/* truncated down so flush-on-close */
+} bhv_vflags_t;
 
 /*
  * MP locking protocols:
  *	v_flag, v_vfsp				VN_LOCK/VN_UNLOCK
  */
-typedef struct vnode {
-	__u32		v_flag;			/* vnode flags (see below) */
-	struct vfs	*v_vfsp;		/* ptr to containing VFS */
-	vnumber_t	v_number;		/* in-core vnode number */
-	vn_bhv_head_t	v_bh;			/* behavior head */
+typedef struct bhv_vnode {
+	bhv_vflags_t	v_flag;			/* vnode flags (see above) */
+	bhv_vfs_t	*v_vfsp;		/* ptr to containing VFS */
+	bhv_vnumber_t	v_number;		/* in-core vnode number */
+	bhv_head_t	v_bh;			/* behavior head */
 	spinlock_t	v_lock;			/* VN_LOCK/VN_UNLOCK */
 	atomic_t	v_iocount;		/* outstanding I/O count */
 #ifdef XFS_VNODE_TRACE
@@ -72,7 +50,7 @@ typedef struct vnode {
 #endif
 	struct inode	v_inode;		/* Linux inode */
 	/* inode MUST be last */
-} vnode_t;
+} bhv_vnode_t;
 
 #define VN_ISLNK(vp)	S_ISLNK((vp)->v_inode.i_mode)
 #define VN_ISREG(vp)	S_ISREG((vp)->v_inode.i_mode)
@@ -80,9 +58,6 @@ typedef struct vnode {
 #define VN_ISCHR(vp)	S_ISCHR((vp)->v_inode.i_mode)
 #define VN_ISBLK(vp)	S_ISBLK((vp)->v_inode.i_mode)
 
-#define v_fbhv			v_bh.bh_first	       /* first behavior */
-#define v_fops			v_bh.bh_first->bd_ops  /* first behavior ops */
-
 #define VNODE_POSITION_BASE	BHV_POSITION_BASE	/* chain bottom */
 #define VNODE_POSITION_TOP	BHV_POSITION_TOP	/* chain top */
 #define VNODE_POSITION_INVALID	BHV_POSITION_INVALID	/* invalid pos. num */
@@ -104,8 +79,8 @@ typedef enum {
 /*
  * Macros for dealing with the behavior descriptor inside of the vnode.
  */
-#define BHV_TO_VNODE(bdp)	((vnode_t *)BHV_VOBJ(bdp))
-#define BHV_TO_VNODE_NULL(bdp)	((vnode_t *)BHV_VOBJNULL(bdp))
+#define BHV_TO_VNODE(bdp)	((bhv_vnode_t *)BHV_VOBJ(bdp))
+#define BHV_TO_VNODE_NULL(bdp)	((bhv_vnode_t *)BHV_VOBJNULL(bdp))
 
 #define VN_BHV_HEAD(vp)			((bhv_head_t *)(&((vp)->v_bh)))
 #define vn_bhv_head_init(bhp,name)	bhv_head_init(bhp,name)
@@ -116,35 +91,29 @@ typedef enum {
 /*
  * Vnode to Linux inode mapping.
  */
-static inline struct vnode *vn_from_inode(struct inode *inode)
+static inline struct bhv_vnode *vn_from_inode(struct inode *inode)
 {
-	return (vnode_t *)list_entry(inode, vnode_t, v_inode);
+	return (bhv_vnode_t *)list_entry(inode, bhv_vnode_t, v_inode);
 }
-static inline struct inode *vn_to_inode(struct vnode *vnode)
+static inline struct inode *vn_to_inode(struct bhv_vnode *vnode)
 {
 	return &vnode->v_inode;
 }
 
 /*
- * Vnode flags.
- */
-#define VMODIFIED	       0x8	/* XFS inode state possibly differs */
-					/* to the Linux inode state.	*/
-
-/*
- * Values for the VOP_RWLOCK and VOP_RWUNLOCK flags parameter.
+ * Values for the vop_rwlock/rwunlock flags parameter.
  */
-typedef enum vrwlock {
+typedef enum bhv_vrwlock {
 	VRWLOCK_NONE,
 	VRWLOCK_READ,
 	VRWLOCK_WRITE,
 	VRWLOCK_WRITE_DIRECT,
 	VRWLOCK_TRY_READ,
 	VRWLOCK_TRY_WRITE
-} vrwlock_t;
+} bhv_vrwlock_t;
 
 /*
- * Return values for VOP_INACTIVE.  A return value of
+ * Return values for bhv_vop_inactive.  A return value of
  * VN_INACTIVE_NOCACHE implies that the file system behavior
  * has disassociated its state and bhv_desc_t from the vnode.
  */
@@ -152,18 +121,20 @@ typedef enum vrwlock {
 #define	VN_INACTIVE_NOCACHE	1
 
 /*
- * Values for the cmd code given to VOP_VNODE_CHANGE.
+ * Values for the cmd code given to vop_vnode_change.
  */
-typedef enum vchange {
+typedef enum bhv_vchange {
 	VCHANGE_FLAGS_FRLOCKS		= 0,
 	VCHANGE_FLAGS_ENF_LOCKING	= 1,
 	VCHANGE_FLAGS_TRUNCATED		= 2,
 	VCHANGE_FLAGS_PAGE_DIRTY	= 3,
 	VCHANGE_FLAGS_IOEXCL_COUNT	= 4
-} vchange_t;
+} bhv_vchange_t;
 
+typedef enum { L_FALSE, L_TRUE } lastclose_t;
 
 typedef int	(*vop_open_t)(bhv_desc_t *, struct cred *);
+typedef int	(*vop_close_t)(bhv_desc_t *, int, lastclose_t, struct cred *);
 typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
 				const struct iovec *, unsigned int,
 				loff_t *, int, struct cred *);
@@ -181,27 +152,27 @@ typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct pipe_inode_info *,
 				struct cred *);
 typedef int	(*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,
 				int, unsigned int, void __user *);
-typedef int	(*vop_getattr_t)(bhv_desc_t *, struct vattr *, int,
+typedef int	(*vop_getattr_t)(bhv_desc_t *, struct bhv_vattr *, int,
 				struct cred *);
-typedef int	(*vop_setattr_t)(bhv_desc_t *, struct vattr *, int,
+typedef int	(*vop_setattr_t)(bhv_desc_t *, struct bhv_vattr *, int,
 				struct cred *);
 typedef int	(*vop_access_t)(bhv_desc_t *, int, struct cred *);
-typedef int	(*vop_lookup_t)(bhv_desc_t *, vname_t *, vnode_t **,
-				int, vnode_t *, struct cred *);
-typedef int	(*vop_create_t)(bhv_desc_t *, vname_t *, struct vattr *,
-				vnode_t **, struct cred *);
-typedef int	(*vop_remove_t)(bhv_desc_t *, vname_t *, struct cred *);
-typedef int	(*vop_link_t)(bhv_desc_t *, vnode_t *, vname_t *,
-				struct cred *);
-typedef int	(*vop_rename_t)(bhv_desc_t *, vname_t *, vnode_t *, vname_t *,
+typedef int	(*vop_lookup_t)(bhv_desc_t *, bhv_vname_t *, bhv_vnode_t **,
+				int, bhv_vnode_t *, struct cred *);
+typedef int	(*vop_create_t)(bhv_desc_t *, bhv_vname_t *, struct bhv_vattr *,
+				bhv_vnode_t **, struct cred *);
+typedef int	(*vop_remove_t)(bhv_desc_t *, bhv_vname_t *, struct cred *);
+typedef int	(*vop_link_t)(bhv_desc_t *, bhv_vnode_t *, bhv_vname_t *,
 				struct cred *);
-typedef int	(*vop_mkdir_t)(bhv_desc_t *, vname_t *, struct vattr *,
-				vnode_t **, struct cred *);
-typedef int	(*vop_rmdir_t)(bhv_desc_t *, vname_t *, struct cred *);
+typedef int	(*vop_rename_t)(bhv_desc_t *, bhv_vname_t *, bhv_vnode_t *,
+				bhv_vname_t *, struct cred *);
+typedef int	(*vop_mkdir_t)(bhv_desc_t *, bhv_vname_t *, struct bhv_vattr *,
+				bhv_vnode_t **, struct cred *);
+typedef int	(*vop_rmdir_t)(bhv_desc_t *, bhv_vname_t *, struct cred *);
 typedef int	(*vop_readdir_t)(bhv_desc_t *, struct uio *, struct cred *,
 				int *);
-typedef int	(*vop_symlink_t)(bhv_desc_t *, vname_t *, struct vattr *,
-				char *, vnode_t **, struct cred *);
+typedef int	(*vop_symlink_t)(bhv_desc_t *, bhv_vname_t *, struct bhv_vattr*,
+				char *, bhv_vnode_t **, struct cred *);
 typedef int	(*vop_readlink_t)(bhv_desc_t *, struct uio *, int,
 				struct cred *);
 typedef int	(*vop_fsync_t)(bhv_desc_t *, int, struct cred *,
@@ -209,8 +180,8 @@ typedef int	(*vop_fsync_t)(bhv_desc_t *, int, struct cred *,
 typedef int	(*vop_inactive_t)(bhv_desc_t *, struct cred *);
 typedef int	(*vop_fid2_t)(bhv_desc_t *, struct fid *);
 typedef int	(*vop_release_t)(bhv_desc_t *);
-typedef int	(*vop_rwlock_t)(bhv_desc_t *, vrwlock_t);
-typedef void	(*vop_rwunlock_t)(bhv_desc_t *, vrwlock_t);
+typedef int	(*vop_rwlock_t)(bhv_desc_t *, bhv_vrwlock_t);
+typedef void	(*vop_rwunlock_t)(bhv_desc_t *, bhv_vrwlock_t);
 typedef int	(*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int,
 				struct xfs_iomap *, int *);
 typedef int	(*vop_reclaim_t)(bhv_desc_t *);
@@ -222,8 +193,8 @@ typedef	int	(*vop_attr_remove_t)(bhv_desc_t *, const char *,
 				int, struct cred *);
 typedef	int	(*vop_attr_list_t)(bhv_desc_t *, char *, int, int,
 				struct attrlist_cursor_kern *, struct cred *);
-typedef void	(*vop_link_removed_t)(bhv_desc_t *, vnode_t *, int);
-typedef void	(*vop_vnode_change_t)(bhv_desc_t *, vchange_t, __psint_t);
+typedef void	(*vop_link_removed_t)(bhv_desc_t *, bhv_vnode_t *, int);
+typedef void	(*vop_vnode_change_t)(bhv_desc_t *, bhv_vchange_t, __psint_t);
 typedef void	(*vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
 typedef void	(*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
 typedef int	(*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t,
@@ -231,9 +202,10 @@ typedef int	(*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t,
 typedef int	(*vop_iflush_t)(bhv_desc_t *, int);
 
 
-typedef struct vnodeops {
+typedef struct bhv_vnodeops {
 	bhv_position_t  vn_position;    /* position within behavior chain */
 	vop_open_t		vop_open;
+	vop_close_t		vop_close;
 	vop_read_t		vop_read;
 	vop_write_t		vop_write;
 	vop_sendfile_t		vop_sendfile;
@@ -271,103 +243,80 @@ typedef struct vnodeops {
 	vop_pflushvp_t		vop_flush_pages;
 	vop_release_t		vop_release;
 	vop_iflush_t		vop_iflush;
-} vnodeops_t;
+} bhv_vnodeops_t;
 
 /*
- * VOP's.
- */
-#define _VOP_(op, vp)	(*((vnodeops_t *)(vp)->v_fops)->op)
-
-#define VOP_READ(vp,file,iov,segs,offset,ioflags,cr,rv)			\
-	rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
-#define VOP_WRITE(vp,file,iov,segs,offset,ioflags,cr,rv)		\
-	rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
-#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv)		\
-	rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr)
-#define VOP_SPLICE_READ(vp,f,o,pipe,cnt,fl,iofl,cr,rv)			\
-	rv = _VOP_(vop_splice_read, vp)((vp)->v_fbhv,f,o,pipe,cnt,fl,iofl,cr)
-#define VOP_SPLICE_WRITE(vp,f,o,pipe,cnt,fl,iofl,cr,rv)			\
-	rv = _VOP_(vop_splice_write, vp)((vp)->v_fbhv,f,o,pipe,cnt,fl,iofl,cr)
-#define VOP_BMAP(vp,of,sz,rw,b,n,rv)					\
-	rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n)
-#define VOP_OPEN(vp, cr, rv)						\
-	rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr)
-#define VOP_GETATTR(vp, vap, f, cr, rv)					\
-	rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr)
-#define	VOP_SETATTR(vp, vap, f, cr, rv)					\
-	rv = _VOP_(vop_setattr, vp)((vp)->v_fbhv, vap, f, cr)
-#define	VOP_ACCESS(vp, mode, cr, rv)					\
-	rv = _VOP_(vop_access, vp)((vp)->v_fbhv, mode, cr)
-#define	VOP_LOOKUP(vp,d,vpp,f,rdir,cr,rv)				\
-	rv = _VOP_(vop_lookup, vp)((vp)->v_fbhv,d,vpp,f,rdir,cr)
-#define VOP_CREATE(dvp,d,vap,vpp,cr,rv)					\
-	rv = _VOP_(vop_create, dvp)((dvp)->v_fbhv,d,vap,vpp,cr)
-#define VOP_REMOVE(dvp,d,cr,rv)						\
-	rv = _VOP_(vop_remove, dvp)((dvp)->v_fbhv,d,cr)
-#define	VOP_LINK(tdvp,fvp,d,cr,rv)					\
-	rv = _VOP_(vop_link, tdvp)((tdvp)->v_fbhv,fvp,d,cr)
-#define	VOP_RENAME(fvp,fnm,tdvp,tnm,cr,rv)				\
-	rv = _VOP_(vop_rename, fvp)((fvp)->v_fbhv,fnm,tdvp,tnm,cr)
-#define	VOP_MKDIR(dp,d,vap,vpp,cr,rv)					\
-	rv = _VOP_(vop_mkdir, dp)((dp)->v_fbhv,d,vap,vpp,cr)
-#define	VOP_RMDIR(dp,d,cr,rv)	 					\
-	rv = _VOP_(vop_rmdir, dp)((dp)->v_fbhv,d,cr)
-#define	VOP_READDIR(vp,uiop,cr,eofp,rv)					\
-	rv = _VOP_(vop_readdir, vp)((vp)->v_fbhv,uiop,cr,eofp)
-#define	VOP_SYMLINK(dvp,d,vap,tnm,vpp,cr,rv)				\
-	rv = _VOP_(vop_symlink, dvp) ((dvp)->v_fbhv,d,vap,tnm,vpp,cr)
-#define	VOP_READLINK(vp,uiop,fl,cr,rv)					\
-	rv = _VOP_(vop_readlink, vp)((vp)->v_fbhv,uiop,fl,cr)
-#define	VOP_FSYNC(vp,f,cr,b,e,rv)					\
-	rv = _VOP_(vop_fsync, vp)((vp)->v_fbhv,f,cr,b,e)
-#define VOP_INACTIVE(vp, cr, rv)					\
-	rv = _VOP_(vop_inactive, vp)((vp)->v_fbhv, cr)
-#define VOP_RELEASE(vp, rv)						\
-	rv = _VOP_(vop_release, vp)((vp)->v_fbhv)
-#define VOP_FID2(vp, fidp, rv)						\
-	rv = _VOP_(vop_fid2, vp)((vp)->v_fbhv, fidp)
-#define VOP_RWLOCK(vp,i)						\
-	(void)_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
-#define VOP_RWLOCK_TRY(vp,i)						\
-	_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
-#define VOP_RWUNLOCK(vp,i)						\
-	(void)_VOP_(vop_rwunlock, vp)((vp)->v_fbhv, i)
-#define VOP_FRLOCK(vp,c,fl,flags,offset,fr,rv)				\
-	rv = _VOP_(vop_frlock, vp)((vp)->v_fbhv,c,fl,flags,offset,fr)
-#define VOP_RECLAIM(vp, rv)						\
-	rv = _VOP_(vop_reclaim, vp)((vp)->v_fbhv)
-#define VOP_ATTR_GET(vp, name, val, vallenp, fl, cred, rv)		\
-	rv = _VOP_(vop_attr_get, vp)((vp)->v_fbhv,name,val,vallenp,fl,cred)
-#define	VOP_ATTR_SET(vp, name, val, vallen, fl, cred, rv)		\
-	rv = _VOP_(vop_attr_set, vp)((vp)->v_fbhv,name,val,vallen,fl,cred)
-#define	VOP_ATTR_REMOVE(vp, name, flags, cred, rv)			\
-	rv = _VOP_(vop_attr_remove, vp)((vp)->v_fbhv,name,flags,cred)
-#define	VOP_ATTR_LIST(vp, buf, buflen, fl, cursor, cred, rv)		\
-	rv = _VOP_(vop_attr_list, vp)((vp)->v_fbhv,buf,buflen,fl,cursor,cred)
-#define VOP_LINK_REMOVED(vp, dvp, linkzero)				\
-	(void)_VOP_(vop_link_removed, vp)((vp)->v_fbhv, dvp, linkzero)
-#define VOP_VNODE_CHANGE(vp, cmd, val)					\
-	(void)_VOP_(vop_vnode_change, vp)((vp)->v_fbhv,cmd,val)
-/*
- * These are page cache functions that now go thru VOPs.
- * 'last' parameter is unused and left in for IRIX compatibility
+ * Virtual node operations, operating from head bhv.
  */
-#define VOP_TOSS_PAGES(vp, first, last, fiopt)				\
-	_VOP_(vop_tosspages, vp)((vp)->v_fbhv,first, last, fiopt)
-/*
- * 'last' parameter is unused and left in for IRIX compatibility
- */
-#define VOP_FLUSHINVAL_PAGES(vp, first, last, fiopt)			\
-	_VOP_(vop_flushinval_pages, vp)((vp)->v_fbhv,first,last,fiopt)
-/*
- * 'last' parameter is unused and left in for IRIX compatibility
- */
-#define VOP_FLUSH_PAGES(vp, first, last, flags, fiopt, rv)		\
-	rv = _VOP_(vop_flush_pages, vp)((vp)->v_fbhv,first,last,flags,fiopt)
-#define VOP_IOCTL(vp, inode, filp, fl, cmd, arg, rv)			\
-	rv = _VOP_(vop_ioctl, vp)((vp)->v_fbhv,inode,filp,fl,cmd,arg)
-#define VOP_IFLUSH(vp, flags, rv)					\
-	rv = _VOP_(vop_iflush, vp)((vp)->v_fbhv, flags)
+#define VNHEAD(vp)	((vp)->v_bh.bh_first)
+#define VOP(op, vp)	(*((bhv_vnodeops_t *)VNHEAD(vp)->bd_ops)->op)
+#define bhv_vop_open(vp, cr)		VOP(vop_open, vp)(VNHEAD(vp),cr)
+#define bhv_vop_close(vp, f,last,cr)	VOP(vop_close, vp)(VNHEAD(vp),f,last,cr)
+#define bhv_vop_read(vp,file,iov,segs,offset,ioflags,cr)		\
+		VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
+#define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr)		\
+		VOP(vop_write, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
+#define bhv_vop_sendfile(vp,f,off,ioflags,cnt,act,targ,cr)		\
+		VOP(vop_sendfile, vp)(VNHEAD(vp),f,off,ioflags,cnt,act,targ,cr)
+#define bhv_vop_splice_read(vp,f,o,pipe,cnt,fl,iofl,cr)			\
+		VOP(vop_splice_read, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr)
+#define bhv_vop_splice_write(vp,f,o,pipe,cnt,fl,iofl,cr)		\
+		VOP(vop_splice_write, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr)
+#define bhv_vop_bmap(vp,of,sz,rw,b,n)					\
+		VOP(vop_bmap, vp)(VNHEAD(vp),of,sz,rw,b,n)
+#define bhv_vop_getattr(vp, vap,f,cr)					\
+		VOP(vop_getattr, vp)(VNHEAD(vp), vap,f,cr)
+#define	bhv_vop_setattr(vp, vap,f,cr)					\
+		VOP(vop_setattr, vp)(VNHEAD(vp), vap,f,cr)
+#define	bhv_vop_access(vp, mode,cr)	VOP(vop_access, vp)(VNHEAD(vp), mode,cr)
+#define	bhv_vop_lookup(vp,d,vpp,f,rdir,cr)				\
+		VOP(vop_lookup, vp)(VNHEAD(vp),d,vpp,f,rdir,cr)
+#define bhv_vop_create(dvp,d,vap,vpp,cr)				\
+		VOP(vop_create, dvp)(VNHEAD(dvp),d,vap,vpp,cr)
+#define bhv_vop_remove(dvp,d,cr)	VOP(vop_remove, dvp)(VNHEAD(dvp),d,cr)
+#define	bhv_vop_link(dvp,fvp,d,cr)	VOP(vop_link, dvp)(VNHEAD(dvp),fvp,d,cr)
+#define	bhv_vop_rename(fvp,fnm,tdvp,tnm,cr)				\
+		VOP(vop_rename, fvp)(VNHEAD(fvp),fnm,tdvp,tnm,cr)
+#define	bhv_vop_mkdir(dp,d,vap,vpp,cr)					\
+		VOP(vop_mkdir, dp)(VNHEAD(dp),d,vap,vpp,cr)
+#define	bhv_vop_rmdir(dp,d,cr)	 	VOP(vop_rmdir, dp)(VNHEAD(dp),d,cr)
+#define	bhv_vop_readdir(vp,uiop,cr,eofp)				\
+		VOP(vop_readdir, vp)(VNHEAD(vp),uiop,cr,eofp)
+#define	bhv_vop_symlink(dvp,d,vap,tnm,vpp,cr)				\
+		VOP(vop_symlink, dvp)(VNHEAD(dvp),d,vap,tnm,vpp,cr)
+#define	bhv_vop_readlink(vp,uiop,fl,cr)					\
+		VOP(vop_readlink, vp)(VNHEAD(vp),uiop,fl,cr)
+#define	bhv_vop_fsync(vp,f,cr,b,e)	VOP(vop_fsync, vp)(VNHEAD(vp),f,cr,b,e)
+#define bhv_vop_inactive(vp,cr)		VOP(vop_inactive, vp)(VNHEAD(vp),cr)
+#define bhv_vop_release(vp)		VOP(vop_release, vp)(VNHEAD(vp))
+#define bhv_vop_fid2(vp,fidp)		VOP(vop_fid2, vp)(VNHEAD(vp),fidp)
+#define bhv_vop_rwlock(vp,i)		VOP(vop_rwlock, vp)(VNHEAD(vp),i)
+#define bhv_vop_rwlock_try(vp,i)	VOP(vop_rwlock, vp)(VNHEAD(vp),i)
+#define bhv_vop_rwunlock(vp,i)		VOP(vop_rwunlock, vp)(VNHEAD(vp),i)
+#define bhv_vop_frlock(vp,c,fl,flags,offset,fr)				\
+		VOP(vop_frlock, vp)(VNHEAD(vp),c,fl,flags,offset,fr)
+#define bhv_vop_reclaim(vp)		VOP(vop_reclaim, vp)(VNHEAD(vp))
+#define bhv_vop_attr_get(vp, name, val, vallenp, fl, cred)		\
+		VOP(vop_attr_get, vp)(VNHEAD(vp),name,val,vallenp,fl,cred)
+#define	bhv_vop_attr_set(vp, name, val, vallen, fl, cred)		\
+		VOP(vop_attr_set, vp)(VNHEAD(vp),name,val,vallen,fl,cred)
+#define	bhv_vop_attr_remove(vp, name, flags, cred)			\
+		VOP(vop_attr_remove, vp)(VNHEAD(vp),name,flags,cred)
+#define	bhv_vop_attr_list(vp, buf, buflen, fl, cursor, cred)		\
+		VOP(vop_attr_list, vp)(VNHEAD(vp),buf,buflen,fl,cursor,cred)
+#define bhv_vop_link_removed(vp, dvp, linkzero)				\
+		VOP(vop_link_removed, vp)(VNHEAD(vp), dvp, linkzero)
+#define bhv_vop_vnode_change(vp, cmd, val)				\
+		VOP(vop_vnode_change, vp)(VNHEAD(vp), cmd, val)
+#define bhv_vop_toss_pages(vp, first, last, fiopt)			\
+		VOP(vop_tosspages, vp)(VNHEAD(vp), first, last, fiopt)
+#define bhv_vop_flushinval_pages(vp, first, last, fiopt)		\
+		VOP(vop_flushinval_pages, vp)(VNHEAD(vp),first,last,fiopt)
+#define bhv_vop_flush_pages(vp, first, last, flags, fiopt)		\
+		VOP(vop_flush_pages, vp)(VNHEAD(vp),first,last,flags,fiopt)
+#define bhv_vop_ioctl(vp, inode, filp, fl, cmd, arg)			\
+		VOP(vop_ioctl, vp)(VNHEAD(vp),inode,filp,fl,cmd,arg)
+#define bhv_vop_iflush(vp, flags)	VOP(vop_iflush, vp)(VNHEAD(vp), flags)
 
 /*
  * Flags for read/write calls - same values as IRIX
@@ -377,7 +326,7 @@ typedef struct vnodeops {
 #define IO_INVIS	0x00020		/* don't update inode timestamps */
 
 /*
- * Flags for VOP_IFLUSH call
+ * Flags for vop_iflush call
  */
 #define FLUSH_SYNC		1	/* wait for flush to complete	*/
 #define FLUSH_INODE		2	/* flush the inode itself	*/
@@ -385,8 +334,7 @@ typedef struct vnodeops {
 					 * this inode out to disk	*/
 
 /*
- * Flush/Invalidate options for VOP_TOSS_PAGES, VOP_FLUSHINVAL_PAGES and
- *	VOP_FLUSH_PAGES.
+ * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
  */
 #define FI_NONE			0	/* none */
 #define FI_REMAPF		1	/* Do a remapf prior to the operation */
@@ -398,7 +346,7 @@ typedef struct vnodeops {
  * Vnode attributes.  va_mask indicates those attributes the caller
  * wants to set or extract.
  */
-typedef struct vattr {
+typedef struct bhv_vattr {
 	int		va_mask;	/* bit-mask of attributes present */
 	mode_t		va_mode;	/* file access mode and type */
 	xfs_nlink_t	va_nlink;	/* number of references to file */
@@ -418,7 +366,7 @@ typedef struct vattr {
 	u_long		va_nextents;	/* number of extents in file */
 	u_long		va_anextents;	/* number of attr extents in file */
 	prid_t		va_projid;	/* project id */
-} vattr_t;
+} bhv_vattr_t;
 
 /*
  * setattr or getattr attributes
@@ -492,29 +440,17 @@ typedef struct vattr {
 	(VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
 
 extern void	vn_init(void);
-extern vnode_t	*vn_initialize(struct inode *);
-
-/*
- * vnode_map structures _must_ match vn_epoch and vnode structure sizes.
- */
-typedef struct vnode_map {
-	vfs_t		*v_vfsp;
-	vnumber_t	v_number;		/* in-core vnode number */
-	xfs_ino_t	v_ino;			/* inode #	*/
-} vmap_t;
-
-#define VMAP(vp, vmap)	{(vmap).v_vfsp	 = (vp)->v_vfsp,	\
-			 (vmap).v_number = (vp)->v_number,	\
-			 (vmap).v_ino	 = (vp)->v_inode.i_ino; }
+extern bhv_vnode_t	*vn_initialize(struct inode *);
+extern int	vn_revalidate(struct bhv_vnode *);
+extern int	__vn_revalidate(struct bhv_vnode *, bhv_vattr_t *);
+extern void	vn_revalidate_core(struct bhv_vnode *, bhv_vattr_t *);
 
-extern int	vn_revalidate(struct vnode *);
-extern int	__vn_revalidate(struct vnode *, vattr_t *);
-extern void	vn_revalidate_core(struct vnode *, vattr_t *);
+extern void	vn_iowait(struct bhv_vnode *vp);
+extern void	vn_iowake(struct bhv_vnode *vp);
 
-extern void	vn_iowait(struct vnode *vp);
-extern void	vn_iowake(struct vnode *vp);
+extern void	vn_ioerror(struct bhv_vnode *vp, int error, char *f, int l);
 
-static inline int vn_count(struct vnode *vp)
+static inline int vn_count(struct bhv_vnode *vp)
 {
 	return atomic_read(&vn_to_inode(vp)->i_count);
 }
@@ -522,7 +458,7 @@ static inline int vn_count(struct vnode *vp)
 /*
  * Vnode reference counting functions (and macros for compatibility).
  */
-extern vnode_t	*vn_hold(struct vnode *);
+extern bhv_vnode_t	*vn_hold(struct bhv_vnode *);
 
 #if defined(XFS_VNODE_TRACE)
 #define VN_HOLD(vp)		\
@@ -536,7 +472,7 @@ extern vnode_t	*vn_hold(struct vnode *);
 #define VN_RELE(vp)		(iput(vn_to_inode(vp)))
 #endif
 
-static inline struct vnode *vn_grab(struct vnode *vp)
+static inline struct bhv_vnode *vn_grab(struct bhv_vnode *vp)
 {
 	struct inode *inode = igrab(vn_to_inode(vp));
 	return inode ? vn_from_inode(inode) : NULL;
@@ -554,32 +490,39 @@ static inline struct vnode *vn_grab(struct vnode *vp)
  */
 #define VN_LOCK(vp)		mutex_spinlock(&(vp)->v_lock)
 #define VN_UNLOCK(vp, s)	mutex_spinunlock(&(vp)->v_lock, s)
-#define VN_FLAGSET(vp,b)	vn_flagset(vp,b)
-#define VN_FLAGCLR(vp,b)	vn_flagclr(vp,b)
 
-static __inline__ void vn_flagset(struct vnode *vp, uint flag)
+static __inline__ void vn_flagset(struct bhv_vnode *vp, uint flag)
 {
 	spin_lock(&vp->v_lock);
 	vp->v_flag |= flag;
 	spin_unlock(&vp->v_lock);
 }
 
-static __inline__ void vn_flagclr(struct vnode *vp, uint flag)
+static __inline__ uint vn_flagclr(struct bhv_vnode *vp, uint flag)
 {
+	uint	cleared;
+
 	spin_lock(&vp->v_lock);
+	cleared = (vp->v_flag & flag);
 	vp->v_flag &= ~flag;
 	spin_unlock(&vp->v_lock);
+	return cleared;
 }
 
+#define VMODIFY(vp)	vn_flagset(vp, VMODIFIED)
+#define VUNMODIFY(vp)	vn_flagclr(vp, VMODIFIED)
+#define VTRUNCATE(vp)	vn_flagset(vp, VTRUNCATED)
+#define VUNTRUNCATE(vp)	vn_flagclr(vp, VTRUNCATED)
+
 /*
  * Dealing with bad inodes
  */
-static inline void vn_mark_bad(struct vnode *vp)
+static inline void vn_mark_bad(struct bhv_vnode *vp)
 {
 	make_bad_inode(vn_to_inode(vp));
 }
 
-static inline int VN_BAD(struct vnode *vp)
+static inline int VN_BAD(struct bhv_vnode *vp)
 {
 	return is_bad_inode(vn_to_inode(vp));
 }
@@ -587,18 +530,18 @@ static inline int VN_BAD(struct vnode *vp)
 /*
  * Extracting atime values in various formats
  */
-static inline void vn_atime_to_bstime(struct vnode *vp, xfs_bstime_t *bs_atime)
+static inline void vn_atime_to_bstime(bhv_vnode_t *vp, xfs_bstime_t *bs_atime)
 {
 	bs_atime->tv_sec = vp->v_inode.i_atime.tv_sec;
 	bs_atime->tv_nsec = vp->v_inode.i_atime.tv_nsec;
 }
 
-static inline void vn_atime_to_timespec(struct vnode *vp, struct timespec *ts)
+static inline void vn_atime_to_timespec(bhv_vnode_t *vp, struct timespec *ts)
 {
 	*ts = vp->v_inode.i_atime;
 }
 
-static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
+static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
 {
 	*tt = vp->v_inode.i_atime.tv_sec;
 }
@@ -610,11 +553,10 @@ static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
 #define VN_CACHED(vp)	(vn_to_inode(vp)->i_mapping->nrpages)
 #define VN_DIRTY(vp)	mapping_tagged(vn_to_inode(vp)->i_mapping, \
 					PAGECACHE_TAG_DIRTY)
-#define VMODIFY(vp)	VN_FLAGSET(vp, VMODIFIED)
-#define VUNMODIFY(vp)	VN_FLAGCLR(vp, VMODIFIED)
+#define VN_TRUNC(vp)	((vp)->v_flag & VTRUNCATED)
 
 /*
- * Flags to VOP_SETATTR/VOP_GETATTR.
+ * Flags to vop_setattr/getattr.
  */
 #define	ATTR_UTIME	0x01	/* non-default utime(2) request */
 #define	ATTR_DMI	0x08	/* invocation from a DMI function */
@@ -624,7 +566,7 @@ static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
 #define ATTR_NOSIZETOK	0x400	/* Don't get the SIZE token */
 
 /*
- * Flags to VOP_FSYNC and VOP_RECLAIM.
+ * Flags to vop_fsync/reclaim.
  */
 #define FSYNC_NOWAIT	0	/* asynchronous flush */
 #define FSYNC_WAIT	0x1	/* synchronous fsync or forced reclaim */
@@ -643,11 +585,11 @@ static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
 #define	VNODE_KTRACE_REF	4
 #define	VNODE_KTRACE_RELE	5
 
-extern void vn_trace_entry(struct vnode *, const char *, inst_t *);
-extern void vn_trace_exit(struct vnode *, const char *, inst_t *);
-extern void vn_trace_hold(struct vnode *, char *, int, inst_t *);
-extern void vn_trace_ref(struct vnode *, char *, int, inst_t *);
-extern void vn_trace_rele(struct vnode *, char *, int, inst_t *);
+extern void vn_trace_entry(struct bhv_vnode *, const char *, inst_t *);
+extern void vn_trace_exit(struct bhv_vnode *, const char *, inst_t *);
+extern void vn_trace_hold(struct bhv_vnode *, char *, int, inst_t *);
+extern void vn_trace_ref(struct bhv_vnode *, char *, int, inst_t *);
+extern void vn_trace_rele(struct bhv_vnode *, char *, int, inst_t *);
 
 #define	VN_TRACE(vp)		\
 	vn_trace_ref(vp, __FILE__, __LINE__, (inst_t *)__return_address)
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 772ac48329e..3aa77153185 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -23,7 +23,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -444,7 +442,7 @@ xfs_qm_dqalloc(
 			      XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
 			      &firstblock,
 			      XFS_QM_DQALLOC_SPACE_RES(mp),
-			      &map, &nmaps, &flist))) {
+			      &map, &nmaps, &flist, NULL))) {
 		goto error0;
 	}
 	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
@@ -559,7 +557,7 @@ xfs_qm_dqtobp(
 		error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
 				  XFS_DQUOT_CLUSTER_SIZE_FSB,
 				  XFS_BMAPI_METADATA,
-				  NULL, 0, &map, &nmaps, NULL);
+				  NULL, 0, &map, &nmaps, NULL, NULL);
 
 		xfs_iunlock(quotip, XFS_ILOCK_SHARED);
 		if (error)
@@ -1261,7 +1259,7 @@ xfs_qm_dqflush(
 
 	if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id),
 			   0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) {
-		xfs_force_shutdown(dqp->q_mount, XFS_CORRUPT_INCORE);
+		xfs_force_shutdown(dqp->q_mount, SHUTDOWN_CORRUPT_INCORE);
 		return XFS_ERROR(EIO);
 	}
 
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index c0c629663a5..78d3ab95c5f 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -119,7 +119,7 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
  */
 #define xfs_dqflock(dqp)	 { psema(&((dqp)->q_flock), PINOD | PRECALC);\
 				   (dqp)->dq_flags |= XFS_DQ_FLOCKED; }
-#define xfs_dqfunlock(dqp)	 { ASSERT(valusema(&((dqp)->q_flock)) <= 0); \
+#define xfs_dqfunlock(dqp)	 { ASSERT(issemalocked(&((dqp)->q_flock))); \
 				   vsema(&((dqp)->q_flock)); \
 				   (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); }
 
@@ -128,7 +128,7 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
 #define XFS_DQ_PINUNLOCK(dqp, s)   mutex_spinunlock( \
 				     &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s)
 
-#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (valusema(&((dqp)->q_flock)) <= 0)
+#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (issemalocked(&((dqp)->q_flock)))
 #define XFS_DQ_IS_ON_FREELIST(dqp)  ((dqp)->dq_flnext != (dqp))
 #define XFS_DQ_IS_DIRTY(dqp)	((dqp)->dq_flags & XFS_DQ_DIRTY)
 #define XFS_QM_ISUDQ(dqp)	((dqp)->dq_flags & XFS_DQ_USER)
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 546f48af882..5b2dcc58b24 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -23,7 +23,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -248,7 +246,7 @@ xfs_qm_dquot_logitem_pushbuf(
 	 * inode flush completed and the inode was taken off the AIL.
 	 * So, just get out.
 	 */
-	if ((valusema(&(dqp->q_flock)) > 0)  ||
+	if (!issemalocked(&(dqp->q_flock))  ||
 	    ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
 		qip->qli_pushbuf_flag = 0;
 		xfs_dqunlock(dqp);
@@ -261,7 +259,7 @@ xfs_qm_dquot_logitem_pushbuf(
 	if (bp != NULL) {
 		if (XFS_BUF_ISDELAYWRITE(bp)) {
 			dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
-				  (valusema(&(dqp->q_flock)) <= 0));
+				  issemalocked(&(dqp->q_flock)));
 			qip->qli_pushbuf_flag = 0;
 			xfs_dqunlock(dqp);
 
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 7fb5eca9bd5..e23e45535c4 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -24,7 +24,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
@@ -33,7 +32,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -1603,7 +1601,7 @@ xfs_qm_dqiterate(
 				  maxlblkcnt - lblkno,
 				  XFS_BMAPI_METADATA,
 				  NULL,
-				  0, map, &nmaps, NULL);
+				  0, map, &nmaps, NULL, NULL);
 		xfs_iunlock(qip, XFS_ILOCK_SHARED);
 		if (error)
 			break;
@@ -1905,9 +1903,7 @@ xfs_qm_quotacheck(
 		 */
 		if ((error = xfs_bulkstat(mp, &lastino, &count,
 				     xfs_qm_dqusage_adjust, NULL,
-				     structsz, NULL,
-				     BULKSTAT_FG_IGET|BULKSTAT_FG_VFSLOCKED,
-				     &done)))
+				     structsz, NULL, BULKSTAT_FG_IGET, &done)))
 			break;
 
 	} while (! done);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 6838b36d95a..e95e99f7168 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -24,7 +24,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
@@ -33,7 +32,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -129,7 +127,7 @@ xfs_qm_parseargs(
 		return XFS_ERROR(EINVAL);
 	}
 
-	PVFS_PARSEARGS(BHV_NEXT(bhv), options, args, update, error);
+	error = bhv_next_vfs_parseargs(BHV_NEXT(bhv), options, args, update);
 	if (!error && !referenced)
 		bhv_remove_vfsops(bhvtovfs(bhv), VFS_POSITION_QM);
 	return error;
@@ -140,9 +138,8 @@ xfs_qm_showargs(
 	struct bhv_desc		*bhv,
 	struct seq_file		*m)
 {
-	struct vfs		*vfsp = bhvtovfs(bhv);
+	struct bhv_vfs		*vfsp = bhvtovfs(bhv);
 	struct xfs_mount	*mp = XFS_VFSTOM(vfsp);
-	int			error;
 
 	if (mp->m_qflags & XFS_UQUOTA_ACCT) {
 		(mp->m_qflags & XFS_UQUOTA_ENFD) ?
@@ -165,8 +162,7 @@ xfs_qm_showargs(
 	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
 		seq_puts(m, "," MNTOPT_NOQUOTA);
 
-	PVFS_SHOWARGS(BHV_NEXT(bhv), m, error);
-	return error;
+	return bhv_next_vfs_showargs(BHV_NEXT(bhv), m);
 }
 
 STATIC int
@@ -175,14 +171,67 @@ xfs_qm_mount(
 	struct xfs_mount_args	*args,
 	struct cred		*cr)
 {
-	struct vfs		*vfsp = bhvtovfs(bhv);
+	struct bhv_vfs		*vfsp = bhvtovfs(bhv);
 	struct xfs_mount	*mp = XFS_VFSTOM(vfsp);
-	int			error;
 
 	if (args->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA | XFSMNT_PQUOTA))
 		xfs_qm_mount_quotainit(mp, args->flags);
-	PVFS_MOUNT(BHV_NEXT(bhv), args, cr, error);
-	return error;
+	return bhv_next_vfs_mount(BHV_NEXT(bhv), args, cr);
+}
+
+/*
+ * Directory tree accounting uses project quotas, with the project id
+ * inherited from parent directories.  A statvfs (df, etc.) of such a
+ * directory reports the project's limits and usage instead of the whole
+ * filesystem, so the tree appears to be a filesystem in itself.  The dquot
+ * core holds on-disk (big-endian) values, which are converted before use.
+ */
+STATIC int
+xfs_qm_statvfs(
+	struct bhv_desc		*bhv,
+	bhv_statvfs_t		*statp,
+	struct bhv_vnode	*vnode)
+{
+	xfs_mount_t		*mp;
+	xfs_inode_t		*ip;
+	xfs_dquot_t		*dqp;
+	xfs_disk_dquot_t	*dp;
+	__uint64_t		limit, used;
+	int			error;
+
+	error = bhv_next_vfs_statvfs(BHV_NEXT(bhv), statp, vnode);
+	if (error || !vnode)
+		return error;
+
+	mp = XFS_BHVTOM(bhv);
+	ip = xfs_vtoi(vnode);
+
+	if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
+	    !(mp->m_qflags & XFS_PQUOTA_ACCT) ||
+	    !(mp->m_qflags & XFS_OQUOTA_ENFD))
+		return 0;	/* keep the whole-filesystem numbers */
+
+	if (xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp))
+		return 0;
+	dp = &dqp->q_core;
+
+	limit = be64_to_cpu(dp->d_blk_softlimit ?
+				dp->d_blk_softlimit : dp->d_blk_hardlimit);
+	used = be64_to_cpu(dp->d_bcount);
+	if (limit && statp->f_blocks > limit) {
+		statp->f_blocks = limit;
+		statp->f_bfree = (limit > used) ? (limit - used) : 0;
+	}
+	limit = be64_to_cpu(dp->d_ino_softlimit ?
+				dp->d_ino_softlimit : dp->d_ino_hardlimit);
+	used = be64_to_cpu(dp->d_icount);
+	if (limit && statp->f_files > limit) {
+		statp->f_files = limit;
+		statp->f_ffree = (limit > used) ? (limit - used) : 0;
+	}
+
+	xfs_qm_dqput(dqp);
+	return 0;
 }
 
 STATIC int
@@ -191,7 +240,7 @@ xfs_qm_syncall(
 	int			flags,
 	cred_t			*credp)
 {
-	struct vfs		*vfsp = bhvtovfs(bhv);
+	struct bhv_vfs		*vfsp = bhvtovfs(bhv);
 	struct xfs_mount	*mp = XFS_VFSTOM(vfsp);
 	int			error;
 
@@ -210,8 +259,7 @@ xfs_qm_syncall(
 			}
 		}
 	}
-	PVFS_SYNC(BHV_NEXT(bhv), flags, credp, error);
-	return error;
+	return bhv_next_vfs_sync(BHV_NEXT(bhv), flags, credp);
 }
 
 STATIC int
@@ -346,11 +394,12 @@ STATIC struct xfs_qmops xfs_qmcore_xfs = {
 	.xfs_dqtrxops		= &xfs_trans_dquot_ops,
 };
 
-struct bhv_vfsops xfs_qmops = { {
+struct bhv_module_vfsops xfs_qmops = { {
 	BHV_IDENTITY_INIT(VFS_BHV_QM, VFS_POSITION_QM),
 	.vfs_parseargs		= xfs_qm_parseargs,
 	.vfs_showargs		= xfs_qm_showargs,
 	.vfs_mount		= xfs_qm_mount,
+	.vfs_statvfs		= xfs_qm_statvfs,
 	.vfs_sync		= xfs_qm_syncall,
 	.vfs_quotactl		= xfs_qm_quotactl, },
 };
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 0570f773355..6f858fb81a3 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -23,7 +23,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index c55db463bbf..ed620c4d159 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -26,7 +26,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
@@ -35,7 +34,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -91,8 +89,8 @@ xfs_qm_quotactl(
 	xfs_caddr_t	addr)
 {
 	xfs_mount_t	*mp;
+	bhv_vfs_t	*vfsp;
 	int		error;
-	struct vfs	*vfsp;
 
 	vfsp = bhvtovfs(bdp);
 	mp = XFS_VFSTOM(vfsp);
@@ -1035,7 +1033,7 @@ xfs_qm_dqrele_all_inodes(
 {
 	xfs_inode_t	*ip, *topino;
 	uint		ireclaims;
-	vnode_t		*vp;
+	bhv_vnode_t	*vp;
 	boolean_t	vnode_refd;
 
 	ASSERT(mp->m_quotainfo);
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 9168918db25..0242e9666e8 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -23,7 +23,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
@@ -33,7 +32,6 @@
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
 #include "xfs_attr_sf.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index b08b3d9345b..36fbeccdc72 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -47,7 +47,7 @@ cmn_err(register int level, char *fmt, ...)
 	va_start(ap, fmt);
 	if (*fmt == '!') fp++;
 	len = vsprintf(message, fp, ap);
-	if (message[len-1] != '\n')
+	if (level != CE_DEBUG && message[len-1] != '\n')
 		strcat(message, "\n");
 	printk("%s%s", err_level[level], message);
 	va_end(ap);
@@ -68,7 +68,7 @@ icmn_err(register int level, char *fmt, va_list ap)
 		level = XFS_MAX_ERR_LEVEL;
 	spin_lock_irqsave(&xfs_err_lock,flags);
 	len = vsprintf(message, fmt, ap);
-	if (message[len-1] != '\n')
+	if (level != CE_DEBUG && message[len-1] != '\n')
 		strcat(message, "\n");
 	spin_unlock_irqrestore(&xfs_err_lock,flags);
 	printk("%s%s", err_level[level], message);
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index e3bf58112e7..4f54dca662a 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -33,9 +33,6 @@ extern void cmn_err(int, char *, ...)
 	__attribute__ ((format (printf, 2, 3)));
 extern void assfail(char *expr, char *f, int l);
 
-#define prdev(fmt,targ,args...) \
-	printk("Device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
-
 #define ASSERT_ALWAYS(expr)	\
 	(unlikely((expr) != 0) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
 
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 2539af34eb6..4b0cb474be4 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -21,12 +21,10 @@
 #include "xfs_bit.h"
 #include "xfs_inum.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -39,15 +37,15 @@
 #include <linux/capability.h>
 #include <linux/posix_acl_xattr.h>
 
-STATIC int	xfs_acl_setmode(vnode_t *, xfs_acl_t *, int *);
+STATIC int	xfs_acl_setmode(bhv_vnode_t *, xfs_acl_t *, int *);
 STATIC void     xfs_acl_filter_mode(mode_t, xfs_acl_t *);
 STATIC void	xfs_acl_get_endian(xfs_acl_t *);
 STATIC int	xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *);
 STATIC int	xfs_acl_invalid(xfs_acl_t *);
 STATIC void	xfs_acl_sync_mode(mode_t, xfs_acl_t *);
-STATIC void	xfs_acl_get_attr(vnode_t *, xfs_acl_t *, int, int, int *);
-STATIC void	xfs_acl_set_attr(vnode_t *, xfs_acl_t *, int, int *);
-STATIC int	xfs_acl_allow_set(vnode_t *, int);
+STATIC void	xfs_acl_get_attr(bhv_vnode_t *, xfs_acl_t *, int, int, int *);
+STATIC void	xfs_acl_set_attr(bhv_vnode_t *, xfs_acl_t *, int, int *);
+STATIC int	xfs_acl_allow_set(bhv_vnode_t *, int);
 
 kmem_zone_t *xfs_acl_zone;
 
@@ -57,7 +55,7 @@ kmem_zone_t *xfs_acl_zone;
  */
 int
 xfs_acl_vhasacl_access(
-	vnode_t		*vp)
+	bhv_vnode_t	*vp)
 {
 	int		error;
 
@@ -70,7 +68,7 @@ xfs_acl_vhasacl_access(
  */
 int
 xfs_acl_vhasacl_default(
-	vnode_t		*vp)
+	bhv_vnode_t	*vp)
 {
 	int		error;
 
@@ -209,7 +207,7 @@ posix_acl_xfs_to_xattr(
 
 int
 xfs_acl_vget(
-	vnode_t		*vp,
+	bhv_vnode_t	*vp,
 	void		*acl,
 	size_t		size,
 	int		kind)
@@ -241,10 +239,10 @@ xfs_acl_vget(
 			goto out;
 		}
 		if (kind == _ACL_TYPE_ACCESS) {
-			vattr_t	va;
+			bhv_vattr_t	va;
 
 			va.va_mask = XFS_AT_MODE;
-			VOP_GETATTR(vp, &va, 0, sys_cred, error);
+			error = bhv_vop_getattr(vp, &va, 0, sys_cred);
 			if (error)
 				goto out;
 			xfs_acl_sync_mode(va.va_mode, xfs_acl);
@@ -260,7 +258,7 @@ out:
 
 int
 xfs_acl_vremove(
-	vnode_t		*vp,
+	bhv_vnode_t	*vp,
 	int		kind)
 {
 	int		error;
@@ -268,9 +266,9 @@ xfs_acl_vremove(
 	VN_HOLD(vp);
 	error = xfs_acl_allow_set(vp, kind);
 	if (!error) {
-		VOP_ATTR_REMOVE(vp, kind == _ACL_TYPE_DEFAULT?
-				SGI_ACL_DEFAULT: SGI_ACL_FILE,
-				ATTR_ROOT, sys_cred, error);
+		error = bhv_vop_attr_remove(vp, kind == _ACL_TYPE_DEFAULT?
+						SGI_ACL_DEFAULT: SGI_ACL_FILE,
+						ATTR_ROOT, sys_cred);
 		if (error == ENOATTR)
 			error = 0;	/* 'scool */
 	}
@@ -280,7 +278,7 @@ xfs_acl_vremove(
 
 int
 xfs_acl_vset(
-	vnode_t			*vp,
+	bhv_vnode_t		*vp,
 	void			*acl,
 	size_t			size,
 	int			kind)
@@ -370,10 +368,10 @@ xfs_acl_iaccess(
 
 STATIC int
 xfs_acl_allow_set(
-	vnode_t		*vp,
+	bhv_vnode_t	*vp,
 	int		kind)
 {
-	vattr_t		va;
+	bhv_vattr_t	va;
 	int		error;
 
 	if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
@@ -383,7 +381,7 @@ xfs_acl_allow_set(
 	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
 		return EROFS;
 	va.va_mask = XFS_AT_UID;
-	VOP_GETATTR(vp, &va, 0, NULL, error);
+	error = bhv_vop_getattr(vp, &va, 0, NULL);
 	if (error)
 		return error;
 	if (va.va_uid != current->fsuid && !capable(CAP_FOWNER))
@@ -606,7 +604,7 @@ xfs_acl_get_endian(
  */
 STATIC void
 xfs_acl_get_attr(
-	vnode_t		*vp,
+	bhv_vnode_t	*vp,
 	xfs_acl_t	*aclp,
 	int		kind,
 	int		flags,
@@ -616,9 +614,9 @@ xfs_acl_get_attr(
 
 	ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1);
 	flags |= ATTR_ROOT;
-	VOP_ATTR_GET(vp,
-		kind == _ACL_TYPE_ACCESS ? SGI_ACL_FILE : SGI_ACL_DEFAULT,
-		(char *)aclp, &len, flags, sys_cred, *error);
+	*error = bhv_vop_attr_get(vp, kind == _ACL_TYPE_ACCESS ?
+					SGI_ACL_FILE : SGI_ACL_DEFAULT,
+					(char *)aclp, &len, flags, sys_cred);
 	if (*error || (flags & ATTR_KERNOVAL))
 		return;
 	xfs_acl_get_endian(aclp);
@@ -629,7 +627,7 @@ xfs_acl_get_attr(
  */
 STATIC void
 xfs_acl_set_attr(
-	vnode_t		*vp,
+	bhv_vnode_t	*vp,
 	xfs_acl_t	*aclp,
 	int		kind,
 	int		*error)
@@ -654,19 +652,19 @@ xfs_acl_set_attr(
 		INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm);
 	}
 	INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
-	VOP_ATTR_SET(vp,
-		kind == _ACL_TYPE_ACCESS ? SGI_ACL_FILE: SGI_ACL_DEFAULT,
-		(char *)newacl, len, ATTR_ROOT, sys_cred, *error);
+	*error = bhv_vop_attr_set(vp, kind == _ACL_TYPE_ACCESS ?
+				SGI_ACL_FILE: SGI_ACL_DEFAULT,
+				(char *)newacl, len, ATTR_ROOT, sys_cred);
 	_ACL_FREE(newacl);
 }
 
 int
 xfs_acl_vtoacl(
-	vnode_t		*vp,
+	bhv_vnode_t	*vp,
 	xfs_acl_t	*access_acl,
 	xfs_acl_t	*default_acl)
 {
-	vattr_t		va;
+	bhv_vattr_t	va;
 	int		error = 0;
 
 	if (access_acl) {
@@ -678,7 +676,7 @@ xfs_acl_vtoacl(
 		if (!error) {
 			/* Got the ACL, need the mode... */
 			va.va_mask = XFS_AT_MODE;
-			VOP_GETATTR(vp, &va, 0, sys_cred, error);
+			error = bhv_vop_getattr(vp, &va, 0, sys_cred);
 		}
 
 		if (error)
@@ -701,8 +699,8 @@ xfs_acl_vtoacl(
  */
 int
 xfs_acl_inherit(
-	vnode_t		*vp,
-	vattr_t		*vap,
+	bhv_vnode_t	*vp,
+	bhv_vattr_t	*vap,
 	xfs_acl_t	*pdaclp)
 {
 	xfs_acl_t	*cacl;
@@ -757,11 +755,11 @@ xfs_acl_inherit(
  */
 STATIC int
 xfs_acl_setmode(
-	vnode_t		*vp,
+	bhv_vnode_t	*vp,
 	xfs_acl_t	*acl,
 	int		*basicperms)
 {
-	vattr_t		va;
+	bhv_vattr_t	va;
 	xfs_acl_entry_t	*ap;
 	xfs_acl_entry_t	*gap = NULL;
 	int		i, error, nomask = 1;
@@ -776,7 +774,7 @@ xfs_acl_setmode(
 	 * mode.  The m:: bits take precedence over the g:: bits.
 	 */
 	va.va_mask = XFS_AT_MODE;
-	VOP_GETATTR(vp, &va, 0, sys_cred, error);
+	error = bhv_vop_getattr(vp, &va, 0, sys_cred);
 	if (error)
 		return error;
 
@@ -810,8 +808,7 @@ xfs_acl_setmode(
 	if (gap && nomask)
 		va.va_mode |= gap->ae_perm << 3;
 
-	VOP_SETATTR(vp, &va, 0, sys_cred, error);
-	return error;
+	return bhv_vop_setattr(vp, &va, 0, sys_cred);
 }
 
 /*
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 538d0d65b04..f853cf1a627 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -50,7 +50,7 @@ typedef struct xfs_acl {
 #ifdef CONFIG_XFS_POSIX_ACL
 
 struct vattr;
-struct vnode;
+struct bhv_vnode;
 struct xfs_inode;
 
 extern struct kmem_zone *xfs_acl_zone;
@@ -58,14 +58,14 @@ extern struct kmem_zone *xfs_acl_zone;
 		(zone) = kmem_zone_init(sizeof(xfs_acl_t), (name))
 #define xfs_acl_zone_destroy(zone)	kmem_zone_destroy(zone)
 
-extern int xfs_acl_inherit(struct vnode *, struct vattr *, xfs_acl_t *);
+extern int xfs_acl_inherit(struct bhv_vnode *, struct bhv_vattr *, xfs_acl_t *);
 extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *);
-extern int xfs_acl_vtoacl(struct vnode *, xfs_acl_t *, xfs_acl_t *);
-extern int xfs_acl_vhasacl_access(struct vnode *);
-extern int xfs_acl_vhasacl_default(struct vnode *);
-extern int xfs_acl_vset(struct vnode *, void *, size_t, int);
-extern int xfs_acl_vget(struct vnode *, void *, size_t, int);
-extern int xfs_acl_vremove(struct vnode *vp, int);
+extern int xfs_acl_vtoacl(struct bhv_vnode *, xfs_acl_t *, xfs_acl_t *);
+extern int xfs_acl_vhasacl_access(struct bhv_vnode *);
+extern int xfs_acl_vhasacl_default(struct bhv_vnode *);
+extern int xfs_acl_vset(struct bhv_vnode *, void *, size_t, int);
+extern int xfs_acl_vget(struct bhv_vnode *, void *, size_t, int);
+extern int xfs_acl_vremove(struct bhv_vnode *, int);
 
 #define _ACL_TYPE_ACCESS	1
 #define _ACL_TYPE_DEFAULT	2
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 8558226281c..eef6763f3a6 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -1862,7 +1860,7 @@ xfs_alloc_fix_freelist(
 		(pag->pagf_longest - delta) :
 		(pag->pagf_flcount > 0 || pag->pagf_longest > 0);
 	if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
-	    (args->minleft &&
+	    (!(flags & XFS_ALLOC_FLAG_FREEING) &&
 	     (int)(pag->pagf_freeblks + pag->pagf_flcount -
 		   need - args->total) <
 	     (int)args->minleft)) {
@@ -1898,7 +1896,7 @@ xfs_alloc_fix_freelist(
 	longest = (longest > delta) ? (longest - delta) :
 		(be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
 	if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
-	     (args->minleft &&
+	     (!(flags & XFS_ALLOC_FLAG_FREEING) &&
 		(int)(be32_to_cpu(agf->agf_freeblks) +
 		   be32_to_cpu(agf->agf_flcount) - need - args->total) <
 	     (int)args->minleft)) {
@@ -1951,8 +1949,14 @@ xfs_alloc_fix_freelist(
 		 * the restrictions correctly.  Can happen for free calls
 		 * on a completely full ag.
 		 */
-		if (targs.agbno == NULLAGBLOCK)
+		if (targs.agbno == NULLAGBLOCK) {
+			if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
+				xfs_trans_brelse(tp, agflbp);
+				args->agbp = NULL;
+				return 0;
+			}
 			break;
+		}
 		/*
 		 * Put each allocated block on the list.
 		 */
@@ -2360,8 +2364,19 @@ xfs_alloc_vextent(
 			if (args->agno == sagno &&
 			    type == XFS_ALLOCTYPE_START_BNO)
 				args->type = XFS_ALLOCTYPE_THIS_AG;
-			if (++(args->agno) == mp->m_sb.sb_agcount)
-				args->agno = 0;
+			/*
+			 * For the first allocation, we can try any AG to get
+			 * space.  However, if we already have allocated a
+			 * block, we don't want to try AGs whose number is below
+			 * sagno.  Otherwise, we may end up with out-of-order
+			 * locking of AGF, which might cause deadlock.
+			 */
+			if (++(args->agno) == mp->m_sb.sb_agcount) {
+				if (args->firstblock != NULLFSBLOCK)
+					args->agno = sagno;
+				else
+					args->agno = 0;
+			}
 			/*
 			 * Reached the starting a.g., must either be done
 			 * or switch to non-trylock mode.
@@ -2443,7 +2458,7 @@ xfs_free_extent(
 	args.minlen = args.minleft = args.minalignslop = 0;
 	down_read(&args.mp->m_peraglock);
 	args.pag = &args.mp->m_perag[args.agno];
-	if ((error = xfs_alloc_fix_freelist(&args, 0)))
+	if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
 		goto error0;
 #ifdef DEBUG
 	ASSERT(args.agbp != NULL);
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 2d1f8928b26..650591f999a 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -41,6 +41,7 @@ typedef enum xfs_alloctype
  * Flags for xfs_alloc_fix_freelist.
  */
 #define	XFS_ALLOC_FLAG_TRYLOCK	0x00000001  /* use trylock for buffer locking */
+#define	XFS_ALLOC_FLAG_FREEING	0x00000002  /* caller is freeing extents */
 
 /*
  * Argument structure for xfs_alloc routines.
@@ -70,6 +71,7 @@ typedef struct xfs_alloc_arg {
 	char		wasfromfl;	/* set if allocation is from freelist */
 	char		isfl;		/* set if is freelist blocks - !acctg */
 	char		userdata;	/* set if this is user data */
+	xfs_fsblock_t	firstblock;	/* io first block allocated */
 } xfs_alloc_arg_t;
 
 /*
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index a1d92da86cc..7446556e802 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index b6e1e02bbb2..1a210104327 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -27,7 +27,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
@@ -35,7 +34,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -1910,7 +1908,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
 		error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno,
 				  args->rmtblkcnt,
 				  XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
-				  NULL, 0, map, &nmap, NULL);
+				  NULL, 0, map, &nmap, NULL, NULL);
 		if (error)
 			return(error);
 		ASSERT(nmap >= 1);
@@ -1988,7 +1986,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
 				  XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA |
 							XFS_BMAPI_WRITE,
 				  args->firstblock, args->total, &map, &nmap,
-				  args->flist);
+				  args->flist, NULL);
 		if (!error) {
 			error = xfs_bmap_finish(&args->trans, args->flist,
 						*args->firstblock, &committed);
@@ -2039,7 +2037,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
 		error = xfs_bmapi(NULL, dp, (xfs_fileoff_t)lblkno,
 				  args->rmtblkcnt,
 				  XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
-				  args->firstblock, 0, &map, &nmap, NULL);
+				  args->firstblock, 0, &map, &nmap,
+				  NULL, NULL);
 		if (error) {
 			return(error);
 		}
@@ -2104,7 +2103,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
 					args->rmtblkcnt,
 					XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
 					args->firstblock, 0, &map, &nmap,
-					args->flist);
+					args->flist, NULL);
 		if (error) {
 			return(error);
 		}
@@ -2142,7 +2141,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
 		XFS_BMAP_INIT(args->flist, args->firstblock);
 		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
 				    XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
-				    1, args->firstblock, args->flist, &done);
+				    1, args->firstblock, args->flist,
+				    NULL, &done);
 		if (!error) {
 			error = xfs_bmap_finish(&args->trans, args->flist,
 						*args->firstblock, &committed);
@@ -2322,56 +2322,56 @@ xfs_attr_trace_enter(int type, char *where,
 
 STATIC int
 posix_acl_access_set(
-	vnode_t	*vp, char *name, void *data, size_t size, int xflags)
+	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
 {
 	return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
 }
 
 STATIC int
 posix_acl_access_remove(
-	struct vnode *vp, char *name, int xflags)
+	bhv_vnode_t *vp, char *name, int xflags)
 {
 	return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
 }
 
 STATIC int
 posix_acl_access_get(
-	vnode_t *vp, char *name, void *data, size_t size, int xflags)
+	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
 {
 	return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
 }
 
 STATIC int
 posix_acl_access_exists(
-	vnode_t *vp)
+	bhv_vnode_t *vp)
 {
 	return xfs_acl_vhasacl_access(vp);
 }
 
 STATIC int
 posix_acl_default_set(
-	vnode_t	*vp, char *name, void *data, size_t size, int xflags)
+	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
 {
 	return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
 }
 
 STATIC int
 posix_acl_default_get(
-	vnode_t *vp, char *name, void *data, size_t size, int xflags)
+	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
 {
 	return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
 }
 
 STATIC int
 posix_acl_default_remove(
-	struct vnode *vp, char *name, int xflags)
+	bhv_vnode_t *vp, char *name, int xflags)
 {
 	return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT);
 }
 
 STATIC int
 posix_acl_default_exists(
-	vnode_t *vp)
+	bhv_vnode_t *vp)
 {
 	return xfs_acl_vhasacl_default(vp);
 }
@@ -2404,21 +2404,18 @@ STATIC struct attrnames *attr_system_names[] =
 
 STATIC int
 attr_generic_set(
-	struct vnode *vp, char *name, void *data, size_t size, int xflags)
+	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
 {
-	int 	error;
-
-	VOP_ATTR_SET(vp, name, data, size, xflags, NULL, error);
-	return -error;
+	return -bhv_vop_attr_set(vp, name, data, size, xflags, NULL);
 }
 
 STATIC int
 attr_generic_get(
-	struct vnode *vp, char *name, void *data, size_t size, int xflags)
+	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
 {
 	int	error, asize = size;
 
-	VOP_ATTR_GET(vp, name, data, &asize, xflags, NULL, error);
+	error = bhv_vop_attr_get(vp, name, data, &asize, xflags, NULL);
 	if (!error)
 		return asize;
 	return -error;
@@ -2426,12 +2423,9 @@ attr_generic_get(
 
 STATIC int
 attr_generic_remove(
-	struct vnode *vp, char *name, int xflags)
+	bhv_vnode_t *vp, char *name, int xflags)
 {
-	int	error;
-
-	VOP_ATTR_REMOVE(vp, name, xflags, NULL, error);
-	return -error;
+	return -bhv_vop_attr_remove(vp, name, xflags, NULL);
 }
 
 STATIC int
@@ -2459,7 +2453,7 @@ attr_generic_listadd(
 
 STATIC int
 attr_system_list(
-	struct vnode		*vp,
+	bhv_vnode_t		*vp,
 	void			*data,
 	size_t			size,
 	ssize_t			*result)
@@ -2481,12 +2475,12 @@ attr_system_list(
 
 int
 attr_generic_list(
-	struct vnode *vp, void *data, size_t size, int xflags, ssize_t *result)
+	bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result)
 {
 	attrlist_cursor_kern_t	cursor = { 0 };
 	int			error;
 
-	VOP_ATTR_LIST(vp, data, size, xflags, &cursor, NULL, error);
+	error = bhv_vop_attr_list(vp, data, size, xflags, &cursor, NULL);
 	if (error > 0)
 		return -error;
 	*result = -error;
@@ -2514,7 +2508,7 @@ attr_lookup_namespace(
  */
 STATIC int
 attr_user_capable(
-	struct vnode	*vp,
+	bhv_vnode_t	*vp,
 	cred_t		*cred)
 {
 	struct inode	*inode = vn_to_inode(vp);
@@ -2532,7 +2526,7 @@ attr_user_capable(
 
 STATIC int
 attr_trusted_capable(
-	struct vnode	*vp,
+	bhv_vnode_t	*vp,
 	cred_t		*cred)
 {
 	struct inode	*inode = vn_to_inode(vp);
@@ -2546,7 +2540,7 @@ attr_trusted_capable(
 
 STATIC int
 attr_secure_capable(
-	struct vnode	*vp,
+	bhv_vnode_t	*vp,
 	cred_t		*cred)
 {
 	return -ENOSECURITY;
@@ -2554,7 +2548,7 @@ attr_secure_capable(
 
 STATIC int
 attr_system_set(
-	struct vnode *vp, char *name, void *data, size_t size, int xflags)
+	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
 {
 	attrnames_t	*namesp;
 	int		error;
@@ -2573,7 +2567,7 @@ attr_system_set(
 
 STATIC int
 attr_system_get(
-	struct vnode *vp, char *name, void *data, size_t size, int xflags)
+	bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
 {
 	attrnames_t	*namesp;
 
@@ -2585,7 +2579,7 @@ attr_system_get(
 
 STATIC int
 attr_system_remove(
-	struct vnode *vp, char *name, int xflags)
+	bhv_vnode_t *vp, char *name, int xflags)
 {
 	attrnames_t	*namesp;
 
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index b2c7b9fcded..981633f6c07 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -36,13 +36,13 @@
  *========================================================================*/
 
 struct cred;
-struct vnode;
+struct bhv_vnode;
 
-typedef int (*attrset_t)(struct vnode *, char *, void *, size_t, int);
-typedef int (*attrget_t)(struct vnode *, char *, void *, size_t, int);
-typedef int (*attrremove_t)(struct vnode *, char *, int);
-typedef int (*attrexists_t)(struct vnode *);
-typedef int (*attrcapable_t)(struct vnode *, struct cred *);
+typedef int (*attrset_t)(struct bhv_vnode *, char *, void *, size_t, int);
+typedef int (*attrget_t)(struct bhv_vnode *, char *, void *, size_t, int);
+typedef int (*attrremove_t)(struct bhv_vnode *, char *, int);
+typedef int (*attrexists_t)(struct bhv_vnode *);
+typedef int (*attrcapable_t)(struct bhv_vnode *, struct cred *);
 
 typedef struct attrnames {
 	char *		attr_name;
@@ -63,7 +63,7 @@ extern struct attrnames attr_trusted;
 extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT];
 
 extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int);
-extern int attr_generic_list(struct vnode *, void *, size_t, int, ssize_t *);
+extern int attr_generic_list(struct bhv_vnode *, void *, size_t, int, ssize_t *);
 
 #define ATTR_DONTFOLLOW	0x0001	/* -- unused, from IRIX -- */
 #define ATTR_ROOT	0x0002	/* use attrs in root (trusted) namespace */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 9462be86aa1..9455051f012 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -24,7 +24,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
@@ -34,7 +33,6 @@
 #include "xfs_ialloc_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -2990,7 +2988,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
 		nmap = 1;
 		error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt,
 					XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
-					NULL, 0, &map, &nmap, NULL);
+					NULL, 0, &map, &nmap, NULL, NULL);
 		if (error) {
 			return(error);
 		}
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 26939d364bc..3a613753906 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -24,13 +24,11 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -40,13 +38,15 @@
 #include "xfs_mount.h"
 #include "xfs_ialloc.h"
 #include "xfs_itable.h"
+#include "xfs_dir2_data.h"
+#include "xfs_dir2_leaf.h"
+#include "xfs_dir2_block.h"
 #include "xfs_inode_item.h"
 #include "xfs_extfree_item.h"
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
-#include "xfs_dir_leaf.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_rw.h"
 #include "xfs_quota.h"
@@ -101,6 +101,7 @@ xfs_bmap_add_extent(
 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
 	int			*logflagsp, /* inode logging flags */
+	xfs_extdelta_t		*delta, /* Change made to incore extents */
 	int			whichfork, /* data or attr fork */
 	int			rsvd);	/* OK to allocate reserved blocks */
 
@@ -118,6 +119,7 @@ xfs_bmap_add_extent_delay_real(
 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
 	int			*logflagsp, /* inode logging flags */
+	xfs_extdelta_t		*delta, /* Change made to incore extents */
 	int			rsvd);	/* OK to allocate reserved blocks */
 
 /*
@@ -131,6 +133,7 @@ xfs_bmap_add_extent_hole_delay(
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	int			*logflagsp,/* inode logging flags */
+	xfs_extdelta_t		*delta, /* Change made to incore extents */
 	int			rsvd);	/* OK to allocate reserved blocks */
 
 /*
@@ -144,6 +147,7 @@ xfs_bmap_add_extent_hole_real(
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	int			*logflagsp, /* inode logging flags */
+	xfs_extdelta_t		*delta, /* Change made to incore extents */
 	int			whichfork); /* data or attr fork */
 
 /*
@@ -156,7 +160,8 @@ xfs_bmap_add_extent_unwritten_real(
 	xfs_extnum_t		idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
-	int			*logflagsp); /* inode logging flags */
+	int			*logflagsp, /* inode logging flags */
+	xfs_extdelta_t		*delta); /* Change made to incore extents */
 
 /*
  * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
@@ -203,6 +208,7 @@ xfs_bmap_del_extent(
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	int			*logflagsp,/* inode logging flags */
+	xfs_extdelta_t		*delta, /* Change made to incore extents */
 	int			whichfork, /* data or attr fork */
 	int			rsvd);	 /* OK to allocate reserved blocks */
 
@@ -510,7 +516,7 @@ xfs_bmap_add_attrfork_local(
 		dargs.total = mp->m_dirblkfsbs;
 		dargs.whichfork = XFS_DATA_FORK;
 		dargs.trans = tp;
-		error = XFS_DIR_SHORTFORM_TO_SINGLE(mp, &dargs);
+		error = xfs_dir2_sf_to_block(&dargs);
 	} else
 		error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags,
 			XFS_DATA_FORK);
@@ -530,6 +536,7 @@ xfs_bmap_add_extent(
 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
 	int			*logflagsp, /* inode logging flags */
+	xfs_extdelta_t		*delta, /* Change made to incore extents */
 	int			whichfork, /* data or attr fork */
 	int			rsvd)	/* OK to use reserved data blocks */
 {
@@ -567,6 +574,15 @@ xfs_bmap_add_extent(
 			logflags = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
 		} else
 			logflags = 0;
+		/* DELTA: single new extent */
+		if (delta) {
+			if (delta->xed_startoff > new->br_startoff)
+				delta->xed_startoff = new->br_startoff;
+			if (delta->xed_blockcount <
+					new->br_startoff + new->br_blockcount)
+				delta->xed_blockcount = new->br_startoff +
+						new->br_blockcount;
+		}
 	}
 	/*
 	 * Any kind of new delayed allocation goes here.
@@ -576,7 +592,7 @@ xfs_bmap_add_extent(
 			ASSERT((cur->bc_private.b.flags &
 				XFS_BTCUR_BPRV_WASDEL) == 0);
 		if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, cur, new,
-				&logflags, rsvd)))
+				&logflags, delta, rsvd)))
 			goto done;
 	}
 	/*
@@ -587,7 +603,7 @@ xfs_bmap_add_extent(
 			ASSERT((cur->bc_private.b.flags &
 				XFS_BTCUR_BPRV_WASDEL) == 0);
 		if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new,
-				&logflags, whichfork)))
+				&logflags, delta, whichfork)))
 			goto done;
 	} else {
 		xfs_bmbt_irec_t	prev;	/* old extent at offset idx */
@@ -612,17 +628,17 @@ xfs_bmap_add_extent(
 						XFS_BTCUR_BPRV_WASDEL);
 				if ((error = xfs_bmap_add_extent_delay_real(ip,
 					idx, &cur, new, &da_new, first, flist,
-					&logflags, rsvd)))
+					&logflags, delta, rsvd)))
 					goto done;
 			} else if (new->br_state == XFS_EXT_NORM) {
 				ASSERT(new->br_state == XFS_EXT_NORM);
 				if ((error = xfs_bmap_add_extent_unwritten_real(
-					ip, idx, &cur, new, &logflags)))
+					ip, idx, &cur, new, &logflags, delta)))
 					goto done;
 			} else {
 				ASSERT(new->br_state == XFS_EXT_UNWRITTEN);
 				if ((error = xfs_bmap_add_extent_unwritten_real(
-					ip, idx, &cur, new, &logflags)))
+					ip, idx, &cur, new, &logflags, delta)))
 					goto done;
 			}
 			ASSERT(*curp == cur || *curp == NULL);
@@ -635,7 +651,7 @@ xfs_bmap_add_extent(
 				ASSERT((cur->bc_private.b.flags &
 					XFS_BTCUR_BPRV_WASDEL) == 0);
 			if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur,
-					new, &logflags, whichfork)))
+					new, &logflags, delta, whichfork)))
 				goto done;
 		}
 	}
@@ -700,6 +716,7 @@ xfs_bmap_add_extent_delay_real(
 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
 	int			*logflagsp, /* inode logging flags */
+	xfs_extdelta_t		*delta, /* Change made to incore extents */
 	int			rsvd)	/* OK to use reserved data block allocation */
 {
 	xfs_btree_cur_t		*cur;	/* btree cursor */
@@ -716,8 +733,8 @@ xfs_bmap_add_extent_delay_real(
 					/* left is 0, right is 1, prev is 2 */
 	int			rval=0;	/* return value (logging flags) */
 	int			state = 0;/* state bits, accessed thru macros */
-	xfs_filblks_t		temp;	/* value for dnew calculations */
-	xfs_filblks_t		temp2;	/* value for dnew calculations */
+	xfs_filblks_t		temp=0;	/* value for dnew calculations */
+	xfs_filblks_t		temp2=0;/* value for dnew calculations */
 	int			tmp_rval;	/* partial logging flags */
 	enum {				/* bit number definitions for state */
 		LEFT_CONTIG,	RIGHT_CONTIG,
@@ -839,6 +856,11 @@ xfs_bmap_add_extent_delay_real(
 				goto done;
 		}
 		*dnew = 0;
+		/* DELTA: Three in-core extents are replaced by one. */
+		temp = LEFT.br_startoff;
+		temp2 = LEFT.br_blockcount +
+			PREV.br_blockcount +
+			RIGHT.br_blockcount;
 		break;
 
 	case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG):
@@ -872,6 +894,10 @@ xfs_bmap_add_extent_delay_real(
 				goto done;
 		}
 		*dnew = 0;
+		/* DELTA: Two in-core extents are replaced by one. */
+		temp = LEFT.br_startoff;
+		temp2 = LEFT.br_blockcount +
+			PREV.br_blockcount;
 		break;
 
 	case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG):
@@ -906,6 +932,10 @@ xfs_bmap_add_extent_delay_real(
 				goto done;
 		}
 		*dnew = 0;
+		/* DELTA: Two in-core extents are replaced by one. */
+		temp = PREV.br_startoff;
+		temp2 = PREV.br_blockcount +
+			RIGHT.br_blockcount;
 		break;
 
 	case MASK2(LEFT_FILLING, RIGHT_FILLING):
@@ -936,6 +966,9 @@ xfs_bmap_add_extent_delay_real(
 			ASSERT(i == 1);
 		}
 		*dnew = 0;
+		/* DELTA: The in-core extent described by new changed type. */
+		temp = new->br_startoff;
+		temp2 = new->br_blockcount;
 		break;
 
 	case MASK2(LEFT_FILLING, LEFT_CONTIG):
@@ -978,6 +1011,10 @@ xfs_bmap_add_extent_delay_real(
 		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx,
 			XFS_DATA_FORK);
 		*dnew = temp;
+		/* DELTA: The boundary between two in-core extents moved. */
+		temp = LEFT.br_startoff;
+		temp2 = LEFT.br_blockcount +
+			PREV.br_blockcount;
 		break;
 
 	case MASK(LEFT_FILLING):
@@ -1025,6 +1062,9 @@ xfs_bmap_add_extent_delay_real(
 		xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1,
 			XFS_DATA_FORK);
 		*dnew = temp;
+		/* DELTA: One in-core extent is split in two. */
+		temp = PREV.br_startoff;
+		temp2 = PREV.br_blockcount;
 		break;
 
 	case MASK2(RIGHT_FILLING, RIGHT_CONTIG):
@@ -1067,6 +1107,10 @@ xfs_bmap_add_extent_delay_real(
 		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
 			XFS_DATA_FORK);
 		*dnew = temp;
+		/* DELTA: The boundary between two in-core extents moved. */
+		temp = PREV.br_startoff;
+		temp2 = PREV.br_blockcount +
+			RIGHT.br_blockcount;
 		break;
 
 	case MASK(RIGHT_FILLING):
@@ -1112,6 +1156,9 @@ xfs_bmap_add_extent_delay_real(
 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
 		xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
 		*dnew = temp;
+		/* DELTA: One in-core extent is split in two. */
+		temp = PREV.br_startoff;
+		temp2 = PREV.br_blockcount;
 		break;
 
 	case 0:
@@ -1194,6 +1241,9 @@ xfs_bmap_add_extent_delay_real(
 		xfs_bmap_trace_post_update(fname, "0", ip, idx + 2,
 			XFS_DATA_FORK);
 		*dnew = temp + temp2;
+		/* DELTA: One in-core extent is split in three. */
+		temp = PREV.br_startoff;
+		temp2 = PREV.br_blockcount;
 		break;
 
 	case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
@@ -1209,6 +1259,13 @@ xfs_bmap_add_extent_delay_real(
 		ASSERT(0);
 	}
 	*curp = cur;
+	if (delta) {
+		temp2 += temp;
+		if (delta->xed_startoff > temp)
+			delta->xed_startoff = temp;
+		if (delta->xed_blockcount < temp2)
+			delta->xed_blockcount = temp2;
+	}
 done:
 	*logflagsp = rval;
 	return error;
@@ -1235,7 +1292,8 @@ xfs_bmap_add_extent_unwritten_real(
 	xfs_extnum_t		idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
-	int			*logflagsp) /* inode logging flags */
+	int			*logflagsp, /* inode logging flags */
+	xfs_extdelta_t		*delta) /* Change made to incore extents */
 {
 	xfs_btree_cur_t		*cur;	/* btree cursor */
 	xfs_bmbt_rec_t		*ep;	/* extent entry for idx */
@@ -1252,6 +1310,8 @@ xfs_bmap_add_extent_unwritten_real(
 					/* left is 0, right is 1, prev is 2 */
 	int			rval=0;	/* return value (logging flags) */
 	int			state = 0;/* state bits, accessed thru macros */
+	xfs_filblks_t		temp=0;
+	xfs_filblks_t		temp2=0;
 	enum {				/* bit number definitions for state */
 		LEFT_CONTIG,	RIGHT_CONTIG,
 		LEFT_FILLING,	RIGHT_FILLING,
@@ -1380,6 +1440,11 @@ xfs_bmap_add_extent_unwritten_real(
 				RIGHT.br_blockcount, LEFT.br_state)))
 				goto done;
 		}
+		/* DELTA: Three in-core extents are replaced by one. */
+		temp = LEFT.br_startoff;
+		temp2 = LEFT.br_blockcount +
+			PREV.br_blockcount +
+			RIGHT.br_blockcount;
 		break;
 
 	case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG):
@@ -1419,6 +1484,10 @@ xfs_bmap_add_extent_unwritten_real(
 				LEFT.br_state)))
 				goto done;
 		}
+		/* DELTA: Two in-core extents are replaced by one. */
+		temp = LEFT.br_startoff;
+		temp2 = LEFT.br_blockcount +
+			PREV.br_blockcount;
 		break;
 
 	case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG):
@@ -1459,6 +1528,10 @@ xfs_bmap_add_extent_unwritten_real(
 				newext)))
 				goto done;
 		}
+		/* DELTA: Two in-core extents are replaced by one. */
+		temp = PREV.br_startoff;
+		temp2 = PREV.br_blockcount +
+			RIGHT.br_blockcount;
 		break;
 
 	case MASK2(LEFT_FILLING, RIGHT_FILLING):
@@ -1487,6 +1560,9 @@ xfs_bmap_add_extent_unwritten_real(
 				newext)))
 				goto done;
 		}
+		/* DELTA: The in-core extent described by new changed type. */
+		temp = new->br_startoff;
+		temp2 = new->br_blockcount;
 		break;
 
 	case MASK2(LEFT_FILLING, LEFT_CONTIG):
@@ -1534,6 +1610,10 @@ xfs_bmap_add_extent_unwritten_real(
 				LEFT.br_state))
 				goto done;
 		}
+		/* DELTA: The boundary between two in-core extents moved. */
+		temp = LEFT.br_startoff;
+		temp2 = LEFT.br_blockcount +
+			PREV.br_blockcount;
 		break;
 
 	case MASK(LEFT_FILLING):
@@ -1574,6 +1654,9 @@ xfs_bmap_add_extent_unwritten_real(
 				goto done;
 			ASSERT(i == 1);
 		}
+		/* DELTA: One in-core extent is split in two. */
+		temp = PREV.br_startoff;
+		temp2 = PREV.br_blockcount;
 		break;
 
 	case MASK2(RIGHT_FILLING, RIGHT_CONTIG):
@@ -1617,6 +1700,10 @@ xfs_bmap_add_extent_unwritten_real(
 				newext)))
 				goto done;
 		}
+		/* DELTA: The boundary between two in-core extents moved. */
+		temp = PREV.br_startoff;
+		temp2 = PREV.br_blockcount +
+			RIGHT.br_blockcount;
 		break;
 
 	case MASK(RIGHT_FILLING):
@@ -1657,6 +1744,9 @@ xfs_bmap_add_extent_unwritten_real(
 				goto done;
 			ASSERT(i == 1);
 		}
+		/* DELTA: One in-core extent is split in two. */
+		temp = PREV.br_startoff;
+		temp2 = PREV.br_blockcount;
 		break;
 
 	case 0:
@@ -1710,6 +1800,9 @@ xfs_bmap_add_extent_unwritten_real(
 				goto done;
 			ASSERT(i == 1);
 		}
+		/* DELTA: One in-core extent is split in three. */
+		temp = PREV.br_startoff;
+		temp2 = PREV.br_blockcount;
 		break;
 
 	case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
@@ -1725,6 +1818,13 @@ xfs_bmap_add_extent_unwritten_real(
 		ASSERT(0);
 	}
 	*curp = cur;
+	if (delta) {
+		temp2 += temp;
+		if (delta->xed_startoff > temp)
+			delta->xed_startoff = temp;
+		if (delta->xed_blockcount < temp2)
+			delta->xed_blockcount = temp2;
+	}
 done:
 	*logflagsp = rval;
 	return error;
@@ -1753,6 +1853,7 @@ xfs_bmap_add_extent_hole_delay(
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	int			*logflagsp, /* inode logging flags */
+	xfs_extdelta_t		*delta, /* Change made to incore extents */
 	int			rsvd)		/* OK to allocate reserved blocks */
 {
 	xfs_bmbt_rec_t		*ep;	/* extent record for idx */
@@ -1765,7 +1866,8 @@ xfs_bmap_add_extent_hole_delay(
 	xfs_filblks_t		oldlen=0;	/* old indirect size */
 	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
 	int			state;  /* state bits, accessed thru macros */
-	xfs_filblks_t		temp;	/* temp for indirect calculations */
+	xfs_filblks_t		temp=0;	/* temp for indirect calculations */
+	xfs_filblks_t		temp2=0;
 	enum {				/* bit number definitions for state */
 		LEFT_CONTIG,	RIGHT_CONTIG,
 		LEFT_DELAY,	RIGHT_DELAY,
@@ -1844,6 +1946,9 @@ xfs_bmap_add_extent_hole_delay(
 			XFS_DATA_FORK);
 		xfs_iext_remove(ifp, idx, 1);
 		ip->i_df.if_lastex = idx - 1;
+		/* DELTA: Two in-core extents were replaced by one. */
+		temp2 = temp;
+		temp = left.br_startoff;
 		break;
 
 	case MASK(LEFT_CONTIG):
@@ -1864,6 +1969,9 @@ xfs_bmap_add_extent_hole_delay(
 		xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx - 1;
+		/* DELTA: One in-core extent grew into a hole. */
+		temp2 = temp;
+		temp = left.br_startoff;
 		break;
 
 	case MASK(RIGHT_CONTIG):
@@ -1881,6 +1989,9 @@ xfs_bmap_add_extent_hole_delay(
 			NULLSTARTBLOCK((int)newlen), temp, right.br_state);
 		xfs_bmap_trace_post_update(fname, "RC", ip, idx, XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx;
+		/* DELTA: One in-core extent grew into a hole. */
+		temp2 = temp;
+		temp = new->br_startoff;
 		break;
 
 	case 0:
@@ -1894,6 +2005,9 @@ xfs_bmap_add_extent_hole_delay(
 			XFS_DATA_FORK);
 		xfs_iext_insert(ifp, idx, 1, new);
 		ip->i_df.if_lastex = idx;
+		/* DELTA: A new in-core extent was added in a hole. */
+		temp2 = new->br_blockcount;
+		temp = new->br_startoff;
 		break;
 	}
 	if (oldlen != newlen) {
@@ -1904,6 +2018,13 @@ xfs_bmap_add_extent_hole_delay(
 		 * Nothing to do for disk quota accounting here.
 		 */
 	}
+	if (delta) {
+		temp2 += temp;
+		if (delta->xed_startoff > temp)
+			delta->xed_startoff = temp;
+		if (delta->xed_blockcount < temp2)
+			delta->xed_blockcount = temp2;
+	}
 	*logflagsp = 0;
 	return 0;
 #undef	MASK
@@ -1925,6 +2046,7 @@ xfs_bmap_add_extent_hole_real(
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	int			*logflagsp, /* inode logging flags */
+	xfs_extdelta_t		*delta, /* Change made to incore extents */
 	int			whichfork) /* data or attr fork */
 {
 	xfs_bmbt_rec_t		*ep;	/* pointer to extent entry ins. point */
@@ -1936,7 +2058,10 @@ xfs_bmap_add_extent_hole_real(
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
 	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
 	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
+	int			rval=0;	/* return value (logging flags) */
 	int			state;	/* state bits, accessed thru macros */
+	xfs_filblks_t		temp=0;
+	xfs_filblks_t		temp2=0;
 	enum {				/* bit number definitions for state */
 		LEFT_CONTIG,	RIGHT_CONTIG,
 		LEFT_DELAY,	RIGHT_DELAY,
@@ -1993,6 +2118,7 @@ xfs_bmap_add_extent_hole_real(
 		 left.br_blockcount + new->br_blockcount +
 		     right.br_blockcount <= MAXEXTLEN));
 
+	error = 0;
 	/*
 	 * Select which case we're in here, and implement it.
 	 */
@@ -2018,25 +2144,35 @@ xfs_bmap_add_extent_hole_real(
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
 		if (cur == NULL) {
-			*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
-			return 0;
+			rval = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
+		} else {
+			rval = XFS_ILOG_CORE;
+			if ((error = xfs_bmbt_lookup_eq(cur,
+					right.br_startoff,
+					right.br_startblock,
+					right.br_blockcount, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_delete(cur, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_update(cur, left.br_startoff,
+					left.br_startblock,
+					left.br_blockcount +
+						new->br_blockcount +
+						right.br_blockcount,
+					left.br_state)))
+				goto done;
 		}
-		*logflagsp = XFS_ILOG_CORE;
-		if ((error = xfs_bmbt_lookup_eq(cur, right.br_startoff,
-				right.br_startblock, right.br_blockcount, &i)))
-			return error;
-		ASSERT(i == 1);
-		if ((error = xfs_bmbt_delete(cur, &i)))
-			return error;
-		ASSERT(i == 1);
-		if ((error = xfs_bmbt_decrement(cur, 0, &i)))
-			return error;
-		ASSERT(i == 1);
-		error = xfs_bmbt_update(cur, left.br_startoff,
-				left.br_startblock,
-				left.br_blockcount + new->br_blockcount +
-				right.br_blockcount, left.br_state);
-		return error;
+		/* DELTA: Two in-core extents were replaced by one. */
+		temp = left.br_startoff;
+		temp2 = left.br_blockcount +
+			new->br_blockcount +
+			right.br_blockcount;
+		break;
 
 	case MASK(LEFT_CONTIG):
 		/*
@@ -2050,19 +2186,27 @@ xfs_bmap_add_extent_hole_real(
 		xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork);
 		ifp->if_lastex = idx - 1;
 		if (cur == NULL) {
-			*logflagsp = XFS_ILOG_FEXT(whichfork);
-			return 0;
+			rval = XFS_ILOG_FEXT(whichfork);
+		} else {
+			rval = 0;
+			if ((error = xfs_bmbt_lookup_eq(cur,
+					left.br_startoff,
+					left.br_startblock,
+					left.br_blockcount, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_update(cur, left.br_startoff,
+					left.br_startblock,
+					left.br_blockcount +
+						new->br_blockcount,
+					left.br_state)))
+				goto done;
 		}
-		*logflagsp = 0;
-		if ((error = xfs_bmbt_lookup_eq(cur, left.br_startoff,
-				left.br_startblock, left.br_blockcount, &i)))
-			return error;
-		ASSERT(i == 1);
-		error = xfs_bmbt_update(cur, left.br_startoff,
-				left.br_startblock,
-				left.br_blockcount + new->br_blockcount,
-				left.br_state);
-		return error;
+		/* DELTA: One in-core extent grew. */
+		temp = left.br_startoff;
+		temp2 = left.br_blockcount +
+			new->br_blockcount;
+		break;
 
 	case MASK(RIGHT_CONTIG):
 		/*
@@ -2077,19 +2221,27 @@ xfs_bmap_add_extent_hole_real(
 		xfs_bmap_trace_post_update(fname, "RC", ip, idx, whichfork);
 		ifp->if_lastex = idx;
 		if (cur == NULL) {
-			*logflagsp = XFS_ILOG_FEXT(whichfork);
-			return 0;
+			rval = XFS_ILOG_FEXT(whichfork);
+		} else {
+			rval = 0;
+			if ((error = xfs_bmbt_lookup_eq(cur,
+					right.br_startoff,
+					right.br_startblock,
+					right.br_blockcount, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_update(cur, new->br_startoff,
+					new->br_startblock,
+					new->br_blockcount +
+						right.br_blockcount,
+					right.br_state)))
+				goto done;
 		}
-		*logflagsp = 0;
-		if ((error = xfs_bmbt_lookup_eq(cur, right.br_startoff,
-				right.br_startblock, right.br_blockcount, &i)))
-			return error;
-		ASSERT(i == 1);
-		error = xfs_bmbt_update(cur, new->br_startoff,
-				new->br_startblock,
-				new->br_blockcount + right.br_blockcount,
-				right.br_state);
-		return error;
+		/* DELTA: One in-core extent grew. */
+		temp = new->br_startoff;
+		temp2 = new->br_blockcount +
+			right.br_blockcount;
+		break;
 
 	case 0:
 		/*
@@ -2104,29 +2256,41 @@ xfs_bmap_add_extent_hole_real(
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 			XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
 		if (cur == NULL) {
-			*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
-			return 0;
+			rval = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
+		} else {
+			rval = XFS_ILOG_CORE;
+			if ((error = xfs_bmbt_lookup_eq(cur,
+					new->br_startoff,
+					new->br_startblock,
+					new->br_blockcount, &i)))
+				goto done;
+			ASSERT(i == 0);
+			cur->bc_rec.b.br_state = new->br_state;
+			if ((error = xfs_bmbt_insert(cur, &i)))
+				goto done;
+			ASSERT(i == 1);
 		}
-		*logflagsp = XFS_ILOG_CORE;
-		if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
-				new->br_startblock, new->br_blockcount, &i)))
-			return error;
-		ASSERT(i == 0);
-		cur->bc_rec.b.br_state = new->br_state;
-		if ((error = xfs_bmbt_insert(cur, &i)))
-			return error;
-		ASSERT(i == 1);
-		return 0;
+		/* DELTA: A new extent was added in a hole. */
+		temp = new->br_startoff;
+		temp2 = new->br_blockcount;
+		break;
+	}
+	if (delta) {
+		temp2 += temp;
+		if (delta->xed_startoff > temp)
+			delta->xed_startoff = temp;
+		if (delta->xed_blockcount < temp2)
+			delta->xed_blockcount = temp2;
 	}
+done:
+	*logflagsp = rval;
+	return error;
 #undef	MASK
 #undef	MASK2
 #undef	STATE_SET
 #undef	STATE_TEST
 #undef	STATE_SET_TEST
 #undef	SWITCH_STATE
-	/* NOTREACHED */
-	ASSERT(0);
-	return 0; /* keep gcc quite */
 }
 
 /*
@@ -2598,6 +2762,7 @@ xfs_bmap_btalloc(
 	args.mp = mp;
 	args.fsbno = ap->rval;
 	args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
+	args.firstblock = ap->firstblock;
 	blen = 0;
 	if (nullfb) {
 		args.type = XFS_ALLOCTYPE_START_BNO;
@@ -2657,7 +2822,7 @@ xfs_bmap_btalloc(
 		else
 			args.minlen = ap->alen;
 	} else if (ap->low) {
-		args.type = XFS_ALLOCTYPE_FIRST_AG;
+		args.type = XFS_ALLOCTYPE_START_BNO;
 		args.total = args.minlen = ap->minlen;
 	} else {
 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -2669,7 +2834,7 @@ xfs_bmap_btalloc(
 		args.prod = ap->ip->i_d.di_extsize;
 		if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
 			args.mod = (xfs_extlen_t)(args.prod - args.mod);
-	} else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) {
+	} else if (mp->m_sb.sb_blocksize >= NBPP) {
 		args.prod = 1;
 		args.mod = 0;
 	} else {
@@ -2885,6 +3050,7 @@ xfs_bmap_del_extent(
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
 	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
 	int			*logflagsp, /* inode logging flags */
+	xfs_extdelta_t		*delta, /* Change made to incore extents */
 	int			whichfork, /* data or attr fork */
 	int			rsvd)	/* OK to allocate reserved blocks */
 {
@@ -3193,6 +3359,14 @@ xfs_bmap_del_extent(
 	if (da_old > da_new)
 		xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int)(da_old - da_new),
 			rsvd);
+	if (delta) {
+		/* DELTA: report the original extent. */
+		if (delta->xed_startoff > got.br_startoff)
+			delta->xed_startoff = got.br_startoff;
+		if (delta->xed_blockcount < got.br_startoff+got.br_blockcount)
+			delta->xed_blockcount = got.br_startoff +
+							got.br_blockcount;
+	}
 done:
 	*logflagsp = flags;
 	return error;
@@ -3279,6 +3453,7 @@ xfs_bmap_extents_to_btree(
 	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
 	args.tp = tp;
 	args.mp = mp;
+	args.firstblock = *firstblock;
 	if (*firstblock == NULLFSBLOCK) {
 		args.type = XFS_ALLOCTYPE_START_BNO;
 		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
@@ -3414,6 +3589,7 @@ xfs_bmap_local_to_extents(
 
 		args.tp = tp;
 		args.mp = ip->i_mount;
+		args.firstblock = *firstblock;
 		ASSERT((ifp->if_flags &
 			(XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
 		/*
@@ -3753,7 +3929,7 @@ xfs_bunmap_trace(
 	if (ip->i_rwtrace == NULL)
 		return;
 	ktrace_enter(ip->i_rwtrace,
-		(void *)(__psint_t)XFS_BUNMAPI,
+		(void *)(__psint_t)XFS_BUNMAP,
 		(void *)ip,
 		(void *)(__psint_t)((ip->i_d.di_size >> 32) & 0xffffffff),
 		(void *)(__psint_t)(ip->i_d.di_size & 0xffffffff),
@@ -4087,8 +4263,8 @@ xfs_bmap_finish(
 			if (!XFS_FORCED_SHUTDOWN(mp))
 				xfs_force_shutdown(mp,
 						   (error == EFSCORRUPTED) ?
-						   XFS_CORRUPT_INCORE :
-						   XFS_METADATA_IO_ERROR);
+						   SHUTDOWN_CORRUPT_INCORE :
+						   SHUTDOWN_META_IO_ERROR);
 			return error;
 		}
 		xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
@@ -4538,7 +4714,8 @@ xfs_bmapi(
 	xfs_extlen_t	total,		/* total blocks needed */
 	xfs_bmbt_irec_t	*mval,		/* output: map values */
 	int		*nmap,		/* i/o: mval size/count */
-	xfs_bmap_free_t	*flist)		/* i/o: list extents to free */
+	xfs_bmap_free_t	*flist,		/* i/o: list extents to free */
+	xfs_extdelta_t	*delta)		/* o: change made to incore extents */
 {
 	xfs_fsblock_t	abno;		/* allocated block number */
 	xfs_extlen_t	alen;		/* allocated extent length */
@@ -4650,6 +4827,10 @@ xfs_bmapi(
 	end = bno + len;
 	obno = bno;
 	bma.ip = NULL;
+	if (delta) {
+		delta->xed_startoff = NULLFILEOFF;
+		delta->xed_blockcount = 0;
+	}
 	while (bno < end && n < *nmap) {
 		/*
 		 * Reading past eof, act as though there's a hole
@@ -4886,8 +5067,8 @@ xfs_bmapi(
 					got.br_state = XFS_EXT_UNWRITTEN;
 			}
 			error = xfs_bmap_add_extent(ip, lastx, &cur, &got,
-				firstblock, flist, &tmp_logflags, whichfork,
-				(flags & XFS_BMAPI_RSVBLOCKS));
+				firstblock, flist, &tmp_logflags, delta,
+				whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
 			logflags |= tmp_logflags;
 			if (error)
 				goto error0;
@@ -4983,8 +5164,8 @@ xfs_bmapi(
 			}
 			mval->br_state = XFS_EXT_NORM;
 			error = xfs_bmap_add_extent(ip, lastx, &cur, mval,
-				firstblock, flist, &tmp_logflags, whichfork,
-				(flags & XFS_BMAPI_RSVBLOCKS));
+				firstblock, flist, &tmp_logflags, delta,
+				whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
 			logflags |= tmp_logflags;
 			if (error)
 				goto error0;
@@ -5073,7 +5254,14 @@ xfs_bmapi(
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
 	       XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max);
 	error = 0;
-
+	if (delta && delta->xed_startoff != NULLFILEOFF) {
+		/* A change was actually made.
+	 * Note that delta->xed_blockcount is an offset at this
+		 * point and needs to be converted to a block count.
+		 */
+		ASSERT(delta->xed_blockcount > delta->xed_startoff);
+		delta->xed_blockcount -= delta->xed_startoff;
+	}
 error0:
 	/*
 	 * Log everything.  Do this after conversion, there's no point in
@@ -5185,6 +5373,8 @@ xfs_bunmapi(
 	xfs_fsblock_t		*firstblock,	/* first allocated block
 						   controls a.g. for allocs */
 	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
+	xfs_extdelta_t		*delta,		/* o: change made to incore
+						   extents */
 	int			*done)		/* set if not done yet */
 {
 	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
@@ -5242,6 +5432,10 @@ xfs_bunmapi(
 	bno = start + len - 1;
 	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
 		&prev);
+	if (delta) {
+		delta->xed_startoff = NULLFILEOFF;
+		delta->xed_blockcount = 0;
+	}
 	/*
 	 * Check to see if the given block number is past the end of the
 	 * file, back up to the last block if so...
@@ -5340,7 +5534,8 @@ xfs_bunmapi(
 			}
 			del.br_state = XFS_EXT_UNWRITTEN;
 			error = xfs_bmap_add_extent(ip, lastx, &cur, &del,
-				firstblock, flist, &logflags, XFS_DATA_FORK, 0);
+				firstblock, flist, &logflags, delta,
+				XFS_DATA_FORK, 0);
 			if (error)
 				goto error0;
 			goto nodelete;
@@ -5394,7 +5589,7 @@ xfs_bunmapi(
 				prev.br_state = XFS_EXT_UNWRITTEN;
 				error = xfs_bmap_add_extent(ip, lastx - 1, &cur,
 					&prev, firstblock, flist, &logflags,
-					XFS_DATA_FORK, 0);
+					delta, XFS_DATA_FORK, 0);
 				if (error)
 					goto error0;
 				goto nodelete;
@@ -5403,7 +5598,7 @@ xfs_bunmapi(
 				del.br_state = XFS_EXT_UNWRITTEN;
 				error = xfs_bmap_add_extent(ip, lastx, &cur,
 					&del, firstblock, flist, &logflags,
-					XFS_DATA_FORK, 0);
+					delta, XFS_DATA_FORK, 0);
 				if (error)
 					goto error0;
 				goto nodelete;
@@ -5456,7 +5651,7 @@ xfs_bunmapi(
 			goto error0;
 		}
 		error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del,
-			&tmp_logflags, whichfork, rsvd);
+				&tmp_logflags, delta, whichfork, rsvd);
 		logflags |= tmp_logflags;
 		if (error)
 			goto error0;
@@ -5513,6 +5708,14 @@ nodelete:
 	ASSERT(ifp->if_ext_max ==
 	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
 	error = 0;
+	if (delta && delta->xed_startoff != NULLFILEOFF) {
+		/* A change was actually made.
+	 * Note that delta->xed_blockcount is an offset at this
+		 * point and needs to be converted to a block count.
+		 */
+		ASSERT(delta->xed_blockcount > delta->xed_startoff);
+		delta->xed_blockcount -= delta->xed_startoff;
+	}
 error0:
 	/*
 	 * Log everything.  Do this after conversion, there's no point in
@@ -5556,7 +5759,7 @@ xfs_getbmap(
 	__int64_t		fixlen;		/* length for -1 case */
 	int			i;		/* extent number */
 	xfs_inode_t		*ip;		/* xfs incore inode pointer */
-	vnode_t			*vp;		/* corresponding vnode */
+	bhv_vnode_t		*vp;		/* corresponding vnode */
 	int			lock;		/* lock state */
 	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
 	xfs_mount_t		*mp;		/* file system mount point */
@@ -5653,7 +5856,7 @@ xfs_getbmap(
 
 	if (whichfork == XFS_DATA_FORK && ip->i_delayed_blks) {
 		/* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */
-		VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error);
+		error = bhv_vop_flush_pages(vp, (xfs_off_t)0, -1, 0, FI_REMAPF);
 	}
 
 	ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0);
@@ -5689,7 +5892,8 @@ xfs_getbmap(
 		nmap = (nexleft > subnex) ? subnex : nexleft;
 		error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
 				  XFS_BB_TO_FSB(mp, bmv->bmv_length),
-				  bmapi_flags, NULL, 0, map, &nmap, NULL);
+				  bmapi_flags, NULL, 0, map, &nmap,
+				  NULL, NULL);
 		if (error)
 			goto unlock_and_return;
 		ASSERT(nmap <= subnex);
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 8e0d73d9ccc..80e93409b78 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -26,6 +26,20 @@ struct xfs_mount;
 struct xfs_trans;
 
 /*
+ * DELTA: describe a change to the in-core extent list.
+ *
+ * Internally the use of xed_blockcount is somewhat funky.
+ * xed_blockcount contains an offset much of the time because this
+ * makes merging changes easier.  (xfs_fileoff_t and xfs_filblks_t are
+ * the same underlying type).
+ */
+typedef struct xfs_extdelta
+{
+	xfs_fileoff_t		xed_startoff;	/* starting offset of range */
+	xfs_filblks_t		xed_blockcount;	/* length, or end offset */
+} xfs_extdelta_t;
+
+/*
  * List of extents to be free "later".
  * The list is kept sorted on xbf_startblock.
  */
@@ -275,7 +289,9 @@ xfs_bmapi(
 	xfs_extlen_t		total,		/* total blocks needed */
 	struct xfs_bmbt_irec	*mval,		/* output: map values */
 	int			*nmap,		/* i/o: mval size/count */
-	xfs_bmap_free_t		*flist);	/* i/o: list extents to free */
+	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
+	xfs_extdelta_t		*delta);	/* o: change made to incore
+						   extents */
 
 /*
  * Map file blocks to filesystem blocks, simple version.
@@ -309,6 +325,8 @@ xfs_bunmapi(
 	xfs_fsblock_t		*firstblock,	/* first allocated block
 						   controls a.g. for allocs */
 	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
+	xfs_extdelta_t		*delta,		/* o: change made to incore
+						   extents */
 	int			*done);		/* set if not done yet */
 
 /*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index bea44709afb..18fb7385d71 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -1569,12 +1567,11 @@ xfs_bmbt_split(
 	lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp));
 	left = XFS_BUF_TO_BMBT_BLOCK(lbp);
 	args.fsbno = cur->bc_private.b.firstblock;
+	args.firstblock = args.fsbno;
 	if (args.fsbno == NULLFSBLOCK) {
 		args.fsbno = lbno;
 		args.type = XFS_ALLOCTYPE_START_BNO;
-	} else if (cur->bc_private.b.flist->xbf_low)
-		args.type = XFS_ALLOCTYPE_FIRST_AG;
-	else
+	} else
 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
 	args.mod = args.minleft = args.alignment = args.total = args.isfl =
 		args.userdata = args.minalignslop = 0;
@@ -2356,6 +2353,7 @@ xfs_bmbt_newroot(
 		args.userdata = args.minalignslop = 0;
 	args.minlen = args.maxlen = args.prod = 1;
 	args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
+	args.firstblock = args.fsbno;
 	if (args.fsbno == NULLFSBLOCK) {
 #ifdef DEBUG
 		if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) {
@@ -2365,9 +2363,7 @@ xfs_bmbt_newroot(
 #endif
 		args.fsbno = INT_GET(*pp, ARCH_CONVERT);
 		args.type = XFS_ALLOCTYPE_START_BNO;
-	} else if (args.wasdel)
-		args.type = XFS_ALLOCTYPE_FIRST_AG;
-	else
+	} else
 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
 	if ((error = xfs_alloc_vextent(&args))) {
 		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 52d5d095fc3..ee2255bd656 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 5fed15682dd..a4aa53974f7 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -23,7 +23,6 @@
 #include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
-#include "xfs_dir.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_buf_item.h"
@@ -1030,9 +1029,9 @@ xfs_buf_iodone_callbacks(
 		if ((XFS_BUF_TARGET(bp) != lasttarg) ||
 		    (time_after(jiffies, (lasttime + 5*HZ)))) {
 			lasttime = jiffies;
-			prdev("XFS write error in file system meta-data "
-			      "block 0x%llx in %s",
-			      XFS_BUF_TARGET(bp),
+			cmn_err(CE_ALERT, "Device %s, XFS metadata write error"
+					" block 0x%llx in %s",
+				XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
 			      (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname);
 		}
 		lasttarg = XFS_BUF_TARGET(bp);
@@ -1108,7 +1107,7 @@ xfs_buf_error_relse(
 	XFS_BUF_ERROR(bp,0);
 	xfs_buftrace("BUF_ERROR_RELSE", bp);
 	if (! XFS_FORCED_SHUTDOWN(mp))
-		xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR);
+		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
 	/*
 	 * We have to unpin the pinned buffers so do the
 	 * callbacks.
diff --git a/fs/xfs/xfs_cap.h b/fs/xfs/xfs_cap.h
index d0035c6e951..7a0e482dd43 100644
--- a/fs/xfs/xfs_cap.h
+++ b/fs/xfs/xfs_cap.h
@@ -49,12 +49,12 @@ typedef struct xfs_cap_set {
 
 #include <linux/posix_cap_xattr.h>
 
-struct vnode;
+struct bhv_vnode;
 
-extern int xfs_cap_vhascap(struct vnode *);
-extern int xfs_cap_vset(struct vnode *, void *, size_t);
-extern int xfs_cap_vget(struct vnode *, void *, size_t);
-extern int xfs_cap_vremove(struct vnode *vp);
+extern int xfs_cap_vhascap(struct bhv_vnode *);
+extern int xfs_cap_vset(struct bhv_vnode *, void *, size_t);
+extern int xfs_cap_vget(struct bhv_vnode *, void *, size_t);
+extern int xfs_cap_vremove(struct bhv_vnode *);
 
 #define _CAP_EXISTS		xfs_cap_vhascap
 
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 8988b905117..32ab61d17ac 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -24,7 +24,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
@@ -32,7 +31,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -43,7 +41,6 @@
 #include "xfs_bmap.h"
 #include "xfs_attr.h"
 #include "xfs_attr_leaf.h"
-#include "xfs_dir_leaf.h"
 #include "xfs_dir2_data.h"
 #include "xfs_dir2_leaf.h"
 #include "xfs_dir2_block.h"
@@ -159,7 +156,7 @@ xfs_da_split(xfs_da_state_t *state)
 	max = state->path.active - 1;
 	ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
 	ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
-	       state->path.blk[max].magic == XFS_DIRX_LEAF_MAGIC(state->mp));
+	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
 
 	addblk = &state->path.blk[max];		/* initial dummy value */
 	for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
@@ -199,38 +196,7 @@ xfs_da_split(xfs_da_state_t *state)
 				return(error);	/* GROT: attr inconsistent */
 			addblk = newblk;
 			break;
-		case XFS_DIR_LEAF_MAGIC:
-			ASSERT(XFS_DIR_IS_V1(state->mp));
-			error = xfs_dir_leaf_split(state, oldblk, newblk);
-			if ((error != 0) && (error != ENOSPC)) {
-				return(error);	/* GROT: dir is inconsistent */
-			}
-			if (!error) {
-				addblk = newblk;
-				break;
-			}
-			/*
-			 * Entry wouldn't fit, split the leaf again.
-			 */
-			state->extravalid = 1;
-			if (state->inleaf) {
-				state->extraafter = 0;	/* before newblk */
-				error = xfs_dir_leaf_split(state, oldblk,
-							   &state->extrablk);
-				if (error)
-					return(error);	/* GROT: dir incon. */
-				addblk = newblk;
-			} else {
-				state->extraafter = 1;	/* after newblk */
-				error = xfs_dir_leaf_split(state, newblk,
-							   &state->extrablk);
-				if (error)
-					return(error);	/* GROT: dir incon. */
-				addblk = newblk;
-			}
-			break;
 		case XFS_DIR2_LEAFN_MAGIC:
-			ASSERT(XFS_DIR_IS_V2(state->mp));
 			error = xfs_dir2_leafn_split(state, oldblk, newblk);
 			if (error)
 				return error;
@@ -363,7 +329,6 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
 			     (char *)oldroot);
 	} else {
-		ASSERT(XFS_DIR_IS_V2(mp));
 		ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
 		leaf = (xfs_dir2_leaf_t *)oldroot;
 		size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
@@ -379,8 +344,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	 * Set up the new root node.
 	 */
 	error = xfs_da_node_create(args,
-		args->whichfork == XFS_DATA_FORK &&
-		XFS_DIR_IS_V2(mp) ? mp->m_dirleafblk : 0,
+		(args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0,
 		be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork);
 	if (error)
 		return(error);
@@ -427,10 +391,9 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 
 	/*
-	 * With V2 the extra block is data or freespace.
+	 * With V2 dirs the extra block is data or freespace.
 	 */
-	useextra = state->extravalid && (XFS_DIR_IS_V1(state->mp) ||
-			state->args->whichfork == XFS_ATTR_FORK);
+	useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
 	newcount = 1 + useextra;
 	/*
 	 * Do we have to split the node?
@@ -624,7 +587,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
 	ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
 	ASSERT(newblk->blkno != 0);
-	if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
+	if (state->args->whichfork == XFS_DATA_FORK)
 		ASSERT(newblk->blkno >= mp->m_dirleafblk &&
 		       newblk->blkno < mp->m_dirfreeblk);
 
@@ -670,7 +633,7 @@ xfs_da_join(xfs_da_state_t *state)
 	save_blk = &state->altpath.blk[ state->path.active-1 ];
 	ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
 	ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
-	       drop_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp));
+	       drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
 
 	/*
 	 * Walk back up the tree joining/deallocating as necessary.
@@ -693,17 +656,7 @@ xfs_da_join(xfs_da_state_t *state)
 				return(0);
 			xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
 			break;
-		case XFS_DIR_LEAF_MAGIC:
-			ASSERT(XFS_DIR_IS_V1(state->mp));
-			error = xfs_dir_leaf_toosmall(state, &action);
-			if (error)
-				return(error);
-			if (action == 0)
-				return(0);
-			xfs_dir_leaf_unbalance(state, drop_blk, save_blk);
-			break;
 		case XFS_DIR2_LEAFN_MAGIC:
-			ASSERT(XFS_DIR_IS_V2(state->mp));
 			error = xfs_dir2_leafn_toosmall(state, &action);
 			if (error)
 				return error;
@@ -790,7 +743,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 	ASSERT(bp != NULL);
 	blkinfo = bp->data;
 	if (be16_to_cpu(oldroot->hdr.level) == 1) {
-		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC ||
 		       be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC);
 	} else {
 		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC);
@@ -951,14 +904,7 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
 		if (count == 0)
 			return;
 		break;
-	case XFS_DIR_LEAF_MAGIC:
-		ASSERT(XFS_DIR_IS_V1(state->mp));
-		lasthash = xfs_dir_leaf_lasthash(blk->bp, &count);
-		if (count == 0)
-			return;
-		break;
 	case XFS_DIR2_LEAFN_MAGIC:
-		ASSERT(XFS_DIR_IS_V2(state->mp));
 		lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
 		if (count == 0)
 			return;
@@ -1117,10 +1063,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 	 * Descend thru the B-tree searching each level for the right
 	 * node to use, until the right hashval is found.
 	 */
-	if (args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(state->mp))
-		blkno = state->mp->m_dirleafblk;
-	else
-		blkno = 0;
+	blkno = (args->whichfork == XFS_DATA_FORK)? state->mp->m_dirleafblk : 0;
 	for (blk = &state->path.blk[0], state->path.active = 1;
 			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
 			 blk++, state->path.active++) {
@@ -1137,7 +1080,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 		}
 		curr = blk->bp->data;
 		ASSERT(be16_to_cpu(curr->magic) == XFS_DA_NODE_MAGIC ||
-		       be16_to_cpu(curr->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+		       be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC ||
 		       be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC);
 
 		/*
@@ -1190,16 +1133,10 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 				blk->index = probe;
 				blkno = be32_to_cpu(btree->before);
 			}
-		}
-		else if (be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC) {
+		} else if (be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC) {
 			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
 			break;
-		}
-		else if (be16_to_cpu(curr->magic) == XFS_DIR_LEAF_MAGIC) {
-			blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL);
-			break;
-		}
-		else if (be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC) {
+		} else if (be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC) {
 			blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
 			break;
 		}
@@ -1212,12 +1149,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 	 * next leaf and keep searching.
 	 */
 	for (;;) {
-		if (blk->magic == XFS_DIR_LEAF_MAGIC) {
-			ASSERT(XFS_DIR_IS_V1(state->mp));
-			retval = xfs_dir_leaf_lookup_int(blk->bp, args,
-								  &blk->index);
-		} else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
-			ASSERT(XFS_DIR_IS_V2(state->mp));
+		if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
 			retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
 							&blk->index, state);
 		}
@@ -1270,7 +1202,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
 	old_info = old_blk->bp->data;
 	new_info = new_blk->bp->data;
 	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
-	       old_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+	       old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
 	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(old_blk->magic == be16_to_cpu(old_info->magic));
 	ASSERT(new_blk->magic == be16_to_cpu(new_info->magic));
@@ -1280,12 +1212,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
 	case XFS_ATTR_LEAF_MAGIC:
 		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
 		break;
-	case XFS_DIR_LEAF_MAGIC:
-		ASSERT(XFS_DIR_IS_V1(state->mp));
-		before = xfs_dir_leaf_order(old_blk->bp, new_blk->bp);
-		break;
 	case XFS_DIR2_LEAFN_MAGIC:
-		ASSERT(XFS_DIR_IS_V2(state->mp));
 		before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
 		break;
 	case XFS_DA_NODE_MAGIC:
@@ -1404,7 +1331,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 	save_info = save_blk->bp->data;
 	drop_info = drop_blk->bp->data;
 	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
-	       save_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+	       save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
 	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
 	ASSERT(save_blk->magic == be16_to_cpu(save_info->magic));
 	ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic));
@@ -1529,7 +1456,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 		ASSERT(blk->bp != NULL);
 		info = blk->bp->data;
 		ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC ||
-		       be16_to_cpu(info->magic) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+		       be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC ||
 		       be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
 		blk->magic = be16_to_cpu(info->magic);
 		if (blk->magic == XFS_DA_NODE_MAGIC) {
@@ -1548,20 +1475,13 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 				blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
 								      NULL);
 				break;
-			case XFS_DIR_LEAF_MAGIC:
-				ASSERT(XFS_DIR_IS_V1(state->mp));
-				blk->hashval = xfs_dir_leaf_lasthash(blk->bp,
-								     NULL);
-				break;
 			case XFS_DIR2_LEAFN_MAGIC:
-				ASSERT(XFS_DIR_IS_V2(state->mp));
 				blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
 								       NULL);
 				break;
 			default:
 				ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
-				       blk->magic ==
-				       XFS_DIRX_LEAF_MAGIC(state->mp));
+				       blk->magic == XFS_DIR2_LEAFN_MAGIC);
 				break;
 			}
 		}
@@ -1620,7 +1540,6 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
 	xfs_bmbt_irec_t	*mapp;
 	xfs_inode_t *dp;
 	int nmap, error, w, count, c, got, i, mapi;
-	xfs_fsize_t size;
 	xfs_trans_t *tp;
 	xfs_mount_t *mp;
 
@@ -1631,7 +1550,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
 	/*
 	 * For new directories adjust the file offset and block count.
 	 */
-	if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) {
+	if (w == XFS_DATA_FORK) {
 		bno = mp->m_dirleafblk;
 		count = mp->m_dirblkfsbs;
 	} else {
@@ -1641,10 +1560,9 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
 	/*
 	 * Find a spot in the file space to put the new block.
 	 */
-	if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) {
+	if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w)))
 		return error;
-	}
-	if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
+	if (w == XFS_DATA_FORK)
 		ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
 	/*
 	 * Try mapping it in one filesystem block.
@@ -1655,7 +1573,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
 			XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
 			XFS_BMAPI_CONTIG,
 			args->firstblock, args->total, &map, &nmap,
-			args->flist))) {
+			args->flist, NULL))) {
 		return error;
 	}
 	ASSERT(nmap <= 1);
@@ -1676,7 +1594,8 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
 					XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|
 					XFS_BMAPI_METADATA,
 					args->firstblock, args->total,
-					&mapp[mapi], &nmap, args->flist))) {
+					&mapp[mapi], &nmap, args->flist,
+					NULL))) {
 				kmem_free(mapp, sizeof(*mapp) * count);
 				return error;
 			}
@@ -1705,19 +1624,6 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
 	if (mapp != &map)
 		kmem_free(mapp, sizeof(*mapp) * count);
 	*new_blkno = (xfs_dablk_t)bno;
-	/*
-	 * For version 1 directories, adjust the file size if it changed.
-	 */
-	if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
-		ASSERT(mapi == 1);
-		if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
-			return error;
-		size = XFS_FSB_TO_B(mp, bno);
-		if (size != dp->i_d.di_size) {
-			dp->i_d.di_size = size;
-			xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
-		}
-	}
 	return 0;
 }
 
@@ -1742,7 +1648,6 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	int error, w, entno, level, dead_level;
 	xfs_da_blkinfo_t *dead_info, *sib_info;
 	xfs_da_intnode_t *par_node, *dead_node;
-	xfs_dir_leafblock_t *dead_leaf;
 	xfs_dir2_leaf_t *dead_leaf2;
 	xfs_dahash_t dead_hash;
 
@@ -1753,11 +1658,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	w = args->whichfork;
 	ASSERT(w == XFS_DATA_FORK);
 	mp = ip->i_mount;
-	if (XFS_DIR_IS_V2(mp)) {
-		lastoff = mp->m_dirfreeblk;
-		error = xfs_bmap_last_before(tp, ip, &lastoff, w);
-	} else
-		error = xfs_bmap_last_offset(tp, ip, &lastoff, w);
+	lastoff = mp->m_dirfreeblk;
+	error = xfs_bmap_last_before(tp, ip, &lastoff, w);
 	if (error)
 		return error;
 	if (unlikely(lastoff == 0)) {
@@ -1780,14 +1682,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 	/*
 	 * Get values from the moved block.
 	 */
-	if (be16_to_cpu(dead_info->magic) == XFS_DIR_LEAF_MAGIC) {
-		ASSERT(XFS_DIR_IS_V1(mp));
-		dead_leaf = (xfs_dir_leafblock_t *)dead_info;
-		dead_level = 0;
-		dead_hash =
-			INT_GET(dead_leaf->entries[INT_GET(dead_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
-	} else if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) {
-		ASSERT(XFS_DIR_IS_V2(mp));
+	if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) {
 		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
 		dead_level = 0;
 		dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
@@ -1842,7 +1737,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 		xfs_da_buf_done(sib_buf);
 		sib_buf = NULL;
 	}
-	par_blkno = XFS_DIR_IS_V1(mp) ? 0 : mp->m_dirleafblk;
+	par_blkno = mp->m_dirleafblk;
 	level = -1;
 	/*
 	 * Walk down the tree looking for the parent of the moved block.
@@ -1941,8 +1836,6 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
 {
 	xfs_inode_t *dp;
 	int done, error, w, count;
-	xfs_fileoff_t bno;
-	xfs_fsize_t size;
 	xfs_trans_t *tp;
 	xfs_mount_t *mp;
 
@@ -1950,7 +1843,7 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
 	w = args->whichfork;
 	tp = args->trans;
 	mp = dp->i_mount;
-	if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
+	if (w == XFS_DATA_FORK)
 		count = mp->m_dirblkfsbs;
 	else
 		count = 1;
@@ -1961,34 +1854,17 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
 		 */
 		if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
 				XFS_BMAPI_AFLAG(w)|XFS_BMAPI_METADATA,
-				0, args->firstblock, args->flist,
+				0, args->firstblock, args->flist, NULL,
 				&done)) == ENOSPC) {
 			if (w != XFS_DATA_FORK)
-				goto done;
+				break;
 			if ((error = xfs_da_swap_lastblock(args, &dead_blkno,
 					&dead_buf)))
-				goto done;
-		} else if (error)
-			goto done;
-		else
+				break;
+		} else {
 			break;
-	}
-	ASSERT(done);
-	xfs_da_binval(tp, dead_buf);
-	/*
-	 * Adjust the directory size for version 1.
-	 */
-	if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
-		if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
-			return error;
-		size = XFS_FSB_TO_B(dp->i_mount, bno);
-		if (size != dp->i_d.di_size) {
-			dp->i_d.di_size = size;
-			xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
 		}
 	}
-	return 0;
-done:
 	xfs_da_binval(tp, dead_buf);
 	return error;
 }
@@ -2049,10 +1925,7 @@ xfs_da_do_buf(
 	xfs_dabuf_t	*rbp;
 
 	mp = dp->i_mount;
-	if (whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
-		nfsb = mp->m_dirblkfsbs;
-	else
-		nfsb = 1;
+	nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1;
 	mappedbno = *mappedbnop;
 	/*
 	 * Caller doesn't have a mapping.  -2 means don't complain
@@ -2086,7 +1959,7 @@ xfs_da_do_buf(
 					nfsb,
 					XFS_BMAPI_METADATA |
 						XFS_BMAPI_AFLAG(whichfork),
-					NULL, 0, mapp, &nmap, NULL)))
+					NULL, 0, mapp, &nmap, NULL, NULL)))
 				goto exit0;
 		}
 	} else {
@@ -2198,7 +2071,6 @@ xfs_da_do_buf(
 		magic1 = be32_to_cpu(data->hdr.magic);
 		if (unlikely(
 		    XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
-				   (magic != XFS_DIR_LEAF_MAGIC) &&
 				   (magic != XFS_ATTR_LEAF_MAGIC) &&
 				   (magic != XFS_DIR2_LEAF1_MAGIC) &&
 				   (magic != XFS_DIR2_LEAFN_MAGIC) &&
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 243a730d5ec..4ab865ec8b8 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -36,14 +36,10 @@ struct zone;
  * level in the Btree, and to identify which type of block this is.
  */
 #define XFS_DA_NODE_MAGIC	0xfebe	/* magic number: non-leaf blocks */
-#define XFS_DIR_LEAF_MAGIC	0xfeeb	/* magic number: directory leaf blks */
 #define XFS_ATTR_LEAF_MAGIC	0xfbee	/* magic number: attribute leaf blks */
 #define	XFS_DIR2_LEAF1_MAGIC	0xd2f1	/* magic number: v2 dirlf single blks */
 #define	XFS_DIR2_LEAFN_MAGIC	0xd2ff	/* magic number: v2 dirlf multi blks */
 
-#define	XFS_DIRX_LEAF_MAGIC(mp)	\
-	(XFS_DIR_IS_V1(mp) ? XFS_DIR_LEAF_MAGIC : XFS_DIR2_LEAFN_MAGIC)
-
 typedef struct xfs_da_blkinfo {
 	__be32		forw;			/* previous block in list */
 	__be32		back;			/* following block in list */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 4968a6358e6..80562b60fb9 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -54,24 +52,14 @@ xfs_swapext(
 	xfs_swapext_t	__user *sxu)
 {
 	xfs_swapext_t	*sxp;
-	xfs_inode_t     *ip=NULL, *tip=NULL, *ips[2];
-	xfs_trans_t     *tp;
+	xfs_inode_t     *ip=NULL, *tip=NULL;
 	xfs_mount_t     *mp;
-	xfs_bstat_t	*sbp;
 	struct file	*fp = NULL, *tfp = NULL;
-	vnode_t		*vp, *tvp;
-	static uint	lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
-	int		ilf_fields, tilf_fields;
+	bhv_vnode_t	*vp, *tvp;
 	int		error = 0;
-	xfs_ifork_t	*tempifp, *ifp, *tifp;
-	__uint64_t	tmp;
-	int		aforkblks = 0;
-	int		taforkblks = 0;
-	char		locked = 0;
 
 	sxp = kmem_alloc(sizeof(xfs_swapext_t), KM_MAYFAIL);
-	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
-	if (!sxp || !tempifp) {
+	if (!sxp) {
 		error = XFS_ERROR(ENOMEM);
 		goto error0;
 	}
@@ -118,14 +106,56 @@ xfs_swapext(
 
 	mp = ip->i_mount;
 
-	sbp = &sxp->sx_stat;
-
 	if (XFS_FORCED_SHUTDOWN(mp)) {
 		error =  XFS_ERROR(EIO);
 		goto error0;
 	}
 
-	locked = 1;
+	error = XFS_SWAP_EXTENTS(mp, &ip->i_iocore, &tip->i_iocore, sxp);
+
+ error0:
+	if (fp != NULL)
+		fput(fp);
+	if (tfp != NULL)
+		fput(tfp);
+
+	if (sxp != NULL)
+		kmem_free(sxp, sizeof(xfs_swapext_t));
+
+	return error;
+}
+
+int
+xfs_swap_extents(
+	xfs_inode_t	*ip,
+	xfs_inode_t	*tip,
+	xfs_swapext_t	*sxp)
+{
+	xfs_mount_t	*mp;
+	xfs_inode_t	*ips[2];
+	xfs_trans_t	*tp;
+	xfs_bstat_t	*sbp = &sxp->sx_stat;
+	bhv_vnode_t	*vp, *tvp;
+	xfs_ifork_t	*tempifp, *ifp, *tifp;
+	int		ilf_fields, tilf_fields;
+	static uint	lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
+	int		error = 0;
+	int		aforkblks = 0;
+	int		taforkblks = 0;
+	__uint64_t	tmp;
+	char		locked = 0;
+
+	mp = ip->i_mount;
+
+	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
+	if (!tempifp) {
+		error = XFS_ERROR(ENOMEM);
+		goto error0;
+	}
+
+	sbp = &sxp->sx_stat;
+	vp = XFS_ITOV(ip);
+	tvp = XFS_ITOV(tip);
 
 	/* Lock in i_ino order */
 	if (ip->i_ino < tip->i_ino) {
@@ -137,6 +167,7 @@ xfs_swapext(
 	}
 
 	xfs_lock_inodes(ips, 2, 0, lock_flags);
+	locked = 1;
 
 	/* Check permissions */
 	error = xfs_iaccess(ip, S_IWUSR, NULL);
@@ -169,7 +200,7 @@ xfs_swapext(
 
 	if (VN_CACHED(tvp) != 0) {
 		xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1);
-		VOP_FLUSHINVAL_PAGES(tvp, 0, -1, FI_REMAPF_LOCKED);
+		bhv_vop_flushinval_pages(tvp, 0, -1, FI_REMAPF_LOCKED);
 	}
 
 	/* Verify O_DIRECT for ftmp */
@@ -214,7 +245,7 @@ xfs_swapext(
 	/* We need to fail if the file is memory mapped.  Once we have tossed
 	 * all existing pages, the page fault will have no option
 	 * but to go to the filesystem for pages. By making the page fault call
-	 * VOP_READ (or write in the case of autogrow) they block on the iolock
+	 * vop_read (or write in the case of autogrow) they block on the iolock
 	 * until we have switched the extents.
 	 */
 	if (VN_MAPPED(vp)) {
@@ -233,7 +264,7 @@ xfs_swapext(
 	 * fields change.
 	 */
 
-	VOP_TOSS_PAGES(vp, 0, -1, FI_REMAPF);
+	bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
 	if ((error = xfs_trans_reserve(tp, 0,
@@ -360,16 +391,7 @@ xfs_swapext(
 		xfs_iunlock(ip,  lock_flags);
 		xfs_iunlock(tip, lock_flags);
 	}
-
-	if (fp != NULL)
-		fput(fp);
-	if (tfp != NULL)
-		fput(tfp);
-
-	if (sxp != NULL)
-		kmem_free(sxp, sizeof(xfs_swapext_t));
 	if (tempifp != NULL)
 		kmem_free(tempifp, sizeof(xfs_ifork_t));
-
 	return error;
 }
diff --git a/fs/xfs/xfs_dfrag.h b/fs/xfs/xfs_dfrag.h
index f678559abc4..da178205be6 100644
--- a/fs/xfs/xfs_dfrag.h
+++ b/fs/xfs/xfs_dfrag.h
@@ -48,6 +48,9 @@ typedef struct xfs_swapext
  */
 int	xfs_swapext(struct xfs_swapext __user *sx);
 
+int	xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
+		struct xfs_swapext *sxp);
+
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_DFRAG_H__ */
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index 79d0d9e1fba..b33826961c4 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -85,7 +85,6 @@ typedef struct xfs_dinode
 	union {
 		xfs_bmdr_block_t di_bmbt;	/* btree root block */
 		xfs_bmbt_rec_32_t di_bmx[1];	/* extent list */
-		xfs_dir_shortform_t di_dirsf;	/* shortform directory */
 		xfs_dir2_sf_t	di_dir2sf;	/* shortform directory v2 */
 		char		di_c[1];	/* local contents */
 		xfs_dev_t	di_dev;		/* device for S_IFCHR/S_IFBLK */
@@ -257,6 +256,7 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_NOSYMLINKS_BIT   10	/* disallow symlink creation */
 #define XFS_DIFLAG_EXTSIZE_BIT      11	/* inode extent size allocator hint */
 #define XFS_DIFLAG_EXTSZINHERIT_BIT 12	/* inherit inode extent size */
+#define XFS_DIFLAG_NODEFRAG_BIT     13	/* do not reorganize/defragment */
 #define XFS_DIFLAG_REALTIME      (1 << XFS_DIFLAG_REALTIME_BIT)
 #define XFS_DIFLAG_PREALLOC      (1 << XFS_DIFLAG_PREALLOC_BIT)
 #define XFS_DIFLAG_NEWRTBM       (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -270,12 +270,13 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_NOSYMLINKS    (1 << XFS_DIFLAG_NOSYMLINKS_BIT)
 #define XFS_DIFLAG_EXTSIZE       (1 << XFS_DIFLAG_EXTSIZE_BIT)
 #define XFS_DIFLAG_EXTSZINHERIT  (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
+#define XFS_DIFLAG_NODEFRAG      (1 << XFS_DIFLAG_NODEFRAG_BIT)
 
 #define XFS_DIFLAG_ANY \
 	(XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
 	 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
 	 XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
 	 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
-	 XFS_DIFLAG_EXTSZINHERIT)
+	 XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG)
 
 #endif	/* __XFS_DINODE_H__ */
diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c
deleted file mode 100644
index 9cc702a839a..00000000000
--- a/fs/xfs/xfs_dir.c
+++ /dev/null
@@ -1,1217 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
-#include "xfs_mount.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_dir_leaf.h"
-#include "xfs_error.h"
-
-/*
- * xfs_dir.c
- *
- * Provide the external interfaces to manage directories.
- */
-
-/*========================================================================
- * Function prototypes for the kernel.
- *========================================================================*/
-
-/*
- * Functions for the dirops interfaces.
- */
-static void	xfs_dir_mount(struct xfs_mount *mp);
-
-static int	xfs_dir_isempty(struct xfs_inode *dp);
-
-static int	xfs_dir_init(struct xfs_trans *trans,
-			     struct xfs_inode *dir,
-			     struct xfs_inode *parent_dir);
-
-static int	xfs_dir_createname(struct xfs_trans *trans,
-				   struct xfs_inode *dp,
-				   char *name_string,
-				   int name_len,
-				   xfs_ino_t inode_number,
-				   xfs_fsblock_t *firstblock,
-				   xfs_bmap_free_t *flist,
-				   xfs_extlen_t total);
-
-static int	xfs_dir_lookup(struct xfs_trans *tp,
-			       struct xfs_inode *dp,
-			       char *name_string,
-			       int name_length,
-			       xfs_ino_t *inode_number);
-
-static int	xfs_dir_removename(struct xfs_trans *trans,
-				   struct xfs_inode *dp,
-				   char *name_string,
-				   int name_length,
-				   xfs_ino_t ino,
-				   xfs_fsblock_t *firstblock,
-				   xfs_bmap_free_t *flist,
-				   xfs_extlen_t total);
-
-static int	xfs_dir_getdents(struct xfs_trans *tp,
-				 struct xfs_inode *dp,
-				 struct uio *uiop,
-				 int *eofp);
-
-static int	xfs_dir_replace(struct xfs_trans *tp,
-				struct xfs_inode *dp,
-				char *name_string,
-				int name_length,
-				xfs_ino_t inode_number,
-				xfs_fsblock_t *firstblock,
-				xfs_bmap_free_t *flist,
-				xfs_extlen_t total);
-
-static int	xfs_dir_canenter(struct xfs_trans *tp,
-				 struct xfs_inode *dp,
-				 char *name_string,
-				 int name_length);
-
-static int	xfs_dir_shortform_validate_ondisk(xfs_mount_t *mp,
-						  xfs_dinode_t *dip);
-
-xfs_dirops_t xfsv1_dirops = {
-	.xd_mount			= xfs_dir_mount,
-	.xd_isempty			= xfs_dir_isempty,
-	.xd_init			= xfs_dir_init,
-	.xd_createname			= xfs_dir_createname,
-	.xd_lookup			= xfs_dir_lookup,
-	.xd_removename			= xfs_dir_removename,
-	.xd_getdents			= xfs_dir_getdents,
-	.xd_replace			= xfs_dir_replace,
-	.xd_canenter			= xfs_dir_canenter,
-	.xd_shortform_validate_ondisk	= xfs_dir_shortform_validate_ondisk,
-	.xd_shortform_to_single		= xfs_dir_shortform_to_leaf,
-};
-
-/*
- * Internal routines when dirsize == XFS_LBSIZE(mp).
- */
-STATIC int xfs_dir_leaf_lookup(xfs_da_args_t *args);
-STATIC int xfs_dir_leaf_removename(xfs_da_args_t *args, int *number_entries,
-						 int *total_namebytes);
-STATIC int xfs_dir_leaf_getdents(xfs_trans_t *trans, xfs_inode_t *dp,
-					     uio_t *uio, int *eofp,
-					     xfs_dirent_t *dbp,
-					     xfs_dir_put_t put);
-STATIC int xfs_dir_leaf_replace(xfs_da_args_t *args);
-
-/*
- * Internal routines when dirsize > XFS_LBSIZE(mp).
- */
-STATIC int xfs_dir_node_addname(xfs_da_args_t *args);
-STATIC int xfs_dir_node_lookup(xfs_da_args_t *args);
-STATIC int xfs_dir_node_removename(xfs_da_args_t *args);
-STATIC int xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp,
-					     uio_t *uio, int *eofp,
-					     xfs_dirent_t *dbp,
-					     xfs_dir_put_t put);
-STATIC int xfs_dir_node_replace(xfs_da_args_t *args);
-
-#if defined(XFS_DIR_TRACE)
-ktrace_t *xfs_dir_trace_buf;
-#endif
-
-
-/*========================================================================
- * Overall external interface routines.
- *========================================================================*/
-
-xfs_dahash_t	xfs_dir_hash_dot, xfs_dir_hash_dotdot;
-
-/*
- * One-time startup routine called from xfs_init().
- */
-void
-xfs_dir_startup(void)
-{
-	xfs_dir_hash_dot = xfs_da_hashname(".", 1);
-	xfs_dir_hash_dotdot = xfs_da_hashname("..", 2);
-}
-
-/*
- * Initialize directory-related fields in the mount structure.
- */
-static void
-xfs_dir_mount(xfs_mount_t *mp)
-{
-	uint shortcount, leafcount, count;
-
-	mp->m_dirversion = 1;
-	if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
-		shortcount = (mp->m_attroffset -
-				(uint)sizeof(xfs_dir_sf_hdr_t)) /
-				 (uint)sizeof(xfs_dir_sf_entry_t);
-		leafcount = (XFS_LBSIZE(mp) -
-				(uint)sizeof(xfs_dir_leaf_hdr_t)) /
-				 ((uint)sizeof(xfs_dir_leaf_entry_t) +
-				  (uint)sizeof(xfs_dir_leaf_name_t));
-	} else {
-		shortcount = (XFS_BMDR_SPACE_CALC(MINABTPTRS) -
-			      (uint)sizeof(xfs_dir_sf_hdr_t)) /
-			       (uint)sizeof(xfs_dir_sf_entry_t);
-		leafcount = (XFS_LBSIZE(mp) -
-			    (uint)sizeof(xfs_dir_leaf_hdr_t)) /
-			     ((uint)sizeof(xfs_dir_leaf_entry_t) +
-			      (uint)sizeof(xfs_dir_leaf_name_t));
-	}
-	count = shortcount > leafcount ? shortcount : leafcount;
-	mp->m_dircook_elog = xfs_da_log2_roundup(count + 1);
-	ASSERT(mp->m_dircook_elog <= mp->m_sb.sb_blocklog);
-	mp->m_dir_node_ents = mp->m_attr_node_ents =
-		(XFS_LBSIZE(mp) - (uint)sizeof(xfs_da_node_hdr_t)) /
-		(uint)sizeof(xfs_da_node_entry_t);
-	mp->m_dir_magicpct = (XFS_LBSIZE(mp) * 37) / 100;
-	mp->m_dirblksize = mp->m_sb.sb_blocksize;
-	mp->m_dirblkfsbs = 1;
-}
-
-/*
- * Return 1 if directory contains only "." and "..".
- */
-static int
-xfs_dir_isempty(xfs_inode_t *dp)
-{
-	xfs_dir_sf_hdr_t *hdr;
-
-	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
-	if (dp->i_d.di_size == 0)
-		return(1);
-	if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
-		return(0);
-	hdr = (xfs_dir_sf_hdr_t *)dp->i_df.if_u1.if_data;
-	return(hdr->count == 0);
-}
-
-/*
- * Initialize a directory with its "." and ".." entries.
- */
-static int
-xfs_dir_init(xfs_trans_t *trans, xfs_inode_t *dir, xfs_inode_t *parent_dir)
-{
-	xfs_da_args_t args;
-	int error;
-
-	memset((char *)&args, 0, sizeof(args));
-	args.dp = dir;
-	args.trans = trans;
-
-	ASSERT((dir->i_d.di_mode & S_IFMT) == S_IFDIR);
-	if ((error = xfs_dir_ino_validate(trans->t_mountp, parent_dir->i_ino)))
-		return error;
-
-	return(xfs_dir_shortform_create(&args, parent_dir->i_ino));
-}
-
-/*
- * Generic handler routine to add a name to a directory.
- * Transitions directory from shortform to Btree as necessary.
- */
-static int							/* error */
-xfs_dir_createname(xfs_trans_t *trans, xfs_inode_t *dp, char *name,
-		   int namelen, xfs_ino_t inum, xfs_fsblock_t *firstblock,
-		   xfs_bmap_free_t *flist, xfs_extlen_t total)
-{
-	xfs_da_args_t args;
-	int retval, newsize, done;
-
-	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
-
-	if ((retval = xfs_dir_ino_validate(trans->t_mountp, inum)))
-		return (retval);
-
-	XFS_STATS_INC(xs_dir_create);
-	/*
-	 * Fill in the arg structure for this request.
-	 */
-	args.name = name;
-	args.namelen = namelen;
-	args.hashval = xfs_da_hashname(name, namelen);
-	args.inumber = inum;
-	args.dp = dp;
-	args.firstblock = firstblock;
-	args.flist = flist;
-	args.total = total;
-	args.whichfork = XFS_DATA_FORK;
-	args.trans = trans;
-	args.justcheck = 0;
-	args.addname = args.oknoent = 1;
-
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
-	done = 0;
-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-		newsize = XFS_DIR_SF_ENTSIZE_BYNAME(args.namelen);
-		if ((dp->i_d.di_size + newsize) <= XFS_IFORK_DSIZE(dp)) {
-			retval = xfs_dir_shortform_addname(&args);
-			done = 1;
-		} else {
-			if (total == 0)
-				return XFS_ERROR(ENOSPC);
-			retval = xfs_dir_shortform_to_leaf(&args);
-			done = retval != 0;
-		}
-	}
-	if (!done && xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
-		retval = xfs_dir_leaf_addname(&args);
-		done = retval != ENOSPC;
-		if (!done) {
-			if (total == 0)
-				return XFS_ERROR(ENOSPC);
-			retval = xfs_dir_leaf_to_node(&args);
-			done = retval != 0;
-		}
-	}
-	if (!done) {
-		retval = xfs_dir_node_addname(&args);
-	}
-	return(retval);
-}
-
-/*
- * Generic handler routine to check if a name can be added to a directory,
- * without adding any blocks to the directory.
- */
-static int							/* error */
-xfs_dir_canenter(xfs_trans_t *trans, xfs_inode_t *dp, char *name, int namelen)
-{
-	xfs_da_args_t args;
-	int retval, newsize;
-
-	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
-	/*
-	 * Fill in the arg structure for this request.
-	 */
-	args.name = name;
-	args.namelen = namelen;
-	args.hashval = xfs_da_hashname(name, namelen);
-	args.inumber = 0;
-	args.dp = dp;
-	args.firstblock = NULL;
-	args.flist = NULL;
-	args.total = 0;
-	args.whichfork = XFS_DATA_FORK;
-	args.trans = trans;
-	args.justcheck = args.addname = args.oknoent = 1;
-
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-		newsize = XFS_DIR_SF_ENTSIZE_BYNAME(args.namelen);
-		if ((dp->i_d.di_size + newsize) <= XFS_IFORK_DSIZE(dp))
-			retval = 0;
-		else
-			retval = XFS_ERROR(ENOSPC);
-	} else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
-		retval = xfs_dir_leaf_addname(&args);
-	} else {
-		retval = xfs_dir_node_addname(&args);
-	}
-	return(retval);
-}
-
-/*
- * Generic handler routine to remove a name from a directory.
- * Transitions directory from Btree to shortform as necessary.
- */
-static int							/* error */
-xfs_dir_removename(xfs_trans_t *trans, xfs_inode_t *dp, char *name,
-		   int namelen, xfs_ino_t ino, xfs_fsblock_t *firstblock,
-		   xfs_bmap_free_t *flist, xfs_extlen_t total)
-{
-	xfs_da_args_t args;
-	int count, totallen, newsize, retval;
-
-	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
-	XFS_STATS_INC(xs_dir_remove);
-	/*
-	 * Fill in the arg structure for this request.
-	 */
-	args.name = name;
-	args.namelen = namelen;
-	args.hashval = xfs_da_hashname(name, namelen);
-	args.inumber = ino;
-	args.dp = dp;
-	args.firstblock = firstblock;
-	args.flist = flist;
-	args.total = total;
-	args.whichfork = XFS_DATA_FORK;
-	args.trans = trans;
-	args.justcheck = args.addname = args.oknoent = 0;
-
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-		retval = xfs_dir_shortform_removename(&args);
-	} else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
-		retval = xfs_dir_leaf_removename(&args, &count, &totallen);
-		if (retval == 0) {
-			newsize = XFS_DIR_SF_ALLFIT(count, totallen);
-			if (newsize <= XFS_IFORK_DSIZE(dp)) {
-				retval = xfs_dir_leaf_to_shortform(&args);
-			}
-		}
-	} else {
-		retval = xfs_dir_node_removename(&args);
-	}
-	return(retval);
-}
-
-static int							/* error */
-xfs_dir_lookup(xfs_trans_t *trans, xfs_inode_t *dp, char *name, int namelen,
-				   xfs_ino_t *inum)
-{
-	xfs_da_args_t args;
-	int retval;
-
-	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
-
-	XFS_STATS_INC(xs_dir_lookup);
-	/*
-	 * Fill in the arg structure for this request.
-	 */
-	args.name = name;
-	args.namelen = namelen;
-	args.hashval = xfs_da_hashname(name, namelen);
-	args.inumber = 0;
-	args.dp = dp;
-	args.firstblock = NULL;
-	args.flist = NULL;
-	args.total = 0;
-	args.whichfork = XFS_DATA_FORK;
-	args.trans = trans;
-	args.justcheck = args.addname = 0;
-	args.oknoent = 1;
-
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-		retval = xfs_dir_shortform_lookup(&args);
-	} else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
-		retval = xfs_dir_leaf_lookup(&args);
-	} else {
-		retval = xfs_dir_node_lookup(&args);
-	}
-	if (retval == EEXIST)
-		retval = 0;
-	*inum = args.inumber;
-	return(retval);
-}
-
-/*
- * Implement readdir.
- */
-static int							/* error */
-xfs_dir_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio, int *eofp)
-{
-	xfs_dirent_t *dbp;
-	int  alignment, retval;
-	xfs_dir_put_t put;
-
-	XFS_STATS_INC(xs_dir_getdents);
-	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
-
-	/*
-	 * If our caller has given us a single contiguous memory buffer,
-	 * just work directly within that buffer.  If it's in user memory,
-	 * lock it down first.
-	 */
-	alignment = sizeof(xfs_off_t) - 1;
-	if ((uio->uio_iovcnt == 1) &&
-	    (((__psint_t)uio->uio_iov[0].iov_base & alignment) == 0) &&
-	    ((uio->uio_iov[0].iov_len & alignment) == 0)) {
-		dbp = NULL;
-		put = xfs_dir_put_dirent64_direct;
-	} else {
-		dbp = kmem_alloc(sizeof(*dbp) + MAXNAMELEN, KM_SLEEP);
-		put = xfs_dir_put_dirent64_uio;
-	}
-
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
-	*eofp = 0;
-
-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-		retval = xfs_dir_shortform_getdents(dp, uio, eofp, dbp, put);
-	} else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
-		retval = xfs_dir_leaf_getdents(trans, dp, uio, eofp, dbp, put);
-	} else {
-		retval = xfs_dir_node_getdents(trans, dp, uio, eofp, dbp, put);
-	}
-	if (dbp != NULL)
-		kmem_free(dbp, sizeof(*dbp) + MAXNAMELEN);
-
-	return(retval);
-}
-
-static int							/* error */
-xfs_dir_replace(xfs_trans_t *trans, xfs_inode_t *dp, char *name, int namelen,
-				    xfs_ino_t inum, xfs_fsblock_t *firstblock,
-				    xfs_bmap_free_t *flist, xfs_extlen_t total)
-{
-	xfs_da_args_t args;
-	int retval;
-
-	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
-
-	if ((retval = xfs_dir_ino_validate(trans->t_mountp, inum)))
-		return retval;
-
-	/*
-	 * Fill in the arg structure for this request.
-	 */
-	args.name = name;
-	args.namelen = namelen;
-	args.hashval = xfs_da_hashname(name, namelen);
-	args.inumber = inum;
-	args.dp = dp;
-	args.firstblock = firstblock;
-	args.flist = flist;
-	args.total = total;
-	args.whichfork = XFS_DATA_FORK;
-	args.trans = trans;
-	args.justcheck = args.addname = args.oknoent = 0;
-
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-		retval = xfs_dir_shortform_replace(&args);
-	} else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
-		retval = xfs_dir_leaf_replace(&args);
-	} else {
-		retval = xfs_dir_node_replace(&args);
-	}
-
-	return(retval);
-}
-
-static int
-xfs_dir_shortform_validate_ondisk(xfs_mount_t *mp, xfs_dinode_t *dp)
-{
-	xfs_ino_t		ino;
-	int			namelen_sum;
-	int			count;
-	xfs_dir_shortform_t	*sf;
-	xfs_dir_sf_entry_t	*sfe;
-	int			i;
-
-
-
-	if ((INT_GET(dp->di_core.di_mode, ARCH_CONVERT) & S_IFMT) != S_IFDIR) {
-		return 0;
-	}
-	if (INT_GET(dp->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_LOCAL) {
-		return 0;
-	}
-	if (INT_GET(dp->di_core.di_size, ARCH_CONVERT) < sizeof(sf->hdr)) {
-		xfs_fs_cmn_err(CE_WARN, mp, "Invalid shortform size: dp 0x%p",
-			dp);
-		return 1;
-	}
-	sf = (xfs_dir_shortform_t *)(&dp->di_u.di_dirsf);
-	ino = XFS_GET_DIR_INO8(sf->hdr.parent);
-	if (xfs_dir_ino_validate(mp, ino))
-		return 1;
-
-	count =	sf->hdr.count;
-	if ((count < 0) || ((count * 10) > XFS_LITINO(mp))) {
-		xfs_fs_cmn_err(CE_WARN, mp,
-			"Invalid shortform count: dp 0x%p", dp);
-		return(1);
-	}
-
-	if (count == 0) {
-		return 0;
-	}
-
-	namelen_sum = 0;
-	sfe = &sf->list[0];
-	for (i = sf->hdr.count - 1; i >= 0; i--) {
-		ino = XFS_GET_DIR_INO8(sfe->inumber);
-		xfs_dir_ino_validate(mp, ino);
-		if (sfe->namelen >= XFS_LITINO(mp)) {
-			xfs_fs_cmn_err(CE_WARN, mp,
-				"Invalid shortform namelen: dp 0x%p", dp);
-			return 1;
-		}
-		namelen_sum += sfe->namelen;
-		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
-	}
-	if (namelen_sum >= XFS_LITINO(mp)) {
-		xfs_fs_cmn_err(CE_WARN, mp,
-			"Invalid shortform namelen: dp 0x%p", dp);
-		return 1;
-	}
-
-	return 0;
-}
-
-/*========================================================================
- * External routines when dirsize == XFS_LBSIZE(dp->i_mount).
- *========================================================================*/
-
-/*
- * Add a name to the leaf directory structure
- * This is the external routine.
- */
-int
-xfs_dir_leaf_addname(xfs_da_args_t *args)
-{
-	int index, retval;
-	xfs_dabuf_t *bp;
-
-	retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
-					      XFS_DATA_FORK);
-	if (retval)
-		return(retval);
-	ASSERT(bp != NULL);
-
-	retval = xfs_dir_leaf_lookup_int(bp, args, &index);
-	if (retval == ENOENT)
-		retval = xfs_dir_leaf_add(bp, args, index);
-	xfs_da_buf_done(bp);
-	return(retval);
-}
-
-/*
- * Remove a name from the leaf directory structure
- * This is the external routine.
- */
-STATIC int
-xfs_dir_leaf_removename(xfs_da_args_t *args, int *count, int *totallen)
-{
-	xfs_dir_leafblock_t *leaf;
-	int index, retval;
-	xfs_dabuf_t *bp;
-
-	retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
-					      XFS_DATA_FORK);
-	if (retval)
-		return(retval);
-	ASSERT(bp != NULL);
-	leaf = bp->data;
-	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-	retval = xfs_dir_leaf_lookup_int(bp, args, &index);
-	if (retval == EEXIST) {
-		(void)xfs_dir_leaf_remove(args->trans, bp, index);
-		*count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
-		*totallen = INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
-		retval = 0;
-	}
-	xfs_da_buf_done(bp);
-	return(retval);
-}
-
-/*
- * Look up a name in a leaf directory structure.
- * This is the external routine.
- */
-STATIC int
-xfs_dir_leaf_lookup(xfs_da_args_t *args)
-{
-	int index, retval;
-	xfs_dabuf_t *bp;
-
-	retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
-					      XFS_DATA_FORK);
-	if (retval)
-		return(retval);
-	ASSERT(bp != NULL);
-	retval = xfs_dir_leaf_lookup_int(bp, args, &index);
-	xfs_da_brelse(args->trans, bp);
-	return(retval);
-}
-
-/*
- * Copy out directory entries for getdents(), for leaf directories.
- */
-STATIC int
-xfs_dir_leaf_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio,
-				  int *eofp, xfs_dirent_t *dbp, xfs_dir_put_t put)
-{
-	xfs_dabuf_t *bp;
-	int retval, eob;
-
-	retval = xfs_da_read_buf(dp->i_transp, dp, 0, -1, &bp, XFS_DATA_FORK);
-	if (retval)
-		return(retval);
-	ASSERT(bp != NULL);
-	retval = xfs_dir_leaf_getdents_int(bp, dp, 0, uio, &eob, dbp, put, -1);
-	xfs_da_brelse(trans, bp);
-	*eofp = (eob == 0);
-	return(retval);
-}
-
-/*
- * Look up a name in a leaf directory structure, replace the inode number.
- * This is the external routine.
- */
-STATIC int
-xfs_dir_leaf_replace(xfs_da_args_t *args)
-{
-	int index, retval;
-	xfs_dabuf_t *bp;
-	xfs_ino_t inum;
-	xfs_dir_leafblock_t *leaf;
-	xfs_dir_leaf_entry_t *entry;
-	xfs_dir_leaf_name_t *namest;
-
-	inum = args->inumber;
-	retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
-					      XFS_DATA_FORK);
-	if (retval)
-		return(retval);
-	ASSERT(bp != NULL);
-	retval = xfs_dir_leaf_lookup_int(bp, args, &index);
-	if (retval == EEXIST) {
-		leaf = bp->data;
-		entry = &leaf->entries[index];
-		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
-		/* XXX - replace assert? */
-		XFS_DIR_SF_PUT_DIRINO(&inum, &namest->inumber);
-		xfs_da_log_buf(args->trans, bp,
-		    XFS_DA_LOGRANGE(leaf, namest, sizeof(namest->inumber)));
-		xfs_da_buf_done(bp);
-		retval = 0;
-	} else
-		xfs_da_brelse(args->trans, bp);
-	return(retval);
-}
-
-
-/*========================================================================
- * External routines when dirsize > XFS_LBSIZE(mp).
- *========================================================================*/
-
-/*
- * Add a name to a Btree-format directory.
- *
- * This will involve walking down the Btree, and may involve splitting
- * leaf nodes and even splitting intermediate nodes up to and including
- * the root node (a special case of an intermediate node).
- */
-STATIC int
-xfs_dir_node_addname(xfs_da_args_t *args)
-{
-	xfs_da_state_t *state;
-	xfs_da_state_blk_t *blk;
-	int retval, error;
-
-	/*
-	 * Fill in bucket of arguments/results/context to carry around.
-	 */
-	state = xfs_da_state_alloc();
-	state->args = args;
-	state->mp = args->dp->i_mount;
-	state->blocksize = state->mp->m_sb.sb_blocksize;
-	state->node_ents = state->mp->m_dir_node_ents;
-
-	/*
-	 * Search to see if name already exists, and get back a pointer
-	 * to where it should go.
-	 */
-	error = xfs_da_node_lookup_int(state, &retval);
-	if (error)
-		retval = error;
-	if (retval != ENOENT)
-		goto error;
-	blk = &state->path.blk[ state->path.active-1 ];
-	ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC);
-	retval = xfs_dir_leaf_add(blk->bp, args, blk->index);
-	if (retval == 0) {
-		/*
-		 * Addition succeeded, update Btree hashvals.
-		 */
-		if (!args->justcheck)
-			xfs_da_fixhashpath(state, &state->path);
-	} else {
-		/*
-		 * Addition failed, split as many Btree elements as required.
-		 */
-		if (args->total == 0) {
-			ASSERT(retval == ENOSPC);
-			goto error;
-		}
-		retval = xfs_da_split(state);
-	}
-error:
-	xfs_da_state_free(state);
-
-	return(retval);
-}
-
-/*
- * Remove a name from a B-tree directory.
- *
- * This will involve walking down the Btree, and may involve joining
- * leaf nodes and even joining intermediate nodes up to and including
- * the root node (a special case of an intermediate node).
- */
-STATIC int
-xfs_dir_node_removename(xfs_da_args_t *args)
-{
-	xfs_da_state_t *state;
-	xfs_da_state_blk_t *blk;
-	int retval, error;
-
-	state = xfs_da_state_alloc();
-	state->args = args;
-	state->mp = args->dp->i_mount;
-	state->blocksize = state->mp->m_sb.sb_blocksize;
-	state->node_ents = state->mp->m_dir_node_ents;
-
-	/*
-	 * Search to see if name exists, and get back a pointer to it.
-	 */
-	error = xfs_da_node_lookup_int(state, &retval);
-	if (error)
-		retval = error;
-	if (retval != EEXIST) {
-		xfs_da_state_free(state);
-		return(retval);
-	}
-
-	/*
-	 * Remove the name and update the hashvals in the tree.
-	 */
-	blk = &state->path.blk[ state->path.active-1 ];
-	ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC);
-	retval = xfs_dir_leaf_remove(args->trans, blk->bp, blk->index);
-	xfs_da_fixhashpath(state, &state->path);
-
-	/*
-	 * Check to see if the tree needs to be collapsed.
-	 */
-	error = 0;
-	if (retval) {
-		error = xfs_da_join(state);
-	}
-
-	xfs_da_state_free(state);
-	if (error)
-		return(error);
-	return(0);
-}
-
-/*
- * Look up a filename in a int directory.
- * Use an internal routine to actually do all the work.
- */
-STATIC int
-xfs_dir_node_lookup(xfs_da_args_t *args)
-{
-	xfs_da_state_t *state;
-	int retval, error, i;
-
-	state = xfs_da_state_alloc();
-	state->args = args;
-	state->mp = args->dp->i_mount;
-	state->blocksize = state->mp->m_sb.sb_blocksize;
-	state->node_ents = state->mp->m_dir_node_ents;
-
-	/*
-	 * Search to see if name exists,
-	 * and get back a pointer to it.
-	 */
-	error = xfs_da_node_lookup_int(state, &retval);
-	if (error) {
-		retval = error;
-	}
-
-	/*
-	 * If not in a transaction, we have to release all the buffers.
-	 */
-	for (i = 0; i < state->path.active; i++) {
-		xfs_da_brelse(args->trans, state->path.blk[i].bp);
-		state->path.blk[i].bp = NULL;
-	}
-
-	xfs_da_state_free(state);
-	return(retval);
-}
-
-STATIC int
-xfs_dir_node_getdents(xfs_trans_t *trans, xfs_inode_t *dp, uio_t *uio,
-				  int *eofp, xfs_dirent_t *dbp, xfs_dir_put_t put)
-{
-	xfs_da_intnode_t *node;
-	xfs_da_node_entry_t *btree;
-	xfs_dir_leafblock_t *leaf = NULL;
-	xfs_dablk_t bno, nextbno;
-	xfs_dahash_t cookhash;
-	xfs_mount_t *mp;
-	int error, eob, i;
-	xfs_dabuf_t *bp;
-	xfs_daddr_t nextda;
-
-	/*
-	 * Pick up our context.
-	 */
-	mp = dp->i_mount;
-	bp = NULL;
-	bno = XFS_DA_COOKIE_BNO(mp, uio->uio_offset);
-	cookhash = XFS_DA_COOKIE_HASH(mp, uio->uio_offset);
-
-	xfs_dir_trace_g_du("node: start", dp, uio);
-
-	/*
-	 * Re-find our place, even if we're confused about what our place is.
-	 *
-	 * First we check the block number from the magic cookie, it is a
-	 * cache of where we ended last time.  If we find a leaf block, and
-	 * the starting hashval in that block is less than our desired
-	 * hashval, then we run with it.
-	 */
-	if (bno > 0) {
-		error = xfs_da_read_buf(trans, dp, bno, -2, &bp, XFS_DATA_FORK);
-		if ((error != 0) && (error != EFSCORRUPTED))
-			return(error);
-		if (bp)
-			leaf = bp->data;
-		if (bp && be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC) {
-			xfs_dir_trace_g_dub("node: block not a leaf",
-						   dp, uio, bno);
-			xfs_da_brelse(trans, bp);
-			bp = NULL;
-		}
-		if (bp && INT_GET(leaf->entries[0].hashval, ARCH_CONVERT) > cookhash) {
-			xfs_dir_trace_g_dub("node: leaf hash too large",
-						   dp, uio, bno);
-			xfs_da_brelse(trans, bp);
-			bp = NULL;
-		}
-		if (bp &&
-		    cookhash > INT_GET(leaf->entries[INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT)) {
-			xfs_dir_trace_g_dub("node: leaf hash too small",
-						   dp, uio, bno);
-			xfs_da_brelse(trans, bp);
-			bp = NULL;
-		}
-	}
-
-	/*
-	 * If we did not find a leaf block from the blockno in the cookie,
-	 * or we there was no blockno in the cookie (eg: first time thru),
-	 * the we start at the top of the Btree and re-find our hashval.
-	 */
-	if (bp == NULL) {
-		xfs_dir_trace_g_du("node: start at root" , dp, uio);
-		bno = 0;
-		for (;;) {
-			error = xfs_da_read_buf(trans, dp, bno, -1, &bp,
-						       XFS_DATA_FORK);
-			if (error)
-				return(error);
-			if (bp == NULL)
-				return(XFS_ERROR(EFSCORRUPTED));
-			node = bp->data;
-			if (be16_to_cpu(node->hdr.info.magic) != XFS_DA_NODE_MAGIC)
-				break;
-			btree = &node->btree[0];
-			xfs_dir_trace_g_dun("node: node detail", dp, uio, node);
-			for (i = 0; i < be16_to_cpu(node->hdr.count); btree++, i++) {
-				if (be32_to_cpu(btree->hashval) >= cookhash) {
-					bno = be32_to_cpu(btree->before);
-					break;
-				}
-			}
-			if (i == be16_to_cpu(node->hdr.count)) {
-				xfs_da_brelse(trans, bp);
-				xfs_dir_trace_g_du("node: hash beyond EOF",
-							  dp, uio);
-				uio->uio_offset = XFS_DA_MAKE_COOKIE(mp, 0, 0,
-							     XFS_DA_MAXHASH);
-				*eofp = 1;
-				return(0);
-			}
-			xfs_dir_trace_g_dub("node: going to block",
-						   dp, uio, bno);
-			xfs_da_brelse(trans, bp);
-		}
-	}
-	ASSERT(cookhash != XFS_DA_MAXHASH);
-
-	/*
-	 * We've dropped down to the (first) leaf block that contains the
-	 * hashval we are interested in.  Continue rolling upward thru the
-	 * leaf blocks until we fill up our buffer.
-	 */
-	for (;;) {
-		leaf = bp->data;
-		if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC)) {
-			xfs_dir_trace_g_dul("node: not a leaf", dp, uio, leaf);
-			xfs_da_brelse(trans, bp);
-			XFS_CORRUPTION_ERROR("xfs_dir_node_getdents(1)",
-					     XFS_ERRLEVEL_LOW, mp, leaf);
-			return XFS_ERROR(EFSCORRUPTED);
-		}
-		xfs_dir_trace_g_dul("node: leaf detail", dp, uio, leaf);
-		if ((nextbno = be32_to_cpu(leaf->hdr.info.forw))) {
-			nextda = xfs_da_reada_buf(trans, dp, nextbno,
-						  XFS_DATA_FORK);
-		} else
-			nextda = -1;
-		error = xfs_dir_leaf_getdents_int(bp, dp, bno, uio, &eob, dbp,
-						  put, nextda);
-		xfs_da_brelse(trans, bp);
-		bno = nextbno;
-		if (eob) {
-			xfs_dir_trace_g_dub("node: E-O-B", dp, uio, bno);
-			*eofp = 0;
-			return(error);
-		}
-		if (bno == 0)
-			break;
-		error = xfs_da_read_buf(trans, dp, bno, nextda, &bp,
-					XFS_DATA_FORK);
-		if (error)
-			return(error);
-		if (unlikely(bp == NULL)) {
-			XFS_ERROR_REPORT("xfs_dir_node_getdents(2)",
-					 XFS_ERRLEVEL_LOW, mp);
-			return(XFS_ERROR(EFSCORRUPTED));
-		}
-	}
-	*eofp = 1;
-	xfs_dir_trace_g_du("node: E-O-F", dp, uio);
-	return(0);
-}
-
-/*
- * Look up a filename in an int directory, replace the inode number.
- * Use an internal routine to actually do the lookup.
- */
-STATIC int
-xfs_dir_node_replace(xfs_da_args_t *args)
-{
-	xfs_da_state_t *state;
-	xfs_da_state_blk_t *blk;
-	xfs_dir_leafblock_t *leaf;
-	xfs_dir_leaf_entry_t *entry;
-	xfs_dir_leaf_name_t *namest;
-	xfs_ino_t inum;
-	int retval, error, i;
-	xfs_dabuf_t *bp;
-
-	state = xfs_da_state_alloc();
-	state->args = args;
-	state->mp = args->dp->i_mount;
-	state->blocksize = state->mp->m_sb.sb_blocksize;
-	state->node_ents = state->mp->m_dir_node_ents;
-	inum = args->inumber;
-
-	/*
-	 * Search to see if name exists,
-	 * and get back a pointer to it.
-	 */
-	error = xfs_da_node_lookup_int(state, &retval);
-	if (error) {
-		retval = error;
-	}
-
-	if (retval == EEXIST) {
-		blk = &state->path.blk[state->path.active - 1];
-		ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC);
-		bp = blk->bp;
-		leaf = bp->data;
-		entry = &leaf->entries[blk->index];
-		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
-		/* XXX - replace assert ? */
-		XFS_DIR_SF_PUT_DIRINO(&inum, &namest->inumber);
-		xfs_da_log_buf(args->trans, bp,
-		    XFS_DA_LOGRANGE(leaf, namest, sizeof(namest->inumber)));
-		xfs_da_buf_done(bp);
-		blk->bp = NULL;
-		retval = 0;
-	} else {
-		i = state->path.active - 1;
-		xfs_da_brelse(args->trans, state->path.blk[i].bp);
-		state->path.blk[i].bp = NULL;
-	}
-	for (i = 0; i < state->path.active - 1; i++) {
-		xfs_da_brelse(args->trans, state->path.blk[i].bp);
-		state->path.blk[i].bp = NULL;
-	}
-
-	xfs_da_state_free(state);
-	return(retval);
-}
-
-#if defined(XFS_DIR_TRACE)
-/*
- * Add a trace buffer entry for an inode and a uio.
- */
-void
-xfs_dir_trace_g_du(char *where, xfs_inode_t *dp, uio_t *uio)
-{
-	xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DU, where,
-		     (void *)dp, (void *)dp->i_mount,
-		     (void *)((unsigned long)(uio->uio_offset >> 32)),
-		     (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)),
-		     (void *)(unsigned long)uio->uio_resid,
-		     NULL, NULL, NULL, NULL, NULL, NULL, NULL);
-}
-
-/*
- * Add a trace buffer entry for an inode and a uio.
- */
-void
-xfs_dir_trace_g_dub(char *where, xfs_inode_t *dp, uio_t *uio, xfs_dablk_t bno)
-{
-	xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUB, where,
-		     (void *)dp, (void *)dp->i_mount,
-		     (void *)((unsigned long)(uio->uio_offset >> 32)),
-		     (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)),
-		     (void *)(unsigned long)uio->uio_resid,
-		     (void *)(unsigned long)bno,
-		     NULL, NULL, NULL, NULL, NULL, NULL);
-}
-
-/*
- * Add a trace buffer entry for an inode and a uio.
- */
-void
-xfs_dir_trace_g_dun(char *where, xfs_inode_t *dp, uio_t *uio,
-			xfs_da_intnode_t *node)
-{
-	int	last = be16_to_cpu(node->hdr.count) - 1;
-
-	xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUN, where,
-		     (void *)dp, (void *)dp->i_mount,
-		     (void *)((unsigned long)(uio->uio_offset >> 32)),
-		     (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)),
-		     (void *)(unsigned long)uio->uio_resid,
-		     (void *)(unsigned long)be32_to_cpu(node->hdr.info.forw),
-		     (void *)(unsigned long)
-			be16_to_cpu(node->hdr.count),
-		     (void *)(unsigned long)
-			be32_to_cpu(node->btree[0].hashval),
-		     (void *)(unsigned long)
-			be32_to_cpu(node->btree[last].hashval),
-		     NULL, NULL, NULL);
-}
-
-/*
- * Add a trace buffer entry for an inode and a uio.
- */
-void
-xfs_dir_trace_g_dul(char *where, xfs_inode_t *dp, uio_t *uio,
-			xfs_dir_leafblock_t *leaf)
-{
-	int	last = INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1;
-
-	xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUL, where,
-		     (void *)dp, (void *)dp->i_mount,
-		     (void *)((unsigned long)(uio->uio_offset >> 32)),
-		     (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)),
-		     (void *)(unsigned long)uio->uio_resid,
-		     (void *)(unsigned long)be32_to_cpu(leaf->hdr.info.forw),
-		     (void *)(unsigned long)
-			INT_GET(leaf->hdr.count, ARCH_CONVERT),
-		     (void *)(unsigned long)
-			INT_GET(leaf->entries[0].hashval, ARCH_CONVERT),
-		     (void *)(unsigned long)
-			INT_GET(leaf->entries[last].hashval, ARCH_CONVERT),
-		     NULL, NULL, NULL);
-}
-
-/*
- * Add a trace buffer entry for an inode and a uio.
- */
-void
-xfs_dir_trace_g_due(char *where, xfs_inode_t *dp, uio_t *uio,
-			xfs_dir_leaf_entry_t *entry)
-{
-	xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUE, where,
-		     (void *)dp, (void *)dp->i_mount,
-		     (void *)((unsigned long)(uio->uio_offset >> 32)),
-		     (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)),
-		     (void *)(unsigned long)uio->uio_resid,
-		     (void *)(unsigned long)
-			INT_GET(entry->hashval, ARCH_CONVERT),
-		     NULL, NULL, NULL, NULL, NULL, NULL);
-}
-
-/*
- * Add a trace buffer entry for an inode and a uio.
- */
-void
-xfs_dir_trace_g_duc(char *where, xfs_inode_t *dp, uio_t *uio, xfs_off_t cookie)
-{
-	xfs_dir_trace_enter(XFS_DIR_KTRACE_G_DUC, where,
-		     (void *)dp, (void *)dp->i_mount,
-		     (void *)((unsigned long)(uio->uio_offset >> 32)),
-		     (void *)((unsigned long)(uio->uio_offset & 0xFFFFFFFF)),
-		     (void *)(unsigned long)uio->uio_resid,
-		     (void *)((unsigned long)(cookie >> 32)),
-		     (void *)((unsigned long)(cookie & 0xFFFFFFFF)),
-		     NULL, NULL, NULL, NULL, NULL);
-}
-
-/*
- * Add a trace buffer entry for the arguments given to the routine,
- * generic form.
- */
-void
-xfs_dir_trace_enter(int type, char *where,
-			void * a0, void * a1,
-			void * a2, void * a3,
-			void * a4, void * a5,
-			void * a6, void * a7,
-			void * a8, void * a9,
-			void * a10, void * a11)
-{
-	ASSERT(xfs_dir_trace_buf);
-	ktrace_enter(xfs_dir_trace_buf, (void *)(unsigned long)type,
-					(void *)where,
-					(void *)a0, (void *)a1, (void *)a2,
-					(void *)a3, (void *)a4, (void *)a5,
-					(void *)a6, (void *)a7, (void *)a8,
-					(void *)a9, (void *)a10, (void *)a11,
-					NULL, NULL);
-}
-#endif	/* XFS_DIR_TRACE */
diff --git a/fs/xfs/xfs_dir.h b/fs/xfs/xfs_dir.h
deleted file mode 100644
index 8cc8afb9f6c..00000000000
--- a/fs/xfs/xfs_dir.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DIR_H__
-#define	__XFS_DIR_H__
-
-/*
- * Large directories are structured around Btrees where all the data
- * elements are in the leaf nodes.  Filenames are hashed into an int,
- * then that int is used as the index into the Btree.  Since the hashval
- * of a filename may not be unique, we may have duplicate keys.  The
- * internal links in the Btree are logical block offsets into the file.
- *
- * Small directories use a different format and are packed as tightly
- * as possible so as to fit into the literal area of the inode.
- */
-
-/*========================================================================
- * Function prototypes for the kernel.
- *========================================================================*/
-
-struct uio;
-struct xfs_bmap_free;
-struct xfs_da_args;
-struct xfs_dinode;
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * Directory function types.
- * Put in structures (xfs_dirops_t) for v1 and v2 directories.
- */
-typedef void	(*xfs_dir_mount_t)(struct xfs_mount *mp);
-typedef int	(*xfs_dir_isempty_t)(struct xfs_inode *dp);
-typedef int	(*xfs_dir_init_t)(struct xfs_trans *tp,
-				  struct xfs_inode *dp,
-				  struct xfs_inode *pdp);
-typedef int	(*xfs_dir_createname_t)(struct xfs_trans *tp,
-					struct xfs_inode *dp,
-					char *name,
-					int namelen,
-					xfs_ino_t inum,
-					xfs_fsblock_t *first,
-					struct xfs_bmap_free *flist,
-					xfs_extlen_t total);
-typedef int	(*xfs_dir_lookup_t)(struct xfs_trans *tp,
-				    struct xfs_inode *dp,
-				    char *name,
-				    int namelen,
-				    xfs_ino_t *inum);
-typedef int	(*xfs_dir_removename_t)(struct xfs_trans *tp,
-					struct xfs_inode *dp,
-					char *name,
-					int namelen,
-					xfs_ino_t ino,
-					xfs_fsblock_t *first,
-					struct xfs_bmap_free *flist,
-					xfs_extlen_t total);
-typedef int	(*xfs_dir_getdents_t)(struct xfs_trans *tp,
-				      struct xfs_inode *dp,
-				      struct uio *uio,
-				      int *eofp);
-typedef int	(*xfs_dir_replace_t)(struct xfs_trans *tp,
-				     struct xfs_inode *dp,
-				     char *name,
-				     int namelen,
-				     xfs_ino_t inum,
-				     xfs_fsblock_t *first,
-				     struct xfs_bmap_free *flist,
-				     xfs_extlen_t total);
-typedef int	(*xfs_dir_canenter_t)(struct xfs_trans *tp,
-				      struct xfs_inode *dp,
-				      char *name,
-				      int namelen);
-typedef int	(*xfs_dir_shortform_validate_ondisk_t)(struct xfs_mount *mp,
-						       struct xfs_dinode *dip);
-typedef int	(*xfs_dir_shortform_to_single_t)(struct xfs_da_args *args);
-
-typedef struct xfs_dirops {
-	xfs_dir_mount_t				xd_mount;
-	xfs_dir_isempty_t			xd_isempty;
-	xfs_dir_init_t				xd_init;
-	xfs_dir_createname_t			xd_createname;
-	xfs_dir_lookup_t			xd_lookup;
-	xfs_dir_removename_t			xd_removename;
-	xfs_dir_getdents_t			xd_getdents;
-	xfs_dir_replace_t			xd_replace;
-	xfs_dir_canenter_t			xd_canenter;
-	xfs_dir_shortform_validate_ondisk_t	xd_shortform_validate_ondisk;
-	xfs_dir_shortform_to_single_t		xd_shortform_to_single;
-} xfs_dirops_t;
-
-/*
- * Overall external interface routines.
- */
-void	xfs_dir_startup(void);	/* called exactly once */
-
-#define	XFS_DIR_MOUNT(mp)	\
-	((mp)->m_dirops.xd_mount(mp))
-#define	XFS_DIR_ISEMPTY(mp,dp)	\
-	((mp)->m_dirops.xd_isempty(dp))
-#define	XFS_DIR_INIT(mp,tp,dp,pdp)	\
-	((mp)->m_dirops.xd_init(tp,dp,pdp))
-#define	XFS_DIR_CREATENAME(mp,tp,dp,name,namelen,inum,first,flist,total) \
-	((mp)->m_dirops.xd_createname(tp,dp,name,namelen,inum,first,flist,\
-				      total))
-#define	XFS_DIR_LOOKUP(mp,tp,dp,name,namelen,inum)	\
-	((mp)->m_dirops.xd_lookup(tp,dp,name,namelen,inum))
-#define	XFS_DIR_REMOVENAME(mp,tp,dp,name,namelen,ino,first,flist,total)	\
-	((mp)->m_dirops.xd_removename(tp,dp,name,namelen,ino,first,flist,total))
-#define	XFS_DIR_GETDENTS(mp,tp,dp,uio,eofp)	\
-	((mp)->m_dirops.xd_getdents(tp,dp,uio,eofp))
-#define	XFS_DIR_REPLACE(mp,tp,dp,name,namelen,inum,first,flist,total)	\
-	((mp)->m_dirops.xd_replace(tp,dp,name,namelen,inum,first,flist,total))
-#define	XFS_DIR_CANENTER(mp,tp,dp,name,namelen)	\
-	((mp)->m_dirops.xd_canenter(tp,dp,name,namelen))
-#define	XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp,dip)	\
-	((mp)->m_dirops.xd_shortform_validate_ondisk(mp,dip))
-#define	XFS_DIR_SHORTFORM_TO_SINGLE(mp,args)	\
-	((mp)->m_dirops.xd_shortform_to_single(args))
-
-#define	XFS_DIR_IS_V1(mp)	((mp)->m_dirversion == 1)
-#define	XFS_DIR_IS_V2(mp)	((mp)->m_dirversion == 2)
-extern xfs_dirops_t xfsv1_dirops;
-extern xfs_dirops_t xfsv2_dirops;
-
-#endif	/* __XFS_DIR_H__ */
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 022c8398ab6..8edbe1adb95 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -24,21 +24,18 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
 #include "xfs_bmap.h"
-#include "xfs_dir_leaf.h"
 #include "xfs_dir2_data.h"
 #include "xfs_dir2_leaf.h"
 #include "xfs_dir2_block.h"
@@ -46,69 +43,14 @@
 #include "xfs_dir2_trace.h"
 #include "xfs_error.h"
 
-/*
- * Declarations for interface routines.
- */
-static void	xfs_dir2_mount(xfs_mount_t *mp);
-static int	xfs_dir2_isempty(xfs_inode_t *dp);
-static int	xfs_dir2_init(xfs_trans_t *tp, xfs_inode_t *dp,
-			      xfs_inode_t *pdp);
-static int	xfs_dir2_createname(xfs_trans_t *tp, xfs_inode_t *dp,
-				    char *name, int namelen, xfs_ino_t inum,
-				    xfs_fsblock_t *first,
-				    xfs_bmap_free_t *flist, xfs_extlen_t total);
-static int	xfs_dir2_lookup(xfs_trans_t *tp, xfs_inode_t *dp, char *name,
-				int namelen, xfs_ino_t *inum);
-static int	xfs_dir2_removename(xfs_trans_t *tp, xfs_inode_t *dp,
-				    char *name, int namelen, xfs_ino_t ino,
-				    xfs_fsblock_t *first,
-				    xfs_bmap_free_t *flist, xfs_extlen_t total);
-static int	xfs_dir2_getdents(xfs_trans_t *tp, xfs_inode_t *dp, uio_t *uio,
-				  int *eofp);
-static int	xfs_dir2_replace(xfs_trans_t *tp, xfs_inode_t *dp, char *name,
-				 int namelen, xfs_ino_t inum,
-				 xfs_fsblock_t *first, xfs_bmap_free_t *flist,
-				 xfs_extlen_t total);
-static int	xfs_dir2_canenter(xfs_trans_t *tp, xfs_inode_t *dp, char *name,
-				  int namelen);
-static int	xfs_dir2_shortform_validate_ondisk(xfs_mount_t *mp,
-						   xfs_dinode_t *dip);
-
-/*
- * Utility routine declarations.
- */
 static int	xfs_dir2_put_dirent64_direct(xfs_dir2_put_args_t *pa);
 static int	xfs_dir2_put_dirent64_uio(xfs_dir2_put_args_t *pa);
 
-/*
- * Directory operations vector.
- */
-xfs_dirops_t	xfsv2_dirops = {
-	.xd_mount			= xfs_dir2_mount,
-	.xd_isempty			= xfs_dir2_isempty,
-	.xd_init			= xfs_dir2_init,
-	.xd_createname			= xfs_dir2_createname,
-	.xd_lookup			= xfs_dir2_lookup,
-	.xd_removename			= xfs_dir2_removename,
-	.xd_getdents			= xfs_dir2_getdents,
-	.xd_replace			= xfs_dir2_replace,
-	.xd_canenter			= xfs_dir2_canenter,
-	.xd_shortform_validate_ondisk	= xfs_dir2_shortform_validate_ondisk,
-	.xd_shortform_to_single		= xfs_dir2_sf_to_block,
-};
-
-/*
- * Interface routines.
- */
-
-/*
- * Initialize directory-related fields in the mount structure.
- */
-static void
-xfs_dir2_mount(
-	xfs_mount_t	*mp)		/* filesystem mount point */
+void
+xfs_dir_mount(
+	xfs_mount_t	*mp)
 {
-	mp->m_dirversion = 2;
+	ASSERT(XFS_SB_VERSION_HASDIRV2(&mp->m_sb));
 	ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
 	       XFS_MAX_BLOCKSIZE);
 	mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog);
@@ -128,19 +70,15 @@ xfs_dir2_mount(
 /*
  * Return 1 if directory contains only "." and "..".
  */
-static int				/* return code */
-xfs_dir2_isempty(
-	xfs_inode_t	*dp)		/* incore inode structure */
+int
+xfs_dir_isempty(
+	xfs_inode_t	*dp)
 {
-	xfs_dir2_sf_t	*sfp;		/* shortform directory structure */
+	xfs_dir2_sf_t	*sfp;
 
 	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
-	/*
-	 * Might happen during shutdown.
-	 */
-	if (dp->i_d.di_size == 0) {
+	if (dp->i_d.di_size == 0)	/* might happen during shutdown. */
 		return 1;
-	}
 	if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
 		return 0;
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
@@ -148,53 +86,83 @@ xfs_dir2_isempty(
 }
 
 /*
+ * Validate a given inode number.
+ */
+int
+xfs_dir_ino_validate(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino)
+{
+	xfs_agblock_t	agblkno;
+	xfs_agino_t	agino;
+	xfs_agnumber_t	agno;
+	int		ino_ok;
+	int		ioff;
+
+	agno = XFS_INO_TO_AGNO(mp, ino);
+	agblkno = XFS_INO_TO_AGBNO(mp, ino);
+	ioff = XFS_INO_TO_OFFSET(mp, ino);
+	agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff);
+	ino_ok =
+		agno < mp->m_sb.sb_agcount &&
+		agblkno < mp->m_sb.sb_agblocks &&
+		agblkno != 0 &&
+		ioff < (1 << mp->m_sb.sb_inopblog) &&
+		XFS_AGINO_TO_INO(mp, agno, agino) == ino;
+	if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
+			XFS_RANDOM_DIR_INO_VALIDATE))) {
+		xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx",
+				(unsigned long long) ino);
+		XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	return 0;
+}
+
+/*
  * Initialize a directory with its "." and ".." entries.
  */
-static int				/* error */
-xfs_dir2_init(
-	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_inode_t	*dp,		/* incore directory inode */
-	xfs_inode_t	*pdp)		/* incore parent directory inode */
+int
+xfs_dir_init(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*dp,
+	xfs_inode_t	*pdp)
 {
-	xfs_da_args_t	args;		/* operation arguments */
-	int		error;		/* error return value */
+	xfs_da_args_t	args;
+	int		error;
 
 	memset((char *)&args, 0, sizeof(args));
 	args.dp = dp;
 	args.trans = tp;
 	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
-	if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) {
+	if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
 		return error;
-	}
 	return xfs_dir2_sf_create(&args, pdp->i_ino);
 }
 
 /*
   Enter a name in a directory.
  */
-static int					/* error */
-xfs_dir2_createname(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_inode_t		*dp,		/* incore directory inode */
-	char			*name,		/* new entry name */
-	int			namelen,	/* new entry name length */
+int
+xfs_dir_createname(
+	xfs_trans_t		*tp,
+	xfs_inode_t		*dp,
+	char			*name,
+	int			namelen,
 	xfs_ino_t		inum,		/* new entry inode number */
 	xfs_fsblock_t		*first,		/* bmap's firstblock */
 	xfs_bmap_free_t		*flist,		/* bmap's freeblock list */
 	xfs_extlen_t		total)		/* bmap's total block count */
 {
-	xfs_da_args_t		args;		/* operation arguments */
-	int			rval;		/* return value */
+	xfs_da_args_t		args;
+	int			rval;
 	int			v;		/* type-checking value */
 
 	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
-	if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) {
+	if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
 		return rval;
-	}
 	XFS_STATS_INC(xs_dir_create);
-	/*
-	 * Fill in the arg structure for this request.
-	 */
+
 	args.name = name;
 	args.namelen = namelen;
 	args.hashval = xfs_da_hashname(name, namelen);
@@ -207,18 +175,16 @@ xfs_dir2_createname(
 	args.trans = tp;
 	args.justcheck = 0;
 	args.addname = args.oknoent = 1;
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
+
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_addname(&args);
-	else if ((rval = xfs_dir2_isblock(tp, dp, &v))) {
+	else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
 		return rval;
-	} else if (v)
+	else if (v)
 		rval = xfs_dir2_block_addname(&args);
-	else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) {
+	else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
 		return rval;
-	} else if (v)
+	else if (v)
 		rval = xfs_dir2_leaf_addname(&args);
 	else
 		rval = xfs_dir2_node_addname(&args);
@@ -228,24 +194,21 @@ xfs_dir2_createname(
 /*
  * Lookup a name in a directory, give back the inode number.
  */
-static int				/* error */
-xfs_dir2_lookup(
-	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_inode_t	*dp,		/* incore directory inode */
-	char		*name,		/* lookup name */
-	int		namelen,	/* lookup name length */
+int
+xfs_dir_lookup(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*dp,
+	char		*name,
+	int		namelen,
 	xfs_ino_t	*inum)		/* out: inode number */
 {
-	xfs_da_args_t	args;		/* operation arguments */
-	int		rval;		/* return value */
+	xfs_da_args_t	args;
+	int		rval;
 	int		v;		/* type-checking value */
 
 	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
 	XFS_STATS_INC(xs_dir_lookup);
 
-	/*
-	 * Fill in the arg structure for this request.
-	 */
 	args.name = name;
 	args.namelen = namelen;
 	args.hashval = xfs_da_hashname(name, namelen);
@@ -258,18 +221,16 @@ xfs_dir2_lookup(
 	args.trans = tp;
 	args.justcheck = args.addname = 0;
 	args.oknoent = 1;
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
+
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_lookup(&args);
-	else if ((rval = xfs_dir2_isblock(tp, dp, &v))) {
+	else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
 		return rval;
-	} else if (v)
+	else if (v)
 		rval = xfs_dir2_block_lookup(&args);
-	else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) {
+	else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
 		return rval;
-	} else if (v)
+	else if (v)
 		rval = xfs_dir2_leaf_lookup(&args);
 	else
 		rval = xfs_dir2_node_lookup(&args);
@@ -283,26 +244,24 @@ xfs_dir2_lookup(
 /*
  * Remove an entry from a directory.
  */
-static int				/* error */
-xfs_dir2_removename(
-	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_inode_t	*dp,		/* incore directory inode */
-	char		*name,		/* name of entry to remove */
-	int		namelen,	/* name length of entry to remove */
-	xfs_ino_t	ino,		/* inode number of entry to remove */
+int
+xfs_dir_removename(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*dp,
+	char		*name,
+	int		namelen,
+	xfs_ino_t	ino,
 	xfs_fsblock_t	*first,		/* bmap's firstblock */
 	xfs_bmap_free_t	*flist,		/* bmap's freeblock list */
 	xfs_extlen_t	total)		/* bmap's total block count */
 {
-	xfs_da_args_t	args;		/* operation arguments */
-	int		rval;		/* return value */
+	xfs_da_args_t	args;
+	int		rval;
 	int		v;		/* type-checking value */
 
 	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
 	XFS_STATS_INC(xs_dir_remove);
-	/*
-	 * Fill in the arg structure for this request.
-	 */
+
 	args.name = name;
 	args.namelen = namelen;
 	args.hashval = xfs_da_hashname(name, namelen);
@@ -314,18 +273,16 @@ xfs_dir2_removename(
 	args.whichfork = XFS_DATA_FORK;
 	args.trans = tp;
 	args.justcheck = args.addname = args.oknoent = 0;
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
+
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_removename(&args);
-	else if ((rval = xfs_dir2_isblock(tp, dp, &v))) {
+	else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
 		return rval;
-	} else if (v)
+	else if (v)
 		rval = xfs_dir2_block_removename(&args);
-	else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) {
+	else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
 		return rval;
-	} else if (v)
+	else if (v)
 		rval = xfs_dir2_leaf_removename(&args);
 	else
 		rval = xfs_dir2_node_removename(&args);
@@ -335,10 +292,10 @@ xfs_dir2_removename(
 /*
  * Read a directory.
  */
-static int				/* error */
-xfs_dir2_getdents(
-	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_inode_t	*dp,		/* incore directory inode */
+int
+xfs_dir_getdents(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*dp,
 	uio_t		*uio,		/* caller's buffer control */
 	int		*eofp)		/* out: eof reached */
 {
@@ -367,14 +324,11 @@ xfs_dir2_getdents(
 	}
 
 	*eofp = 0;
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_getdents(dp, uio, eofp, dbp, put);
-	else if ((rval = xfs_dir2_isblock(tp, dp, &v))) {
+	else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
 		;
-	} else if (v)
+	else if (v)
 		rval = xfs_dir2_block_getdents(tp, dp, uio, eofp, dbp, put);
 	else
 		rval = xfs_dir2_leaf_getdents(tp, dp, uio, eofp, dbp, put);
@@ -386,29 +340,26 @@ xfs_dir2_getdents(
 /*
  * Replace the inode number of a directory entry.
  */
-static int				/* error */
-xfs_dir2_replace(
-	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_inode_t	*dp,		/* incore directory inode */
+int
+xfs_dir_replace(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*dp,
 	char		*name,		/* name of entry to replace */
-	int		namelen,	/* name length of entry to replace */
+	int		namelen,
 	xfs_ino_t	inum,		/* new inode number */
 	xfs_fsblock_t	*first,		/* bmap's firstblock */
 	xfs_bmap_free_t	*flist,		/* bmap's freeblock list */
 	xfs_extlen_t	total)		/* bmap's total block count */
 {
-	xfs_da_args_t	args;		/* operation arguments */
-	int		rval;		/* return value */
+	xfs_da_args_t	args;
+	int		rval;
 	int		v;		/* type-checking value */
 
 	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
 
-	if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) {
+	if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
 		return rval;
-	}
-	/*
-	 * Fill in the arg structure for this request.
-	 */
+
 	args.name = name;
 	args.namelen = namelen;
 	args.hashval = xfs_da_hashname(name, namelen);
@@ -420,18 +371,16 @@ xfs_dir2_replace(
 	args.whichfork = XFS_DATA_FORK;
 	args.trans = tp;
 	args.justcheck = args.addname = args.oknoent = 0;
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
+
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_replace(&args);
-	else if ((rval = xfs_dir2_isblock(tp, dp, &v))) {
+	else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
 		return rval;
-	} else if (v)
+	else if (v)
 		rval = xfs_dir2_block_replace(&args);
-	else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) {
+	else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
 		return rval;
-	} else if (v)
+	else if (v)
 		rval = xfs_dir2_leaf_replace(&args);
 	else
 		rval = xfs_dir2_node_replace(&args);
@@ -441,21 +390,19 @@ xfs_dir2_replace(
 /*
  * See if this entry can be added to the directory without allocating space.
  */
-static int				/* error */
-xfs_dir2_canenter(
-	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_inode_t	*dp,		/* incore directory inode */
+int
+xfs_dir_canenter(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*dp,
 	char		*name,		/* name of entry to add */
-	int		namelen)	/* name length of entry to add */
+	int		namelen)
 {
-	xfs_da_args_t	args;		/* operation arguments */
-	int		rval;		/* return value */
+	xfs_da_args_t	args;
+	int		rval;
 	int		v;		/* type-checking value */
 
 	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
-	/*
-	 * Fill in the arg structure for this request.
-	 */
+
 	args.name = name;
 	args.namelen = namelen;
 	args.hashval = xfs_da_hashname(name, namelen);
@@ -467,18 +414,16 @@ xfs_dir2_canenter(
 	args.whichfork = XFS_DATA_FORK;
 	args.trans = tp;
 	args.justcheck = args.addname = args.oknoent = 1;
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
+
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_addname(&args);
-	else if ((rval = xfs_dir2_isblock(tp, dp, &v))) {
+	else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
 		return rval;
-	} else if (v)
+	else if (v)
 		rval = xfs_dir2_block_addname(&args);
-	else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) {
+	else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
 		return rval;
-	} else if (v)
+	else if (v)
 		rval = xfs_dir2_leaf_addname(&args);
 	else
 		rval = xfs_dir2_node_addname(&args);
@@ -486,19 +431,6 @@ xfs_dir2_canenter(
 }
 
 /*
- * Dummy routine for shortform inode validation.
- * Can't really do this.
- */
-/* ARGSUSED */
-static int				/* error */
-xfs_dir2_shortform_validate_ondisk(
-	xfs_mount_t	*mp,		/* filesystem mount point */
-	xfs_dinode_t	*dip)		/* ondisk inode */
-{
-	return 0;
-}
-
-/*
  * Utility routines.
  */
 
@@ -507,24 +439,24 @@ xfs_dir2_shortform_validate_ondisk(
  * This routine is for data and free blocks, not leaf/node blocks
  * which are handled by xfs_da_grow_inode.
  */
-int					/* error */
+int
 xfs_dir2_grow_inode(
-	xfs_da_args_t	*args,		/* operation arguments */
+	xfs_da_args_t	*args,
 	int		space,		/* v2 dir's space XFS_DIR2_xxx_SPACE */
 	xfs_dir2_db_t	*dbp)		/* out: block number added */
 {
 	xfs_fileoff_t	bno;		/* directory offset of new block */
 	int		count;		/* count of filesystem blocks */
 	xfs_inode_t	*dp;		/* incore directory inode */
-	int		error;		/* error return value */
+	int		error;
 	int		got;		/* blocks actually mapped */
-	int		i;		/* temp mapping index */
+	int		i;
 	xfs_bmbt_irec_t	map;		/* single structure for bmap */
 	int		mapi;		/* mapping index */
 	xfs_bmbt_irec_t	*mapp;		/* bmap mapping structure(s) */
-	xfs_mount_t	*mp;		/* filesystem mount point */
+	xfs_mount_t	*mp;
 	int		nmap;		/* number of bmap entries */
-	xfs_trans_t	*tp;		/* transaction pointer */
+	xfs_trans_t	*tp;
 
 	xfs_dir2_trace_args_s("grow_inode", args, space);
 	dp = args->dp;
@@ -538,9 +470,8 @@ xfs_dir2_grow_inode(
 	/*
 	 * Find the first hole for our block.
 	 */
-	if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK))) {
+	if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK)))
 		return error;
-	}
 	nmap = 1;
 	ASSERT(args->firstblock != NULL);
 	/*
@@ -549,13 +480,9 @@ xfs_dir2_grow_inode(
 	if ((error = xfs_bmapi(tp, dp, bno, count,
 			XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
 			args->firstblock, args->total, &map, &nmap,
-			args->flist))) {
+			args->flist, NULL)))
 		return error;
-	}
 	ASSERT(nmap <= 1);
-	/*
-	 * Got it in 1.
-	 */
 	if (nmap == 1) {
 		mapp = &map;
 		mapi = 1;
@@ -585,7 +512,8 @@ xfs_dir2_grow_inode(
 			if ((error = xfs_bmapi(tp, dp, b, c,
 					XFS_BMAPI_WRITE|XFS_BMAPI_METADATA,
 					args->firstblock, args->total,
-					&mapp[mapi], &nmap, args->flist))) {
+					&mapp[mapi], &nmap, args->flist,
+					NULL))) {
 				kmem_free(mapp, sizeof(*mapp) * count);
 				return error;
 			}
@@ -645,20 +573,19 @@ xfs_dir2_grow_inode(
 /*
  * See if the directory is a single-block form directory.
  */
-int					/* error */
+int
 xfs_dir2_isblock(
-	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_inode_t	*dp,		/* incore directory inode */
+	xfs_trans_t	*tp,
+	xfs_inode_t	*dp,
 	int		*vp)		/* out: 1 is block, 0 is not block */
 {
 	xfs_fileoff_t	last;		/* last file offset */
-	xfs_mount_t	*mp;		/* filesystem mount point */
-	int		rval;		/* return value */
+	xfs_mount_t	*mp;
+	int		rval;
 
 	mp = dp->i_mount;
-	if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) {
+	if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK)))
 		return rval;
-	}
 	rval = XFS_FSB_TO_B(mp, last) == mp->m_dirblksize;
 	ASSERT(rval == 0 || dp->i_d.di_size == mp->m_dirblksize);
 	*vp = rval;
@@ -668,20 +595,19 @@ xfs_dir2_isblock(
 /*
  * See if the directory is a single-leaf form directory.
  */
-int					/* error */
+int
 xfs_dir2_isleaf(
-	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_inode_t	*dp,		/* incore directory inode */
+	xfs_trans_t	*tp,
+	xfs_inode_t	*dp,
 	int		*vp)		/* out: 1 is leaf, 0 is not leaf */
 {
 	xfs_fileoff_t	last;		/* last file offset */
-	xfs_mount_t	*mp;		/* filesystem mount point */
-	int		rval;		/* return value */
+	xfs_mount_t	*mp;
+	int		rval;
 
 	mp = dp->i_mount;
-	if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) {
+	if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK)))
 		return rval;
-	}
 	*vp = last == mp->m_dirleafblk + (1 << mp->m_sb.sb_dirblklog);
 	return 0;
 }
@@ -689,9 +615,9 @@ xfs_dir2_isleaf(
 /*
  * Getdents put routine for 64-bit ABI, direct form.
  */
-static int					/* error */
+static int
 xfs_dir2_put_dirent64_direct(
-	xfs_dir2_put_args_t	*pa)		/* argument bundle */
+	xfs_dir2_put_args_t	*pa)
 {
 	xfs_dirent_t		*idbp;		/* dirent pointer */
 	iovec_t			*iovp;		/* io vector */
@@ -726,9 +652,9 @@ xfs_dir2_put_dirent64_direct(
 /*
  * Getdents put routine for 64-bit ABI, uio form.
  */
-static int					/* error */
+static int
 xfs_dir2_put_dirent64_uio(
-	xfs_dir2_put_args_t	*pa)		/* argument bundle */
+	xfs_dir2_put_args_t	*pa)
 {
 	xfs_dirent_t		*idbp;		/* dirent pointer */
 	int			namelen;	/* entry name length */
@@ -764,17 +690,17 @@ xfs_dir2_put_dirent64_uio(
  */
 int
 xfs_dir2_shrink_inode(
-	xfs_da_args_t	*args,		/* operation arguments */
-	xfs_dir2_db_t	db,		/* directory block number */
-	xfs_dabuf_t	*bp)		/* block's buffer */
+	xfs_da_args_t	*args,
+	xfs_dir2_db_t	db,
+	xfs_dabuf_t	*bp)
 {
 	xfs_fileoff_t	bno;		/* directory file offset */
 	xfs_dablk_t	da;		/* directory file offset */
 	int		done;		/* bunmap is finished */
-	xfs_inode_t	*dp;		/* incore directory inode */
-	int		error;		/* error return value */
-	xfs_mount_t	*mp;		/* filesystem mount point */
-	xfs_trans_t	*tp;		/* transaction pointer */
+	xfs_inode_t	*dp;
+	int		error;
+	xfs_mount_t	*mp;
+	xfs_trans_t	*tp;
 
 	xfs_dir2_trace_args_db("shrink_inode", args, db, bp);
 	dp = args->dp;
@@ -786,7 +712,7 @@ xfs_dir2_shrink_inode(
 	 */
 	if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs,
 			XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,
-			&done))) {
+			NULL, &done))) {
 		/*
 		 * ENOSPC actually can happen if we're in a removename with
 		 * no space reservation, and the resulting block removal
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 7dd364b1e03..86560b6f794 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -22,7 +22,9 @@ struct uio;
 struct xfs_dabuf;
 struct xfs_da_args;
 struct xfs_dir2_put_args;
+struct xfs_bmap_free;
 struct xfs_inode;
+struct xfs_mount;
 struct xfs_trans;
 
 /*
@@ -73,7 +75,35 @@ typedef struct xfs_dir2_put_args {
 } xfs_dir2_put_args_t;
 
 /*
- * Other interfaces used by the rest of the dir v2 code.
+ * Generic directory interface routines
+ */
+extern void xfs_dir_startup(void);
+extern void xfs_dir_mount(struct xfs_mount *mp);
+extern int xfs_dir_isempty(struct xfs_inode *dp);
+extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp,
+				struct xfs_inode *pdp);
+extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
+				char *name, int namelen, xfs_ino_t inum,
+				xfs_fsblock_t *first,
+				struct xfs_bmap_free *flist, xfs_extlen_t tot);
+extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
+				char *name, int namelen, xfs_ino_t *inum);
+extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
+				char *name, int namelen, xfs_ino_t ino,
+				xfs_fsblock_t *first,
+				struct xfs_bmap_free *flist, xfs_extlen_t tot);
+extern int xfs_dir_getdents(struct xfs_trans *tp, struct xfs_inode *dp,
+				uio_t *uio, int *eofp);
+extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
+				char *name, int namelen, xfs_ino_t inum,
+				xfs_fsblock_t *first,
+				struct xfs_bmap_free *flist, xfs_extlen_t tot);
+extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
+				char *name, int namelen);
+extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
+
+/*
+ * Utility routines for v2 directories.
  */
 extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
 				xfs_dir2_db_t *dbp);
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 972ded59547..9d7438bba30 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -22,19 +22,16 @@
 #include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
-#include "xfs_dir_leaf.h"
 #include "xfs_dir2_data.h"
 #include "xfs_dir2_leaf.h"
 #include "xfs_dir2_block.h"
@@ -51,6 +48,18 @@ static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **bpp,
 				     int *entno);
 static int xfs_dir2_block_sort(const void *a, const void *b);
 
+static xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot;
+
+/*
+ * One-time startup, called from xfs_init(): cache hashes of "." and "..".
+ */
+void
+xfs_dir_startup(void)
+{
+	xfs_dir_hash_dot = xfs_da_hashname(".", 1);
+	xfs_dir_hash_dotdot = xfs_da_hashname("..", 2);
+}
+
 /*
  * Add an entry to a block directory.
  */
@@ -400,7 +409,7 @@ xfs_dir2_block_addname(
 	/*
 	 * Create the new data entry.
 	 */
-	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
+	dep->inumber = cpu_to_be64(args->inumber);
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, args->namelen);
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
@@ -508,7 +517,7 @@ xfs_dir2_block_getdents(
 
 		p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
 						    ptr - (char *)block);
-		p.ino = INT_GET(dep->inumber, ARCH_CONVERT);
+		p.ino = be64_to_cpu(dep->inumber);
 #if XFS_BIG_INUMS
 		p.ino += mp->m_inoadd;
 #endif
@@ -626,7 +635,7 @@ xfs_dir2_block_lookup(
 	/*
 	 * Fill in inode number, release the block.
 	 */
-	args->inumber = INT_GET(dep->inumber, ARCH_CONVERT);
+	args->inumber = be64_to_cpu(dep->inumber);
 	xfs_da_brelse(args->trans, bp);
 	return XFS_ERROR(EEXIST);
 }
@@ -844,11 +853,11 @@ xfs_dir2_block_replace(
 	 */
 	dep = (xfs_dir2_data_entry_t *)
 	      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address)));
-	ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) != args->inumber);
+	ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
 	/*
 	 * Change the inode number to the new value.
 	 */
-	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
+	dep->inumber = cpu_to_be64(args->inumber);
 	xfs_dir2_data_log_entry(args->trans, bp, dep);
 	xfs_dir2_data_check(dp, bp);
 	xfs_da_buf_done(bp);
@@ -1130,7 +1139,7 @@ xfs_dir2_sf_to_block(
 	 */
 	dep = (xfs_dir2_data_entry_t *)
 	      ((char *)block + XFS_DIR2_DATA_DOT_OFFSET);
-	INT_SET(dep->inumber, ARCH_CONVERT, dp->i_ino);
+	dep->inumber = cpu_to_be64(dp->i_ino);
 	dep->namelen = 1;
 	dep->name[0] = '.';
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
@@ -1144,7 +1153,7 @@ xfs_dir2_sf_to_block(
 	 */
 	dep = (xfs_dir2_data_entry_t *)
 		((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET);
-	INT_SET(dep->inumber, ARCH_CONVERT, XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent));
+	dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent));
 	dep->namelen = 2;
 	dep->name[0] = dep->name[1] = '.';
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
@@ -1193,7 +1202,7 @@ xfs_dir2_sf_to_block(
 		 * Copy a real entry.
 		 */
 		dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset);
-		INT_SET(dep->inumber, ARCH_CONVERT, XFS_DIR2_SF_GET_INUMBER(sfp,
+		dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp,
 				XFS_DIR2_SF_INUMBERP(sfep)));
 		dep->namelen = sfep->namelen;
 		memcpy(dep->name, sfep->name, dep->namelen);
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index bb3d03ff002..f7c79921707 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -22,18 +22,15 @@
 #include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
-#include "xfs_dir_leaf.h"
 #include "xfs_dir2_data.h"
 #include "xfs_dir2_leaf.h"
 #include "xfs_dir2_block.h"
@@ -133,7 +130,7 @@ xfs_dir2_data_check(
 		 */
 		dep = (xfs_dir2_data_entry_t *)p;
 		ASSERT(dep->namelen != 0);
-		ASSERT(xfs_dir_ino_validate(mp, INT_GET(dep->inumber, ARCH_CONVERT)) == 0);
+		ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0);
 		ASSERT(be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)) ==
 		       (char *)dep - (char *)d);
 		count++;
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index 0847cbb53e1..a6ae2d21c40 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -85,11 +85,11 @@ typedef struct xfs_dir2_data_hdr {
  * Tag appears as the last 2 bytes.
  */
 typedef struct xfs_dir2_data_entry {
-	xfs_ino_t		inumber;	/* inode number */
-	__uint8_t		namelen;	/* name length */
-	__uint8_t		name[1];	/* name bytes, no null */
+	__be64			inumber;	/* inode number (big-endian) */
+	__u8			namelen;	/* name length in bytes */
+	__u8			name[1];	/* name bytes, no null */
 						/* variable offset */
-	xfs_dir2_data_off_t	tag;		/* starting offset of us */
+	__be16			tag;		/* starting offset of us (big-endian) */
 } xfs_dir2_data_entry_t;
 
 /*
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 0f5e2f2ce6e..b1cf1fbf423 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_attr_sf.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
@@ -407,7 +405,7 @@ xfs_dir2_leaf_addname(
 	 * Initialize our new entry (at last).
 	 */
 	dep = (xfs_dir2_data_entry_t *)dup;
-	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
+	dep->inumber = cpu_to_be64(args->inumber);
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, dep->namelen);
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
@@ -884,7 +882,7 @@ xfs_dir2_leaf_getdents(
 					XFS_DIR2_BYTE_TO_DA(mp,
 						XFS_DIR2_LEAF_OFFSET) - map_off,
 					XFS_BMAPI_METADATA, NULL, 0,
-					&map[map_valid], &nmap, NULL);
+					&map[map_valid], &nmap, NULL, NULL);
 				/*
 				 * Don't know if we should ignore this or
 				 * try to return an error.
@@ -1098,7 +1096,7 @@ xfs_dir2_leaf_getdents(
 
 		p->cook = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff + length);
 
-		p->ino = INT_GET(dep->inumber, ARCH_CONVERT);
+		p->ino = be64_to_cpu(dep->inumber);
 #if XFS_BIG_INUMS
 		p->ino += mp->m_inoadd;
 #endif
@@ -1319,7 +1317,7 @@ xfs_dir2_leaf_lookup(
 	/*
 	 * Return the found inode number.
 	 */
-	args->inumber = INT_GET(dep->inumber, ARCH_CONVERT);
+	args->inumber = be64_to_cpu(dep->inumber);
 	xfs_da_brelse(tp, dbp);
 	xfs_da_brelse(tp, lbp);
 	return XFS_ERROR(EEXIST);
@@ -1606,11 +1604,11 @@ xfs_dir2_leaf_replace(
 	dep = (xfs_dir2_data_entry_t *)
 	      ((char *)dbp->data +
 	       XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address)));
-	ASSERT(args->inumber != INT_GET(dep->inumber, ARCH_CONVERT));
+	ASSERT(args->inumber != be64_to_cpu(dep->inumber));
 	/*
 	 * Put the new inode number in, log it.
 	 */
-	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
+	dep->inumber = cpu_to_be64(args->inumber);
 	tp = args->trans;
 	xfs_dir2_data_log_entry(tp, dbp, dep);
 	xfs_da_buf_done(dbp);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index ac511ab9c52..9ca71719b68 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -22,13 +22,11 @@
 #include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -505,7 +503,6 @@ xfs_dir2_leafn_lookup_int(
 							XFS_DATA_FORK))) {
 						return error;
 					}
-					curfdb = newfdb;
 					free = curbp->data;
 					ASSERT(be32_to_cpu(free->hdr.magic) ==
 					       XFS_DIR2_FREE_MAGIC);
@@ -527,8 +524,11 @@ xfs_dir2_leafn_lookup_int(
 				if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
 					XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
 							 XFS_ERRLEVEL_LOW, mp);
+					if (curfdb != newfdb)
+						xfs_da_brelse(tp, curbp);
 					return XFS_ERROR(EFSCORRUPTED);
 				}
+				curfdb = newfdb;
 				if (be16_to_cpu(free->bests[fi]) >= length) {
 					*indexp = index;
 					state->extravalid = 1;
@@ -580,7 +580,7 @@ xfs_dir2_leafn_lookup_int(
 			if (dep->namelen == args->namelen &&
 			    dep->name[0] == args->name[0] &&
 			    memcmp(dep->name, args->name, args->namelen) == 0) {
-				args->inumber = INT_GET(dep->inumber, ARCH_CONVERT);
+				args->inumber = be64_to_cpu(dep->inumber);
 				*indexp = index;
 				state->extravalid = 1;
 				state->extrablk.bp = curbp;
@@ -970,7 +970,7 @@ xfs_dir2_leafn_remove(
 			/*
 			 * One less used entry in the free table.
 			 */
-			free->hdr.nused = cpu_to_be32(-1);
+			be32_add(&free->hdr.nused, -1);
 			xfs_dir2_free_log_header(tp, fbp);
 			/*
 			 * If this was the last entry in the table, we can
@@ -1695,7 +1695,7 @@ xfs_dir2_node_addname_int(
 	 * Fill in the new entry and log it.
 	 */
 	dep = (xfs_dir2_data_entry_t *)dup;
-	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
+	dep->inumber = cpu_to_be64(args->inumber);
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, dep->namelen);
 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
@@ -1905,11 +1905,11 @@ xfs_dir2_node_replace(
 		dep = (xfs_dir2_data_entry_t *)
 		      ((char *)data +
 		       XFS_DIR2_DATAPTR_TO_OFF(state->mp, be32_to_cpu(lep->address)));
-		ASSERT(inum != INT_GET(dep->inumber, ARCH_CONVERT));
+		ASSERT(inum != be64_to_cpu(dep->inumber));
 		/*
 		 * Fill in the new inode number and log the entry.
 		 */
-		INT_SET(dep->inumber, ARCH_CONVERT, inum);
+		dep->inumber = cpu_to_be64(inum);
 		xfs_dir2_data_log_entry(args->trans, state->extrablk.bp, dep);
 		rval = 0;
 	}
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index d98a41d1fe6..0cd77b17bf9 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -22,19 +22,16 @@
 #include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
-#include "xfs_dir_leaf.h"
 #include "xfs_error.h"
 #include "xfs_dir2_data.h"
 #include "xfs_dir2_leaf.h"
@@ -117,13 +114,13 @@ xfs_dir2_block_sfsize(
 			dep->name[0] == '.' && dep->name[1] == '.';
 #if XFS_BIG_INUMS
 		if (!isdot)
-			i8count += INT_GET(dep->inumber, ARCH_CONVERT) > XFS_DIR2_MAX_SHORT_INUM;
+			i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM;
 #endif
 		if (!isdot && !isdotdot) {
 			count++;
 			namelen += dep->namelen;
 		} else if (isdotdot)
-			parent = INT_GET(dep->inumber, ARCH_CONVERT);
+			parent = be64_to_cpu(dep->inumber);
 		/*
 		 * Calculate the new size, see if we should give up yet.
 		 */
@@ -229,13 +226,13 @@ xfs_dir2_block_to_sf(
 		 * Skip .
 		 */
 		if (dep->namelen == 1 && dep->name[0] == '.')
-			ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) == dp->i_ino);
+			ASSERT(be64_to_cpu(dep->inumber) == dp->i_ino);
 		/*
 		 * Skip .., but make sure the inode number is right.
 		 */
 		else if (dep->namelen == 2 &&
 			 dep->name[0] == '.' && dep->name[1] == '.')
-			ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) ==
+			ASSERT(be64_to_cpu(dep->inumber) ==
 			       XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent));
 		/*
 		 * Normal entry, copy it into shortform.
@@ -246,7 +243,7 @@ xfs_dir2_block_to_sf(
 				(xfs_dir2_data_aoff_t)
 				((char *)dep - (char *)block));
 			memcpy(sfep->name, dep->name, dep->namelen);
-			temp=INT_GET(dep->inumber, ARCH_CONVERT);
+			temp = be64_to_cpu(dep->inumber);
 			XFS_DIR2_SF_PUT_INUMBER(sfp, &temp,
 				XFS_DIR2_SF_INUMBERP(sfep));
 			sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c
index c626943b411..f3fb2ffd6f5 100644
--- a/fs/xfs/xfs_dir2_trace.c
+++ b/fs/xfs/xfs_dir2_trace.c
@@ -19,11 +19,9 @@
 #include "xfs_fs.h"
 #include "xfs_types.h"
 #include "xfs_inum.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c
deleted file mode 100644
index 6d711869262..00000000000
--- a/fs/xfs/xfs_dir_leaf.c
+++ /dev/null
@@ -1,2213 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_dir.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
-#include "xfs_mount.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_alloc.h"
-#include "xfs_btree.h"
-#include "xfs_bmap.h"
-#include "xfs_dir_leaf.h"
-#include "xfs_error.h"
-
-/*
- * xfs_dir_leaf.c
- *
- * Routines to implement leaf blocks of directories as Btrees of hashed names.
- */
-
-/*========================================================================
- * Function prototypes for the kernel.
- *========================================================================*/
-
-/*
- * Routines used for growing the Btree.
- */
-STATIC void xfs_dir_leaf_add_work(xfs_dabuf_t *leaf_buffer, xfs_da_args_t *args,
-					      int insertion_index,
-					      int freemap_index);
-STATIC int xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *leaf_buffer,
-					    int musthave, int justcheck);
-STATIC void xfs_dir_leaf_rebalance(xfs_da_state_t *state,
-						  xfs_da_state_blk_t *blk1,
-						  xfs_da_state_blk_t *blk2);
-STATIC int xfs_dir_leaf_figure_balance(xfs_da_state_t *state,
-					  xfs_da_state_blk_t *leaf_blk_1,
-					  xfs_da_state_blk_t *leaf_blk_2,
-					  int *number_entries_in_blk1,
-					  int *number_namebytes_in_blk1);
-
-STATIC int xfs_dir_leaf_create(struct xfs_da_args *args,
-				xfs_dablk_t which_block,
-				struct xfs_dabuf **bpp);
-
-/*
- * Utility routines.
- */
-STATIC void xfs_dir_leaf_moveents(xfs_dir_leafblock_t *src_leaf,
-					      int src_start,
-					      xfs_dir_leafblock_t *dst_leaf,
-					      int dst_start, int move_count,
-					      xfs_mount_t *mp);
-
-
-/*========================================================================
- * External routines when dirsize < XFS_IFORK_DSIZE(dp).
- *========================================================================*/
-
-
-/*
- * Validate a given inode number.
- */
-int
-xfs_dir_ino_validate(xfs_mount_t *mp, xfs_ino_t ino)
-{
-	xfs_agblock_t	agblkno;
-	xfs_agino_t	agino;
-	xfs_agnumber_t	agno;
-	int		ino_ok;
-	int		ioff;
-
-	agno = XFS_INO_TO_AGNO(mp, ino);
-	agblkno = XFS_INO_TO_AGBNO(mp, ino);
-	ioff = XFS_INO_TO_OFFSET(mp, ino);
-	agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff);
-	ino_ok =
-		agno < mp->m_sb.sb_agcount &&
-		agblkno < mp->m_sb.sb_agblocks &&
-		agblkno != 0 &&
-		ioff < (1 << mp->m_sb.sb_inopblog) &&
-		XFS_AGINO_TO_INO(mp, agno, agino) == ino;
-	if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
-			XFS_RANDOM_DIR_INO_VALIDATE))) {
-		xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx",
-				(unsigned long long) ino);
-		XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-	return 0;
-}
-
-/*
- * Create the initial contents of a shortform directory.
- */
-int
-xfs_dir_shortform_create(xfs_da_args_t *args, xfs_ino_t parent)
-{
-	xfs_dir_sf_hdr_t *hdr;
-	xfs_inode_t *dp;
-
-	dp = args->dp;
-	ASSERT(dp != NULL);
-	ASSERT(dp->i_d.di_size == 0);
-	if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) {
-		dp->i_df.if_flags &= ~XFS_IFEXTENTS;	/* just in case */
-		dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
-		xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
-		dp->i_df.if_flags |= XFS_IFINLINE;
-	}
-	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-	ASSERT(dp->i_df.if_bytes == 0);
-	xfs_idata_realloc(dp, sizeof(*hdr), XFS_DATA_FORK);
-	hdr = (xfs_dir_sf_hdr_t *)dp->i_df.if_u1.if_data;
-	XFS_DIR_SF_PUT_DIRINO(&parent, &hdr->parent);
-
-	hdr->count = 0;
-	dp->i_d.di_size = sizeof(*hdr);
-	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
-	return 0;
-}
-
-/*
- * Add a name to the shortform directory structure.
- * Overflow from the inode has already been checked for.
- */
-int
-xfs_dir_shortform_addname(xfs_da_args_t *args)
-{
-	xfs_dir_shortform_t *sf;
-	xfs_dir_sf_entry_t *sfe;
-	int i, offset, size;
-	xfs_inode_t *dp;
-
-	dp = args->dp;
-	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-	/*
-	 * Catch the case where the conversion from shortform to leaf
-	 * failed part way through.
-	 */
-	if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
-		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
-		return XFS_ERROR(EIO);
-	}
-	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
-	ASSERT(dp->i_df.if_u1.if_data != NULL);
-	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
-	sfe = &sf->list[0];
-	for (i = sf->hdr.count-1; i >= 0; i--) {
-		if (sfe->namelen == args->namelen &&
-		    args->name[0] == sfe->name[0] &&
-		    memcmp(args->name, sfe->name, args->namelen) == 0)
-			return XFS_ERROR(EEXIST);
-		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
-	}
-
-	offset = (int)((char *)sfe - (char *)sf);
-	size = XFS_DIR_SF_ENTSIZE_BYNAME(args->namelen);
-	xfs_idata_realloc(dp, size, XFS_DATA_FORK);
-	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
-	sfe = (xfs_dir_sf_entry_t *)((char *)sf + offset);
-
-	XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sfe->inumber);
-	sfe->namelen = args->namelen;
-	memcpy(sfe->name, args->name, sfe->namelen);
-	sf->hdr.count++;
-
-	dp->i_d.di_size += size;
-	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
-
-	return 0;
-}
-
-/*
- * Remove a name from the shortform directory structure.
- */
-int
-xfs_dir_shortform_removename(xfs_da_args_t *args)
-{
-	xfs_dir_shortform_t *sf;
-	xfs_dir_sf_entry_t *sfe;
-	int base, size = 0, i;
-	xfs_inode_t *dp;
-
-	dp = args->dp;
-	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-	/*
-	 * Catch the case where the conversion from shortform to leaf
-	 * failed part way through.
-	 */
-	if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
-		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
-		return XFS_ERROR(EIO);
-	}
-	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
-	ASSERT(dp->i_df.if_u1.if_data != NULL);
-	base = sizeof(xfs_dir_sf_hdr_t);
-	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
-	sfe = &sf->list[0];
-	for (i = sf->hdr.count-1; i >= 0; i--) {
-		size = XFS_DIR_SF_ENTSIZE_BYENTRY(sfe);
-		if (sfe->namelen == args->namelen &&
-		    sfe->name[0] == args->name[0] &&
-		    memcmp(sfe->name, args->name, args->namelen) == 0)
-			break;
-		base += size;
-		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
-	}
-	if (i < 0) {
-		ASSERT(args->oknoent);
-		return XFS_ERROR(ENOENT);
-	}
-
-	if ((base + size) != dp->i_d.di_size) {
-		memmove(&((char *)sf)[base], &((char *)sf)[base+size],
-					      dp->i_d.di_size - (base+size));
-	}
-	sf->hdr.count--;
-
-	xfs_idata_realloc(dp, -size, XFS_DATA_FORK);
-	dp->i_d.di_size -= size;
-	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
-
-	return 0;
-}
-
-/*
- * Look up a name in a shortform directory structure.
- */
-int
-xfs_dir_shortform_lookup(xfs_da_args_t *args)
-{
-	xfs_dir_shortform_t *sf;
-	xfs_dir_sf_entry_t *sfe;
-	int i;
-	xfs_inode_t *dp;
-
-	dp = args->dp;
-	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-	/*
-	 * Catch the case where the conversion from shortform to leaf
-	 * failed part way through.
-	 */
-	if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
-		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
-		return XFS_ERROR(EIO);
-	}
-	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
-	ASSERT(dp->i_df.if_u1.if_data != NULL);
-	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
-	if (args->namelen == 2 &&
-	    args->name[0] == '.' && args->name[1] == '.') {
-		XFS_DIR_SF_GET_DIRINO(&sf->hdr.parent, &args->inumber);
-		return(XFS_ERROR(EEXIST));
-	}
-	if (args->namelen == 1 && args->name[0] == '.') {
-		args->inumber = dp->i_ino;
-		return(XFS_ERROR(EEXIST));
-	}
-	sfe = &sf->list[0];
-	for (i = sf->hdr.count-1; i >= 0; i--) {
-		if (sfe->namelen == args->namelen &&
-		    sfe->name[0] == args->name[0] &&
-		    memcmp(args->name, sfe->name, args->namelen) == 0) {
-			XFS_DIR_SF_GET_DIRINO(&sfe->inumber, &args->inumber);
-			return(XFS_ERROR(EEXIST));
-		}
-		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
-	}
-	ASSERT(args->oknoent);
-	return(XFS_ERROR(ENOENT));
-}
-
-/*
- * Convert from using the shortform to the leaf.
- */
-int
-xfs_dir_shortform_to_leaf(xfs_da_args_t *iargs)
-{
-	xfs_inode_t *dp;
-	xfs_dir_shortform_t *sf;
-	xfs_dir_sf_entry_t *sfe;
-	xfs_da_args_t args;
-	xfs_ino_t inumber;
-	char *tmpbuffer;
-	int retval, i, size;
-	xfs_dablk_t blkno;
-	xfs_dabuf_t *bp;
-
-	dp = iargs->dp;
-	/*
-	 * Catch the case where the conversion from shortform to leaf
-	 * failed part way through.
-	 */
-	if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
-		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
-		return XFS_ERROR(EIO);
-	}
-	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
-	ASSERT(dp->i_df.if_u1.if_data != NULL);
-	size = dp->i_df.if_bytes;
-	tmpbuffer = kmem_alloc(size, KM_SLEEP);
-	ASSERT(tmpbuffer != NULL);
-
-	memcpy(tmpbuffer, dp->i_df.if_u1.if_data, size);
-
-	sf = (xfs_dir_shortform_t *)tmpbuffer;
-	XFS_DIR_SF_GET_DIRINO(&sf->hdr.parent, &inumber);
-
-	xfs_idata_realloc(dp, -size, XFS_DATA_FORK);
-	dp->i_d.di_size = 0;
-	xfs_trans_log_inode(iargs->trans, dp, XFS_ILOG_CORE);
-	retval = xfs_da_grow_inode(iargs, &blkno);
-	if (retval)
-		goto out;
-
-	ASSERT(blkno == 0);
-	retval = xfs_dir_leaf_create(iargs, blkno, &bp);
-	if (retval)
-		goto out;
-	xfs_da_buf_done(bp);
-
-	args.name = ".";
-	args.namelen = 1;
-	args.hashval = xfs_dir_hash_dot;
-	args.inumber = dp->i_ino;
-	args.dp = dp;
-	args.firstblock = iargs->firstblock;
-	args.flist = iargs->flist;
-	args.total = iargs->total;
-	args.whichfork = XFS_DATA_FORK;
-	args.trans = iargs->trans;
-	args.justcheck = 0;
-	args.addname = args.oknoent = 1;
-	retval = xfs_dir_leaf_addname(&args);
-	if (retval)
-		goto out;
-
-	args.name = "..";
-	args.namelen = 2;
-	args.hashval = xfs_dir_hash_dotdot;
-	args.inumber = inumber;
-	retval = xfs_dir_leaf_addname(&args);
-	if (retval)
-		goto out;
-
-	sfe = &sf->list[0];
-	for (i = 0; i < sf->hdr.count; i++) {
-		args.name = (char *)(sfe->name);
-		args.namelen = sfe->namelen;
-		args.hashval = xfs_da_hashname((char *)(sfe->name),
-					       sfe->namelen);
-		XFS_DIR_SF_GET_DIRINO(&sfe->inumber, &args.inumber);
-		retval = xfs_dir_leaf_addname(&args);
-		if (retval)
-			goto out;
-		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
-	}
-	retval = 0;
-
-out:
-	kmem_free(tmpbuffer, size);
-	return retval;
-}
-
-STATIC int
-xfs_dir_shortform_compare(const void *a, const void *b)
-{
-	xfs_dir_sf_sort_t *sa, *sb;
-
-	sa = (xfs_dir_sf_sort_t *)a;
-	sb = (xfs_dir_sf_sort_t *)b;
-	if (sa->hash < sb->hash)
-		return -1;
-	else if (sa->hash > sb->hash)
-		return 1;
-	else
-		return sa->entno - sb->entno;
-}
-
-/*
- * Copy out directory entries for getdents(), for shortform directories.
- */
-/*ARGSUSED*/
-int
-xfs_dir_shortform_getdents(xfs_inode_t *dp, uio_t *uio, int *eofp,
-				       xfs_dirent_t *dbp, xfs_dir_put_t put)
-{
-	xfs_dir_shortform_t *sf;
-	xfs_dir_sf_entry_t *sfe;
-	int retval, i, sbsize, nsbuf, lastresid=0, want_entno;
-	xfs_mount_t *mp;
-	xfs_dahash_t cookhash, hash;
-	xfs_dir_put_args_t p;
-	xfs_dir_sf_sort_t *sbuf, *sbp;
-
-	mp = dp->i_mount;
-	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
-	cookhash = XFS_DA_COOKIE_HASH(mp, uio->uio_offset);
-	want_entno = XFS_DA_COOKIE_ENTRY(mp, uio->uio_offset);
-	nsbuf = sf->hdr.count + 2;
-	sbsize = (nsbuf + 1) * sizeof(*sbuf);
-	sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP);
-
-	xfs_dir_trace_g_du("sf: start", dp, uio);
-
-	/*
-	 * Collect all the entries into the buffer.
-	 * Entry 0 is .
-	 */
-	sbp->entno = 0;
-	sbp->seqno = 0;
-	sbp->hash = xfs_dir_hash_dot;
-	sbp->ino = dp->i_ino;
-	sbp->name = ".";
-	sbp->namelen = 1;
-	sbp++;
-
-	/*
-	 * Entry 1 is ..
-	 */
-	sbp->entno = 1;
-	sbp->seqno = 0;
-	sbp->hash = xfs_dir_hash_dotdot;
-	sbp->ino = XFS_GET_DIR_INO8(sf->hdr.parent);
-	sbp->name = "..";
-	sbp->namelen = 2;
-	sbp++;
-
-	/*
-	 * Scan the directory data for the rest of the entries.
-	 */
-	for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
-
-		if (unlikely(
-		    ((char *)sfe < (char *)sf) ||
-		    ((char *)sfe >= ((char *)sf + dp->i_df.if_bytes)))) {
-			xfs_dir_trace_g_du("sf: corrupted", dp, uio);
-			XFS_CORRUPTION_ERROR("xfs_dir_shortform_getdents",
-					     XFS_ERRLEVEL_LOW, mp, sfe);
-			kmem_free(sbuf, sbsize);
-			return XFS_ERROR(EFSCORRUPTED);
-		}
-
-		sbp->entno = i + 2;
-		sbp->seqno = 0;
-		sbp->hash = xfs_da_hashname((char *)sfe->name, sfe->namelen);
-		sbp->ino = XFS_GET_DIR_INO8(sfe->inumber);
-		sbp->name = (char *)sfe->name;
-		sbp->namelen = sfe->namelen;
-		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
-		sbp++;
-	}
-
-	/*
-	 * Sort the entries on hash then entno.
-	 */
-	xfs_sort(sbuf, nsbuf, sizeof(*sbuf), xfs_dir_shortform_compare);
-	/*
-	 * Stuff in last entry.
-	 */
-	sbp->entno = nsbuf;
-	sbp->hash = XFS_DA_MAXHASH;
-	sbp->seqno = 0;
-	/*
-	 * Figure out the sequence numbers in case there's a hash duplicate.
-	 */
-	for (hash = sbuf->hash, sbp = sbuf + 1;
-				sbp < &sbuf[nsbuf + 1]; sbp++) {
-		if (sbp->hash == hash)
-			sbp->seqno = sbp[-1].seqno + 1;
-		else
-			hash = sbp->hash;
-	}
-
-	/*
-	 * Set up put routine.
-	 */
-	p.dbp = dbp;
-	p.put = put;
-	p.uio = uio;
-
-	/*
-	 * Find our place.
-	 */
-	for (sbp = sbuf; sbp < &sbuf[nsbuf + 1]; sbp++) {
-		if (sbp->hash > cookhash ||
-		    (sbp->hash == cookhash && sbp->seqno >= want_entno))
-			break;
-	}
-
-	/*
-	 * Did we fail to find anything?  We stop at the last entry,
-	 * the one we put maxhash into.
-	 */
-	if (sbp == &sbuf[nsbuf]) {
-		kmem_free(sbuf, sbsize);
-		xfs_dir_trace_g_du("sf: hash beyond end", dp, uio);
-		uio->uio_offset = XFS_DA_MAKE_COOKIE(mp, 0, 0, XFS_DA_MAXHASH);
-		*eofp = 1;
-		return 0;
-	}
-
-	/*
-	 * Loop putting entries into the user buffer.
-	 */
-	while (sbp < &sbuf[nsbuf]) {
-		/*
-		 * Save the first resid in a run of equal-hashval entries
-		 * so that we can back them out if they don't all fit.
-		 */
-		if (sbp->seqno == 0 || sbp == sbuf)
-			lastresid = uio->uio_resid;
-		XFS_PUT_COOKIE(p.cook, mp, 0, sbp[1].seqno, sbp[1].hash);
-		p.ino = sbp->ino;
-#if XFS_BIG_INUMS
-		p.ino += mp->m_inoadd;
-#endif
-		p.name = sbp->name;
-		p.namelen = sbp->namelen;
-		retval = p.put(&p);
-		if (!p.done) {
-			uio->uio_offset =
-				XFS_DA_MAKE_COOKIE(mp, 0, 0, sbp->hash);
-			kmem_free(sbuf, sbsize);
-			uio->uio_resid = lastresid;
-			xfs_dir_trace_g_du("sf: E-O-B", dp, uio);
-			return retval;
-		}
-		sbp++;
-	}
-	kmem_free(sbuf, sbsize);
-	uio->uio_offset = p.cook.o;
-	*eofp = 1;
-	xfs_dir_trace_g_du("sf: E-O-F", dp, uio);
-	return 0;
-}
-
-/*
- * Look up a name in a shortform directory structure, replace the inode number.
- */
-int
-xfs_dir_shortform_replace(xfs_da_args_t *args)
-{
-	xfs_dir_shortform_t *sf;
-	xfs_dir_sf_entry_t *sfe;
-	xfs_inode_t *dp;
-	int i;
-
-	dp = args->dp;
-	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-	/*
-	 * Catch the case where the conversion from shortform to leaf
-	 * failed part way through.
-	 */
-	if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
-		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
-		return XFS_ERROR(EIO);
-	}
-	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
-	ASSERT(dp->i_df.if_u1.if_data != NULL);
-	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
-	if (args->namelen == 2 &&
-	    args->name[0] == '.' && args->name[1] == '.') {
-		/* XXX - replace assert? */
-		XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sf->hdr.parent);
-		xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
-		return 0;
-	}
-	ASSERT(args->namelen != 1 || args->name[0] != '.');
-	sfe = &sf->list[0];
-	for (i = sf->hdr.count-1; i >= 0; i--) {
-		if (sfe->namelen == args->namelen &&
-		    sfe->name[0] == args->name[0] &&
-		    memcmp(args->name, sfe->name, args->namelen) == 0) {
-			ASSERT(memcmp((char *)&args->inumber,
-				(char *)&sfe->inumber, sizeof(xfs_ino_t)));
-			XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sfe->inumber);
-			xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
-			return 0;
-		}
-		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
-	}
-	ASSERT(args->oknoent);
-	return XFS_ERROR(ENOENT);
-}
-
-/*
- * Convert a leaf directory to shortform structure
- */
-int
-xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs)
-{
-	xfs_dir_leafblock_t *leaf;
-	xfs_dir_leaf_hdr_t *hdr;
-	xfs_dir_leaf_entry_t *entry;
-	xfs_dir_leaf_name_t *namest;
-	xfs_da_args_t args;
-	xfs_inode_t *dp;
-	xfs_ino_t parent = 0;
-	char *tmpbuffer;
-	int retval, i;
-	xfs_dabuf_t *bp;
-
-	dp = iargs->dp;
-	tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP);
-	ASSERT(tmpbuffer != NULL);
-
-	retval = xfs_da_read_buf(iargs->trans, iargs->dp, 0, -1, &bp,
-					       XFS_DATA_FORK);
-	if (retval)
-		goto out;
-	ASSERT(bp != NULL);
-	memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
-	leaf = (xfs_dir_leafblock_t *)tmpbuffer;
-	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-	memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
-
-	/*
-	 * Find and special case the parent inode number
-	 */
-	hdr = &leaf->hdr;
-	entry = &leaf->entries[0];
-	for (i = INT_GET(hdr->count, ARCH_CONVERT)-1; i >= 0; entry++, i--) {
-		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
-		if ((entry->namelen == 2) &&
-		    (namest->name[0] == '.') &&
-		    (namest->name[1] == '.')) {
-			XFS_DIR_SF_GET_DIRINO(&namest->inumber, &parent);
-			entry->nameidx = 0;
-		} else if ((entry->namelen == 1) && (namest->name[0] == '.')) {
-			entry->nameidx = 0;
-		}
-	}
-	retval = xfs_da_shrink_inode(iargs, 0, bp);
-	if (retval)
-		goto out;
-	retval = xfs_dir_shortform_create(iargs, parent);
-	if (retval)
-		goto out;
-
-	/*
-	 * Copy the rest of the filenames
-	 */
-	entry = &leaf->entries[0];
-	args.dp = dp;
-	args.firstblock = iargs->firstblock;
-	args.flist = iargs->flist;
-	args.total = iargs->total;
-	args.whichfork = XFS_DATA_FORK;
-	args.trans = iargs->trans;
-	args.justcheck = 0;
-	args.addname = args.oknoent = 1;
-	for (i = 0; i < INT_GET(hdr->count, ARCH_CONVERT); entry++, i++) {
-		if (!entry->nameidx)
-			continue;
-		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
-		args.name = (char *)(namest->name);
-		args.namelen = entry->namelen;
-		args.hashval = INT_GET(entry->hashval, ARCH_CONVERT);
-		XFS_DIR_SF_GET_DIRINO(&namest->inumber, &args.inumber);
-		xfs_dir_shortform_addname(&args);
-	}
-
-out:
-	kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount));
-	return retval;
-}
-
-/*
- * Convert from using a single leaf to a root node and a leaf.
- */
-int
-xfs_dir_leaf_to_node(xfs_da_args_t *args)
-{
-	xfs_dir_leafblock_t *leaf;
-	xfs_da_intnode_t *node;
-	xfs_inode_t *dp;
-	xfs_dabuf_t *bp1, *bp2;
-	xfs_dablk_t blkno;
-	int retval;
-
-	dp = args->dp;
-	retval = xfs_da_grow_inode(args, &blkno);
-	ASSERT(blkno == 1);
-	if (retval)
-		return retval;
-	retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp1,
-					      XFS_DATA_FORK);
-	if (retval)
-		return retval;
-	ASSERT(bp1 != NULL);
-	retval = xfs_da_get_buf(args->trans, args->dp, 1, -1, &bp2,
-					     XFS_DATA_FORK);
-	if (retval) {
-		xfs_da_buf_done(bp1);
-		return retval;
-	}
-	ASSERT(bp2 != NULL);
-	memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount));
-	xfs_da_buf_done(bp1);
-	xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
-
-	/*
-	 * Set up the new root node.
-	 */
-	retval = xfs_da_node_create(args, 0, 1, &bp1, XFS_DATA_FORK);
-	if (retval) {
-		xfs_da_buf_done(bp2);
-		return retval;
-	}
-	node = bp1->data;
-	leaf = bp2->data;
-	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-	node->btree[0].hashval = cpu_to_be32(
-		INT_GET(leaf->entries[
-			INT_GET(leaf->hdr.count, ARCH_CONVERT)-1].hashval, ARCH_CONVERT));
-	xfs_da_buf_done(bp2);
-	node->btree[0].before = cpu_to_be32(blkno);
-	node->hdr.count = cpu_to_be16(1);
-	xfs_da_log_buf(args->trans, bp1,
-		XFS_DA_LOGRANGE(node, &node->btree[0], sizeof(node->btree[0])));
-	xfs_da_buf_done(bp1);
-
-	return retval;
-}
-
-
-/*========================================================================
- * Routines used for growing the Btree.
- *========================================================================*/
-
-/*
- * Create the initial contents of a leaf directory
- * or a leaf in a node directory.
- */
-STATIC int
-xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
-{
-	xfs_dir_leafblock_t *leaf;
-	xfs_dir_leaf_hdr_t *hdr;
-	xfs_inode_t *dp;
-	xfs_dabuf_t *bp;
-	int retval;
-
-	dp = args->dp;
-	ASSERT(dp != NULL);
-	retval = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp, XFS_DATA_FORK);
-	if (retval)
-		return retval;
-	ASSERT(bp != NULL);
-	leaf = bp->data;
-	memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
-	hdr = &leaf->hdr;
-	hdr->info.magic = cpu_to_be16(XFS_DIR_LEAF_MAGIC);
-	INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount));
-	if (!hdr->firstused)
-		INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount) - 1);
-	INT_SET(hdr->freemap[0].base, ARCH_CONVERT, sizeof(xfs_dir_leaf_hdr_t));
-	INT_SET(hdr->freemap[0].size, ARCH_CONVERT, INT_GET(hdr->firstused, ARCH_CONVERT) - INT_GET(hdr->freemap[0].base, ARCH_CONVERT));
-
-	xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
-
-	*bpp = bp;
-	return 0;
-}
-
-/*
- * Split the leaf node, rebalance, then add the new entry.
- */
-int
-xfs_dir_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
-				  xfs_da_state_blk_t *newblk)
-{
-	xfs_dablk_t blkno;
-	xfs_da_args_t *args;
-	int error;
-
-	/*
-	 * Allocate space for a new leaf node.
-	 */
-	args = state->args;
-	ASSERT(args != NULL);
-	ASSERT(oldblk->magic == XFS_DIR_LEAF_MAGIC);
-	error = xfs_da_grow_inode(args, &blkno);
-	if (error)
-		return error;
-	error = xfs_dir_leaf_create(args, blkno, &newblk->bp);
-	if (error)
-		return error;
-	newblk->blkno = blkno;
-	newblk->magic = XFS_DIR_LEAF_MAGIC;
-
-	/*
-	 * Rebalance the entries across the two leaves.
-	 */
-	xfs_dir_leaf_rebalance(state, oldblk, newblk);
-	error = xfs_da_blk_link(state, oldblk, newblk);
-	if (error)
-		return error;
-
-	/*
-	 * Insert the new entry in the correct block.
-	 */
-	if (state->inleaf) {
-		error = xfs_dir_leaf_add(oldblk->bp, args, oldblk->index);
-	} else {
-		error = xfs_dir_leaf_add(newblk->bp, args, newblk->index);
-	}
-
-	/*
-	 * Update last hashval in each block since we added the name.
-	 */
-	oldblk->hashval = xfs_dir_leaf_lasthash(oldblk->bp, NULL);
-	newblk->hashval = xfs_dir_leaf_lasthash(newblk->bp, NULL);
-	return error;
-}
-
-/*
- * Add a name to the leaf directory structure.
- *
- * Must take into account fragmented leaves and leaves where spacemap has
- * lost some freespace information (ie: holes).
- */
-int
-xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index)
-{
-	xfs_dir_leafblock_t *leaf;
-	xfs_dir_leaf_hdr_t *hdr;
-	xfs_dir_leaf_map_t *map;
-	int tablesize, entsize, sum, i, tmp, error;
-
-	leaf = bp->data;
-	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-	ASSERT((index >= 0) && (index <= INT_GET(leaf->hdr.count, ARCH_CONVERT)));
-	hdr = &leaf->hdr;
-	entsize = XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen);
-
-	/*
-	 * Search through freemap for first-fit on new name length.
-	 * (may need to figure in size of entry struct too)
-	 */
-	tablesize = (INT_GET(hdr->count, ARCH_CONVERT) + 1) * (uint)sizeof(xfs_dir_leaf_entry_t)
-			+ (uint)sizeof(xfs_dir_leaf_hdr_t);
-	map = &hdr->freemap[XFS_DIR_LEAF_MAPSIZE-1];
-	for (sum = 0, i = XFS_DIR_LEAF_MAPSIZE-1; i >= 0; map--, i--) {
-		if (tablesize > INT_GET(hdr->firstused, ARCH_CONVERT)) {
-			sum += INT_GET(map->size, ARCH_CONVERT);
-			continue;
-		}
-		if (!map->size)
-			continue;	/* no space in this map */
-		tmp = entsize;
-		if (INT_GET(map->base, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT))
-			tmp += (uint)sizeof(xfs_dir_leaf_entry_t);
-		if (INT_GET(map->size, ARCH_CONVERT) >= tmp) {
-			if (!args->justcheck)
-				xfs_dir_leaf_add_work(bp, args, index, i);
-			return 0;
-		}
-		sum += INT_GET(map->size, ARCH_CONVERT);
-	}
-
-	/*
-	 * If there are no holes in the address space of the block,
-	 * and we don't have enough freespace, then compaction will do us
-	 * no good and we should just give up.
-	 */
-	if (!hdr->holes && (sum < entsize))
-		return XFS_ERROR(ENOSPC);
-
-	/*
-	 * Compact the entries to coalesce free space.
-	 * Pass the justcheck flag so the checking pass can return
-	 * an error, without changing anything, if it won't fit.
-	 */
-	error = xfs_dir_leaf_compact(args->trans, bp,
-			args->total == 0 ?
-				entsize +
-				(uint)sizeof(xfs_dir_leaf_entry_t) : 0,
-			args->justcheck);
-	if (error)
-		return error;
-	/*
-	 * After compaction, the block is guaranteed to have only one
-	 * free region, in freemap[0].  If it is not big enough, give up.
-	 */
-	if (INT_GET(hdr->freemap[0].size, ARCH_CONVERT) <
-	    (entsize + (uint)sizeof(xfs_dir_leaf_entry_t)))
-		return XFS_ERROR(ENOSPC);
-
-	if (!args->justcheck)
-		xfs_dir_leaf_add_work(bp, args, index, 0);
-	return 0;
-}
-
-/*
- * Add a name to a leaf directory structure.
- */
-STATIC void
-xfs_dir_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int index,
-		      int mapindex)
-{
-	xfs_dir_leafblock_t *leaf;
-	xfs_dir_leaf_hdr_t *hdr;
-	xfs_dir_leaf_entry_t *entry;
-	xfs_dir_leaf_name_t *namest;
-	xfs_dir_leaf_map_t *map;
-	/* REFERENCED */
-	xfs_mount_t *mp;
-	int tmp, i;
-
-	leaf = bp->data;
-	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-	hdr = &leaf->hdr;
-	ASSERT((mapindex >= 0) && (mapindex < XFS_DIR_LEAF_MAPSIZE));
-	ASSERT((index >= 0) && (index <= INT_GET(hdr->count, ARCH_CONVERT)));
-
-	/*
-	 * Force open some space in the entry array and fill it in.
-	 */
-	entry = &leaf->entries[index];
-	if (index < INT_GET(hdr->count, ARCH_CONVERT)) {
-		tmp  = INT_GET(hdr->count, ARCH_CONVERT) - index;
-		tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
-		memmove(entry + 1, entry, tmp);
-		xfs_da_log_buf(args->trans, bp,
-		    XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
-	}
-	INT_MOD(hdr->count, ARCH_CONVERT, +1);
-
-	/*
-	 * Allocate space for the new string (at the end of the run).
-	 */
-	map = &hdr->freemap[mapindex];
-	mp = args->trans->t_mountp;
-	ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
-	ASSERT(INT_GET(map->size, ARCH_CONVERT) >= XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen));
-	ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
-	INT_MOD(map->size, ARCH_CONVERT, -(XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen)));
-	INT_SET(entry->nameidx, ARCH_CONVERT, INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT));
-	INT_SET(entry->hashval, ARCH_CONVERT, args->hashval);
-	entry->namelen = args->namelen;
-	xfs_da_log_buf(args->trans, bp,
-	    XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
-
-	/*
-	 * Copy the string and inode number into the new space.
-	 */
-	namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
-	XFS_DIR_SF_PUT_DIRINO(&args->inumber, &namest->inumber);
-	memcpy(namest->name, args->name, args->namelen);
-	xfs_da_log_buf(args->trans, bp,
-	    XFS_DA_LOGRANGE(leaf, namest, XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)));
-
-	/*
-	 * Update the control info for this leaf node
-	 */
-	if (INT_GET(entry->nameidx, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT))
-		INT_COPY(hdr->firstused, entry->nameidx, ARCH_CONVERT);
-	ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) >= ((INT_GET(hdr->count, ARCH_CONVERT)*sizeof(*entry))+sizeof(*hdr)));
-	tmp = (INT_GET(hdr->count, ARCH_CONVERT)-1) * (uint)sizeof(xfs_dir_leaf_entry_t)
-			+ (uint)sizeof(xfs_dir_leaf_hdr_t);
-	map = &hdr->freemap[0];
-	for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) {
-		if (INT_GET(map->base, ARCH_CONVERT) == tmp) {
-			INT_MOD(map->base, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_entry_t));
-			INT_MOD(map->size, ARCH_CONVERT, -((uint)sizeof(xfs_dir_leaf_entry_t)));
-		}
-	}
-	INT_MOD(hdr->namebytes, ARCH_CONVERT, args->namelen);
-	xfs_da_log_buf(args->trans, bp,
-		XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
-}
-
-/*
- * Garbage collect a leaf directory block by copying it to a new buffer.
- */
-STATIC int
-xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave,
-		     int justcheck)
-{
-	xfs_dir_leafblock_t *leaf_s, *leaf_d;
-	xfs_dir_leaf_hdr_t *hdr_s, *hdr_d;
-	xfs_mount_t *mp;
-	char *tmpbuffer;
-	char *tmpbuffer2=NULL;
-	int rval;
-	int lbsize;
-
-	mp = trans->t_mountp;
-	lbsize = XFS_LBSIZE(mp);
-	tmpbuffer = kmem_alloc(lbsize, KM_SLEEP);
-	ASSERT(tmpbuffer != NULL);
-	memcpy(tmpbuffer, bp->data, lbsize);
-
-	/*
-	 * Make a second copy in case xfs_dir_leaf_moveents()
-	 * below destroys the original.
-	 */
-	if (musthave || justcheck) {
-		tmpbuffer2 = kmem_alloc(lbsize, KM_SLEEP);
-		memcpy(tmpbuffer2, bp->data, lbsize);
-	}
-	memset(bp->data, 0, lbsize);
-
-	/*
-	 * Copy basic information
-	 */
-	leaf_s = (xfs_dir_leafblock_t *)tmpbuffer;
-	leaf_d = bp->data;
-	hdr_s = &leaf_s->hdr;
-	hdr_d = &leaf_d->hdr;
-	hdr_d->info = hdr_s->info;	/* struct copy */
-	INT_SET(hdr_d->firstused, ARCH_CONVERT, lbsize);
-	if (!hdr_d->firstused)
-		INT_SET(hdr_d->firstused, ARCH_CONVERT, lbsize - 1);
-	hdr_d->namebytes = 0;
-	hdr_d->count = 0;
-	hdr_d->holes = 0;
-	INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, sizeof(xfs_dir_leaf_hdr_t));
-	INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, INT_GET(hdr_d->firstused, ARCH_CONVERT) - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
-
-	/*
-	 * Copy all entry's in the same (sorted) order,
-	 * but allocate filenames packed and in sequence.
-	 * This changes the source (leaf_s) as well.
-	 */
-	xfs_dir_leaf_moveents(leaf_s, 0, leaf_d, 0, (int)INT_GET(hdr_s->count, ARCH_CONVERT), mp);
-
-	if (musthave && INT_GET(hdr_d->freemap[0].size, ARCH_CONVERT) < musthave)
-		rval = XFS_ERROR(ENOSPC);
-	else
-		rval = 0;
-
-	if (justcheck || rval == ENOSPC) {
-		ASSERT(tmpbuffer2);
-		memcpy(bp->data, tmpbuffer2, lbsize);
-	} else {
-		xfs_da_log_buf(trans, bp, 0, lbsize - 1);
-	}
-
-	kmem_free(tmpbuffer, lbsize);
-	if (musthave || justcheck)
-		kmem_free(tmpbuffer2, lbsize);
-	return rval;
-}
-
-/*
- * Redistribute the directory entries between two leaf nodes,
- * taking into account the size of the new entry.
- *
- * NOTE: if new block is empty, then it will get the upper half of old block.
- */
-STATIC void
-xfs_dir_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
-				      xfs_da_state_blk_t *blk2)
-{
-	xfs_da_state_blk_t *tmp_blk;
-	xfs_dir_leafblock_t *leaf1, *leaf2;
-	xfs_dir_leaf_hdr_t *hdr1, *hdr2;
-	int count, totallen, max, space, swap;
-
-	/*
-	 * Set up environment.
-	 */
-	ASSERT(blk1->magic == XFS_DIR_LEAF_MAGIC);
-	ASSERT(blk2->magic == XFS_DIR_LEAF_MAGIC);
-	leaf1 = blk1->bp->data;
-	leaf2 = blk2->bp->data;
-	ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-	ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-
-	/*
-	 * Check ordering of blocks, reverse if it makes things simpler.
-	 */
-	swap = 0;
-	if (xfs_dir_leaf_order(blk1->bp, blk2->bp)) {
-		tmp_blk = blk1;
-		blk1 = blk2;
-		blk2 = tmp_blk;
-		leaf1 = blk1->bp->data;
-		leaf2 = blk2->bp->data;
-		swap = 1;
-	}
-	hdr1 = &leaf1->hdr;
-	hdr2 = &leaf2->hdr;
-
-	/*
-	 * Examine entries until we reduce the absolute difference in
-	 * byte usage between the two blocks to a minimum.  Then get
-	 * the direction to copy and the number of elements to move.
-	 */
-	state->inleaf = xfs_dir_leaf_figure_balance(state, blk1, blk2,
-							   &count, &totallen);
-	if (swap)
-		state->inleaf = !state->inleaf;
-
-	/*
-	 * Move any entries required from leaf to leaf:
-	 */
-	if (count < INT_GET(hdr1->count, ARCH_CONVERT)) {
-		/*
-		 * Figure the total bytes to be added to the destination leaf.
-		 */
-		count = INT_GET(hdr1->count, ARCH_CONVERT) - count;	/* number entries being moved */
-		space  = INT_GET(hdr1->namebytes, ARCH_CONVERT) - totallen;
-		space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1);
-		space += count * (uint)sizeof(xfs_dir_leaf_entry_t);
-
-		/*
-		 * leaf2 is the destination, compact it if it looks tight.
-		 */
-		max  = INT_GET(hdr2->firstused, ARCH_CONVERT) - (uint)sizeof(xfs_dir_leaf_hdr_t);
-		max -= INT_GET(hdr2->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
-		if (space > max) {
-			xfs_dir_leaf_compact(state->args->trans, blk2->bp,
-								 0, 0);
-		}
-
-		/*
-		 * Move high entries from leaf1 to low end of leaf2.
-		 */
-		xfs_dir_leaf_moveents(leaf1, INT_GET(hdr1->count, ARCH_CONVERT) - count,
-					     leaf2, 0, count, state->mp);
-
-		xfs_da_log_buf(state->args->trans, blk1->bp, 0,
-						   state->blocksize-1);
-		xfs_da_log_buf(state->args->trans, blk2->bp, 0,
-						   state->blocksize-1);
-
-	} else if (count > INT_GET(hdr1->count, ARCH_CONVERT)) {
-		/*
-		 * Figure the total bytes to be added to the destination leaf.
-		 */
-		count -= INT_GET(hdr1->count, ARCH_CONVERT);		/* number entries being moved */
-		space  = totallen - INT_GET(hdr1->namebytes, ARCH_CONVERT);
-		space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1);
-		space += count * (uint)sizeof(xfs_dir_leaf_entry_t);
-
-		/*
-		 * leaf1 is the destination, compact it if it looks tight.
-		 */
-		max  = INT_GET(hdr1->firstused, ARCH_CONVERT) - (uint)sizeof(xfs_dir_leaf_hdr_t);
-		max -= INT_GET(hdr1->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
-		if (space > max) {
-			xfs_dir_leaf_compact(state->args->trans, blk1->bp,
-								 0, 0);
-		}
-
-		/*
-		 * Move low entries from leaf2 to high end of leaf1.
-		 */
-		xfs_dir_leaf_moveents(leaf2, 0, leaf1, (int)INT_GET(hdr1->count, ARCH_CONVERT),
-					     count, state->mp);
-
-		xfs_da_log_buf(state->args->trans, blk1->bp, 0,
-						   state->blocksize-1);
-		xfs_da_log_buf(state->args->trans, blk2->bp, 0,
-						   state->blocksize-1);
-	}
-
-	/*
-	 * Copy out last hashval in each block for B-tree code.
-	 */
-	blk1->hashval = INT_GET(leaf1->entries[ INT_GET(leaf1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
-	blk2->hashval = INT_GET(leaf2->entries[ INT_GET(leaf2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
-
-	/*
-	 * Adjust the expected index for insertion.
-	 * GROT: this doesn't work unless blk2 was originally empty.
-	 */
-	if (!state->inleaf) {
-		blk2->index = blk1->index - INT_GET(leaf1->hdr.count, ARCH_CONVERT);
-	}
-}
-
-/*
- * Examine entries until we reduce the absolute difference in
- * byte usage between the two blocks to a minimum.
- * GROT: Is this really necessary?  With other than a 512 byte blocksize,
- * GROT: there will always be enough room in either block for a new entry.
- * GROT: Do a double-split for this case?
- */
-STATIC int
-xfs_dir_leaf_figure_balance(xfs_da_state_t *state,
-					   xfs_da_state_blk_t *blk1,
-					   xfs_da_state_blk_t *blk2,
-					   int *countarg, int *namebytesarg)
-{
-	xfs_dir_leafblock_t *leaf1, *leaf2;
-	xfs_dir_leaf_hdr_t *hdr1, *hdr2;
-	xfs_dir_leaf_entry_t *entry;
-	int count, max, totallen, half;
-	int lastdelta, foundit, tmp;
-
-	/*
-	 * Set up environment.
-	 */
-	leaf1 = blk1->bp->data;
-	leaf2 = blk2->bp->data;
-	hdr1 = &leaf1->hdr;
-	hdr2 = &leaf2->hdr;
-	foundit = 0;
-	totallen = 0;
-
-	/*
-	 * Examine entries until we reduce the absolute difference in
-	 * byte usage between the two blocks to a minimum.
-	 */
-	max = INT_GET(hdr1->count, ARCH_CONVERT) + INT_GET(hdr2->count, ARCH_CONVERT);
-	half  = (max+1) * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1);
-	half += INT_GET(hdr1->namebytes, ARCH_CONVERT) + INT_GET(hdr2->namebytes, ARCH_CONVERT) + state->args->namelen;
-	half /= 2;
-	lastdelta = state->blocksize;
-	entry = &leaf1->entries[0];
-	for (count = 0; count < max; entry++, count++) {
-
-#define XFS_DIR_ABS(A)	(((A) < 0) ? -(A) : (A))
-		/*
-		 * The new entry is in the first block, account for it.
-		 */
-		if (count == blk1->index) {
-			tmp = totallen + (uint)sizeof(*entry)
-				+ XFS_DIR_LEAF_ENTSIZE_BYNAME(state->args->namelen);
-			if (XFS_DIR_ABS(half - tmp) > lastdelta)
-				break;
-			lastdelta = XFS_DIR_ABS(half - tmp);
-			totallen = tmp;
-			foundit = 1;
-		}
-
-		/*
-		 * Wrap around into the second block if necessary.
-		 */
-		if (count == INT_GET(hdr1->count, ARCH_CONVERT)) {
-			leaf1 = leaf2;
-			entry = &leaf1->entries[0];
-		}
-
-		/*
-		 * Figure out if next leaf entry would be too much.
-		 */
-		tmp = totallen + (uint)sizeof(*entry)
-				+ XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry);
-		if (XFS_DIR_ABS(half - tmp) > lastdelta)
-			break;
-		lastdelta = XFS_DIR_ABS(half - tmp);
-		totallen = tmp;
-#undef XFS_DIR_ABS
-	}
-
-	/*
-	 * Calculate the number of namebytes that will end up in lower block.
-	 * If new entry not in lower block, fix up the count.
-	 */
-	totallen -=
-		count * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1);
-	if (foundit) {
-		totallen -= (sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1) +
-			    state->args->namelen;
-	}
-
-	*countarg = count;
-	*namebytesarg = totallen;
-	return foundit;
-}
-
-/*========================================================================
- * Routines used for shrinking the Btree.
- *========================================================================*/
-
-/*
- * Check a leaf block and its neighbors to see if the block should be
- * collapsed into one or the other neighbor.  Always keep the block
- * with the smaller block number.
- * If the current block is over 50% full, don't try to join it, return 0.
- * If the block is empty, fill in the state structure and return 2.
- * If it can be collapsed, fill in the state structure and return 1.
- * If nothing can be done, return 0.
- */
-int
-xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
-{
-	xfs_dir_leafblock_t *leaf;
-	xfs_da_state_blk_t *blk;
-	xfs_da_blkinfo_t *info;
-	int count, bytes, forward, error, retval, i;
-	xfs_dablk_t blkno;
-	xfs_dabuf_t *bp;
-
-	/*
-	 * Check for the degenerate case of the block being over 50% full.
-	 * If so, it's not worth even looking to see if we might be able
-	 * to coalesce with a sibling.
-	 */
-	blk = &state->path.blk[ state->path.active-1 ];
-	info = blk->bp->data;
-	ASSERT(be16_to_cpu(info->magic) == XFS_DIR_LEAF_MAGIC);
-	leaf = (xfs_dir_leafblock_t *)info;
-	count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
-	bytes = (uint)sizeof(xfs_dir_leaf_hdr_t) +
-		count * (uint)sizeof(xfs_dir_leaf_entry_t) +
-		count * ((uint)sizeof(xfs_dir_leaf_name_t)-1) +
-		INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
-	if (bytes > (state->blocksize >> 1)) {
-		*action = 0;	/* blk over 50%, don't try to join */
-		return 0;
-	}
-
-	/*
-	 * Check for the degenerate case of the block being empty.
-	 * If the block is empty, we'll simply delete it, no need to
-	 * coalesce it with a sibling block.  We choose (arbitrarily)
-	 * to merge with the forward block unless it is NULL.
-	 */
-	if (count == 0) {
-		/*
-		 * Make altpath point to the block we want to keep and
-		 * path point to the block we want to drop (this one).
-		 */
-		forward = (info->forw != 0);
-		memcpy(&state->altpath, &state->path, sizeof(state->path));
-		error = xfs_da_path_shift(state, &state->altpath, forward,
-						 0, &retval);
-		if (error)
-			return error;
-		if (retval) {
-			*action = 0;
-		} else {
-			*action = 2;
-		}
-		return 0;
-	}
-
-	/*
-	 * Examine each sibling block to see if we can coalesce with
-	 * at least 25% free space to spare.  We need to figure out
-	 * whether to merge with the forward or the backward block.
-	 * We prefer coalescing with the lower numbered sibling so as
-	 * to shrink a directory over time.
-	 */
-	forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back));	/* start with smaller blk num */
-	for (i = 0; i < 2; forward = !forward, i++) {
-		if (forward)
-			blkno = be32_to_cpu(info->forw);
-		else
-			blkno = be32_to_cpu(info->back);
-		if (blkno == 0)
-			continue;
-		error = xfs_da_read_buf(state->args->trans, state->args->dp,
-							    blkno, -1, &bp,
-							    XFS_DATA_FORK);
-		if (error)
-			return error;
-		ASSERT(bp != NULL);
-
-		leaf = (xfs_dir_leafblock_t *)info;
-		count  = INT_GET(leaf->hdr.count, ARCH_CONVERT);
-		bytes  = state->blocksize - (state->blocksize>>2);
-		bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
-		leaf = bp->data;
-		ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-		count += INT_GET(leaf->hdr.count, ARCH_CONVERT);
-		bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
-		bytes -= count * ((uint)sizeof(xfs_dir_leaf_name_t) - 1);
-		bytes -= count * (uint)sizeof(xfs_dir_leaf_entry_t);
-		bytes -= (uint)sizeof(xfs_dir_leaf_hdr_t);
-		if (bytes >= 0)
-			break;	/* fits with at least 25% to spare */
-
-		xfs_da_brelse(state->args->trans, bp);
-	}
-	if (i >= 2) {
-		*action = 0;
-		return 0;
-	}
-	xfs_da_buf_done(bp);
-
-	/*
-	 * Make altpath point to the block we want to keep (the lower
-	 * numbered block) and path point to the block we want to drop.
-	 */
-	memcpy(&state->altpath, &state->path, sizeof(state->path));
-	if (blkno < blk->blkno) {
-		error = xfs_da_path_shift(state, &state->altpath, forward,
-						 0, &retval);
-	} else {
-		error = xfs_da_path_shift(state, &state->path, forward,
-						 0, &retval);
-	}
-	if (error)
-		return error;
-	if (retval) {
-		*action = 0;
-	} else {
-		*action = 1;
-	}
-	return 0;
-}
-
-/*
- * Remove a name from the leaf directory structure.
- *
- * Return 1 if leaf is less than 37% full, 0 if >= 37% full.
- * If two leaves are 37% full, when combined they will leave 25% free.
- */
-int
-xfs_dir_leaf_remove(xfs_trans_t *trans, xfs_dabuf_t *bp, int index)
-{
-	xfs_dir_leafblock_t *leaf;
-	xfs_dir_leaf_hdr_t *hdr;
-	xfs_dir_leaf_map_t *map;
-	xfs_dir_leaf_entry_t *entry;
-	xfs_dir_leaf_name_t *namest;
-	int before, after, smallest, entsize;
-	int tablesize, tmp, i;
-	xfs_mount_t *mp;
-
-	leaf = bp->data;
-	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-	hdr = &leaf->hdr;
-	mp = trans->t_mountp;
-	ASSERT((INT_GET(hdr->count, ARCH_CONVERT) > 0) && (INT_GET(hdr->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)));
-	ASSERT((index >= 0) && (index < INT_GET(hdr->count, ARCH_CONVERT)));
-	ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) >= ((INT_GET(hdr->count, ARCH_CONVERT)*sizeof(*entry))+sizeof(*hdr)));
-	entry = &leaf->entries[index];
-	ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) >= INT_GET(hdr->firstused, ARCH_CONVERT));
-	ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp));
-
-	/*
-	 * Scan through free region table:
-	 *    check for adjacency of free'd entry with an existing one,
-	 *    find smallest free region in case we need to replace it,
-	 *    adjust any map that borders the entry table,
-	 */
-	tablesize = INT_GET(hdr->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t)
-			+ (uint)sizeof(xfs_dir_leaf_hdr_t);
-	map = &hdr->freemap[0];
-	tmp = INT_GET(map->size, ARCH_CONVERT);
-	before = after = -1;
-	smallest = XFS_DIR_LEAF_MAPSIZE - 1;
-	entsize = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry);
-	for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) {
-		ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
-		ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
-		if (INT_GET(map->base, ARCH_CONVERT) == tablesize) {
-			INT_MOD(map->base, ARCH_CONVERT, -((uint)sizeof(xfs_dir_leaf_entry_t)));
-			INT_MOD(map->size, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_entry_t));
-		}
-
-		if ((INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT)) == INT_GET(entry->nameidx, ARCH_CONVERT)) {
-			before = i;
-		} else if (INT_GET(map->base, ARCH_CONVERT) == (INT_GET(entry->nameidx, ARCH_CONVERT) + entsize)) {
-			after = i;
-		} else if (INT_GET(map->size, ARCH_CONVERT) < tmp) {
-			tmp = INT_GET(map->size, ARCH_CONVERT);
-			smallest = i;
-		}
-	}
-
-	/*
-	 * Coalesce adjacent freemap regions,
-	 * or replace the smallest region.
-	 */
-	if ((before >= 0) || (after >= 0)) {
-		if ((before >= 0) && (after >= 0)) {
-			map = &hdr->freemap[before];
-			INT_MOD(map->size, ARCH_CONVERT, entsize);
-			INT_MOD(map->size, ARCH_CONVERT, INT_GET(hdr->freemap[after].size, ARCH_CONVERT));
-			hdr->freemap[after].base = 0;
-			hdr->freemap[after].size = 0;
-		} else if (before >= 0) {
-			map = &hdr->freemap[before];
-			INT_MOD(map->size, ARCH_CONVERT, entsize);
-		} else {
-			map = &hdr->freemap[after];
-			INT_COPY(map->base, entry->nameidx, ARCH_CONVERT);
-			INT_MOD(map->size, ARCH_CONVERT, entsize);
-		}
-	} else {
-		/*
-		 * Replace smallest region (if it is smaller than free'd entry)
-		 */
-		map = &hdr->freemap[smallest];
-		if (INT_GET(map->size, ARCH_CONVERT) < entsize) {
-			INT_COPY(map->base, entry->nameidx, ARCH_CONVERT);
-			INT_SET(map->size, ARCH_CONVERT, entsize);
-		}
-	}
-
-	/*
-	 * Did we remove the first entry?
-	 */
-	if (INT_GET(entry->nameidx, ARCH_CONVERT) == INT_GET(hdr->firstused, ARCH_CONVERT))
-		smallest = 1;
-	else
-		smallest = 0;
-
-	/*
-	 * Compress the remaining entries and zero out the removed stuff.
-	 */
-	namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
-	memset((char *)namest, 0, entsize);
-	xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, namest, entsize));
-
-	INT_MOD(hdr->namebytes, ARCH_CONVERT, -(entry->namelen));
-	tmp = (INT_GET(hdr->count, ARCH_CONVERT) - index) * (uint)sizeof(xfs_dir_leaf_entry_t);
-	memmove(entry, entry + 1, tmp);
-	INT_MOD(hdr->count, ARCH_CONVERT, -1);
-	xfs_da_log_buf(trans, bp,
-	    XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
-	entry = &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)];
-	memset((char *)entry, 0, sizeof(xfs_dir_leaf_entry_t));
-
-	/*
-	 * If we removed the first entry, re-find the first used byte
-	 * in the name area.  Note that if the entry was the "firstused",
-	 * then we don't have a "hole" in our block resulting from
-	 * removing the name.
-	 */
-	if (smallest) {
-		tmp = XFS_LBSIZE(mp);
-		entry = &leaf->entries[0];
-		for (i = INT_GET(hdr->count, ARCH_CONVERT)-1; i >= 0; entry++, i--) {
-			ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) >= INT_GET(hdr->firstused, ARCH_CONVERT));
-			ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp));
-			if (INT_GET(entry->nameidx, ARCH_CONVERT) < tmp)
-				tmp = INT_GET(entry->nameidx, ARCH_CONVERT);
-		}
-		INT_SET(hdr->firstused, ARCH_CONVERT, tmp);
-		if (!hdr->firstused)
-			INT_SET(hdr->firstused, ARCH_CONVERT, tmp - 1);
-	} else {
-		hdr->holes = 1;		/* mark as needing compaction */
-	}
-
-	xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
-
-	/*
-	 * Check if leaf is less than 50% full, caller may want to
-	 * "join" the leaf with a sibling if so.
-	 */
-	tmp  = (uint)sizeof(xfs_dir_leaf_hdr_t);
-	tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
-	tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * ((uint)sizeof(xfs_dir_leaf_name_t) - 1);
-	tmp += INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
-	if (tmp < mp->m_dir_magicpct)
-		return 1;			/* leaf is < 37% full */
-	return 0;
-}
-
-/*
- * Move all the directory entries from drop_leaf into save_leaf.
- */
-void
-xfs_dir_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
-				      xfs_da_state_blk_t *save_blk)
-{
-	xfs_dir_leafblock_t *drop_leaf, *save_leaf, *tmp_leaf;
-	xfs_dir_leaf_hdr_t *drop_hdr, *save_hdr, *tmp_hdr;
-	xfs_mount_t *mp;
-	char *tmpbuffer;
-
-	/*
-	 * Set up environment.
-	 */
-	mp = state->mp;
-	ASSERT(drop_blk->magic == XFS_DIR_LEAF_MAGIC);
-	ASSERT(save_blk->magic == XFS_DIR_LEAF_MAGIC);
-	drop_leaf = drop_blk->bp->data;
-	save_leaf = save_blk->bp->data;
-	ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-	ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-	drop_hdr = &drop_leaf->hdr;
-	save_hdr = &save_leaf->hdr;
-
-	/*
-	 * Save last hashval from dying block for later Btree fixup.
-	 */
-	drop_blk->hashval = INT_GET(drop_leaf->entries[ drop_leaf->hdr.count-1 ].hashval, ARCH_CONVERT);
-
-	/*
-	 * Check if we need a temp buffer, or can we do it in place.
-	 * Note that we don't check "leaf" for holes because we will
-	 * always be dropping it, toosmall() decided that for us already.
-	 */
-	if (save_hdr->holes == 0) {
-		/*
-		 * dest leaf has no holes, so we add there.  May need
-		 * to make some room in the entry array.
-		 */
-		if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) {
-			xfs_dir_leaf_moveents(drop_leaf, 0, save_leaf, 0,
-						 (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
-		} else {
-			xfs_dir_leaf_moveents(drop_leaf, 0,
-					      save_leaf, INT_GET(save_hdr->count, ARCH_CONVERT),
-					      (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
-		}
-	} else {
-		/*
-		 * Destination has holes, so we make a temporary copy
-		 * of the leaf and add them both to that.
-		 */
-		tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP);
-		ASSERT(tmpbuffer != NULL);
-		memset(tmpbuffer, 0, state->blocksize);
-		tmp_leaf = (xfs_dir_leafblock_t *)tmpbuffer;
-		tmp_hdr = &tmp_leaf->hdr;
-		tmp_hdr->info = save_hdr->info;	/* struct copy */
-		tmp_hdr->count = 0;
-		INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize);
-		if (!tmp_hdr->firstused)
-			INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize - 1);
-		tmp_hdr->namebytes = 0;
-		if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) {
-			xfs_dir_leaf_moveents(drop_leaf, 0, tmp_leaf, 0,
-						 (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
-			xfs_dir_leaf_moveents(save_leaf, 0,
-					      tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
-					      (int)INT_GET(save_hdr->count, ARCH_CONVERT), mp);
-		} else {
-			xfs_dir_leaf_moveents(save_leaf, 0, tmp_leaf, 0,
-						 (int)INT_GET(save_hdr->count, ARCH_CONVERT), mp);
-			xfs_dir_leaf_moveents(drop_leaf, 0,
-					      tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
-					      (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
-		}
-		memcpy(save_leaf, tmp_leaf, state->blocksize);
-		kmem_free(tmpbuffer, state->blocksize);
-	}
-
-	xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
-					   state->blocksize - 1);
-
-	/*
-	 * Copy out last hashval in each block for B-tree code.
-	 */
-	save_blk->hashval = INT_GET(save_leaf->entries[ INT_GET(save_leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
-}
-
-/*========================================================================
- * Routines used for finding things in the Btree.
- *========================================================================*/
-
-/*
- * Look up a name in a leaf directory structure.
- * This is the internal routine, it uses the caller's buffer.
- *
- * Note that duplicate keys are allowed, but only check within the
- * current leaf node.  The Btree code must check in adjacent leaf nodes.
- *
- * Return in *index the index into the entry[] array of either the found
- * entry, or where the entry should have been (insert before that entry).
- *
- * Don't change the args->inumber unless we find the filename.
- */
-int
-xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index)
-{
-	xfs_dir_leafblock_t *leaf;
-	xfs_dir_leaf_entry_t *entry;
-	xfs_dir_leaf_name_t *namest;
-	int probe, span;
-	xfs_dahash_t hashval;
-
-	leaf = bp->data;
-	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-	ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) < (XFS_LBSIZE(args->dp->i_mount)/8));
-
-	/*
-	 * Binary search.  (note: small blocks will skip this loop)
-	 */
-	hashval = args->hashval;
-	probe = span = INT_GET(leaf->hdr.count, ARCH_CONVERT) / 2;
-	for (entry = &leaf->entries[probe]; span > 4;
-		   entry = &leaf->entries[probe]) {
-		span /= 2;
-		if (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)
-			probe += span;
-		else if (INT_GET(entry->hashval, ARCH_CONVERT) > hashval)
-			probe -= span;
-		else
-			break;
-	}
-	ASSERT((probe >= 0) && \
-	       ((!leaf->hdr.count) || (probe < INT_GET(leaf->hdr.count, ARCH_CONVERT))));
-	ASSERT((span <= 4) || (INT_GET(entry->hashval, ARCH_CONVERT) == hashval));
-
-	/*
-	 * Since we may have duplicate hashval's, find the first matching
-	 * hashval in the leaf.
-	 */
-	while ((probe > 0) && (INT_GET(entry->hashval, ARCH_CONVERT) >= hashval)) {
-		entry--;
-		probe--;
-	}
-	while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) && (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)) {
-		entry++;
-		probe++;
-	}
-	if ((probe == INT_GET(leaf->hdr.count, ARCH_CONVERT)) || (INT_GET(entry->hashval, ARCH_CONVERT) != hashval)) {
-		*index = probe;
-		ASSERT(args->oknoent);
-		return XFS_ERROR(ENOENT);
-	}
-
-	/*
-	 * Duplicate keys may be present, so search all of them for a match.
-	 */
-	while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) && (INT_GET(entry->hashval, ARCH_CONVERT) == hashval)) {
-		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
-		if (entry->namelen == args->namelen &&
-		    namest->name[0] == args->name[0] &&
-		    memcmp(args->name, namest->name, args->namelen) == 0) {
-			XFS_DIR_SF_GET_DIRINO(&namest->inumber, &args->inumber);
-			*index = probe;
-			return XFS_ERROR(EEXIST);
-		}
-		entry++;
-		probe++;
-	}
-	*index = probe;
-	ASSERT(probe == INT_GET(leaf->hdr.count, ARCH_CONVERT) || args->oknoent);
-	return XFS_ERROR(ENOENT);
-}
-
-/*========================================================================
- * Utility routines.
- *========================================================================*/
-
-/*
- * Move the indicated entries from one leaf to another.
- * NOTE: this routine modifies both source and destination leaves.
- */
-/* ARGSUSED */
-STATIC void
-xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s,
-		      xfs_dir_leafblock_t *leaf_d, int start_d,
-		      int count, xfs_mount_t *mp)
-{
-	xfs_dir_leaf_hdr_t *hdr_s, *hdr_d;
-	xfs_dir_leaf_entry_t *entry_s, *entry_d;
-	int tmp, i;
-
-	/*
-	 * Check for nothing to do.
-	 */
-	if (count == 0)
-		return;
-
-	/*
-	 * Set up environment.
-	 */
-	ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-	ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-	hdr_s = &leaf_s->hdr;
-	hdr_d = &leaf_d->hdr;
-	ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0) && (INT_GET(hdr_s->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)));
-	ASSERT(INT_GET(hdr_s->firstused, ARCH_CONVERT) >=
-		((INT_GET(hdr_s->count, ARCH_CONVERT)*sizeof(*entry_s))+sizeof(*hdr_s)));
-	ASSERT(INT_GET(hdr_d->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8));
-	ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >=
-		((INT_GET(hdr_d->count, ARCH_CONVERT)*sizeof(*entry_d))+sizeof(*hdr_d)));
-
-	ASSERT(start_s < INT_GET(hdr_s->count, ARCH_CONVERT));
-	ASSERT(start_d <= INT_GET(hdr_d->count, ARCH_CONVERT));
-	ASSERT(count <= INT_GET(hdr_s->count, ARCH_CONVERT));
-
-	/*
-	 * Move the entries in the destination leaf up to make a hole?
-	 */
-	if (start_d < INT_GET(hdr_d->count, ARCH_CONVERT)) {
-		tmp  = INT_GET(hdr_d->count, ARCH_CONVERT) - start_d;
-		tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
-		entry_s = &leaf_d->entries[start_d];
-		entry_d = &leaf_d->entries[start_d + count];
-		memcpy(entry_d, entry_s, tmp);
-	}
-
-	/*
-	 * Copy all entry's in the same (sorted) order,
-	 * but allocate filenames packed and in sequence.
-	 */
-	entry_s = &leaf_s->entries[start_s];
-	entry_d = &leaf_d->entries[start_d];
-	for (i = 0; i < count; entry_s++, entry_d++, i++) {
-		ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) >= INT_GET(hdr_s->firstused, ARCH_CONVERT));
-		tmp = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry_s);
-		INT_MOD(hdr_d->firstused, ARCH_CONVERT, -(tmp));
-		entry_d->hashval = entry_s->hashval; /* INT_: direct copy */
-		INT_COPY(entry_d->nameidx, hdr_d->firstused, ARCH_CONVERT);
-		entry_d->namelen = entry_s->namelen;
-		ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp));
-		memcpy(XFS_DIR_LEAF_NAMESTRUCT(leaf_d, INT_GET(entry_d->nameidx, ARCH_CONVERT)),
-		       XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)), tmp);
-		ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp));
-		memset((char *)XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)),
-		      0, tmp);
-		INT_MOD(hdr_s->namebytes, ARCH_CONVERT, -(entry_d->namelen));
-		INT_MOD(hdr_d->namebytes, ARCH_CONVERT, entry_d->namelen);
-		INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
-		INT_MOD(hdr_d->count, ARCH_CONVERT, +1);
-		tmp  = INT_GET(hdr_d->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t)
-				+ (uint)sizeof(xfs_dir_leaf_hdr_t);
-		ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= tmp);
-
-	}
-
-	/*
-	 * Zero out the entries we just copied.
-	 */
-	if (start_s == INT_GET(hdr_s->count, ARCH_CONVERT)) {
-		tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
-		entry_s = &leaf_s->entries[start_s];
-		ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
-		memset((char *)entry_s, 0, tmp);
-	} else {
-		/*
-		 * Move the remaining entries down to fill the hole,
-		 * then zero the entries at the top.
-		 */
-		tmp  = INT_GET(hdr_s->count, ARCH_CONVERT) - count;
-		tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
-		entry_s = &leaf_s->entries[start_s + count];
-		entry_d = &leaf_s->entries[start_s];
-		memcpy(entry_d, entry_s, tmp);
-
-		tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
-		entry_s = &leaf_s->entries[INT_GET(hdr_s->count, ARCH_CONVERT)];
-		ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
-		memset((char *)entry_s, 0, tmp);
-	}
-
-	/*
-	 * Fill in the freemap information
-	 */
-	INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_hdr_t));
-	INT_MOD(hdr_d->freemap[0].base, ARCH_CONVERT, INT_GET(hdr_d->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t));
-	INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, INT_GET(hdr_d->firstused, ARCH_CONVERT) - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
-	INT_SET(hdr_d->freemap[1].base, ARCH_CONVERT, (hdr_d->freemap[2].base = 0));
-	INT_SET(hdr_d->freemap[1].size, ARCH_CONVERT, (hdr_d->freemap[2].size = 0));
-	hdr_s->holes = 1;	/* leaf may not be compact */
-}
-
-/*
- * Compare two leaf blocks "order".
- */
-int
-xfs_dir_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
-{
-	xfs_dir_leafblock_t *leaf1, *leaf2;
-
-	leaf1 = leaf1_bp->data;
-	leaf2 = leaf2_bp->data;
-	ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR_LEAF_MAGIC) &&
-	       (be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR_LEAF_MAGIC));
-	if ((INT_GET(leaf1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(leaf2->hdr.count, ARCH_CONVERT) > 0) &&
-	    ((INT_GET(leaf2->entries[ 0 ].hashval, ARCH_CONVERT) <
-	      INT_GET(leaf1->entries[ 0 ].hashval, ARCH_CONVERT)) ||
-	     (INT_GET(leaf2->entries[ INT_GET(leaf2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
-	      INT_GET(leaf1->entries[ INT_GET(leaf1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
-		return 1;
-	}
-	return 0;
-}
-
-/*
- * Pick up the last hashvalue from a leaf block.
- */
-xfs_dahash_t
-xfs_dir_leaf_lasthash(xfs_dabuf_t *bp, int *count)
-{
-	xfs_dir_leafblock_t *leaf;
-
-	leaf = bp->data;
-	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
-	if (count)
-		*count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
-	if (!leaf->hdr.count)
-		return(0);
-	return(INT_GET(leaf->entries[ INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
-}
-
-/*
- * Copy out directory entries for getdents(), for leaf directories.
- */
-int
-xfs_dir_leaf_getdents_int(
-	xfs_dabuf_t	*bp,
-	xfs_inode_t	*dp,
-	xfs_dablk_t	bno,
-	uio_t		*uio,
-	int		*eobp,
-	xfs_dirent_t	*dbp,
-	xfs_dir_put_t	put,
-	xfs_daddr_t		nextda)
-{
-	xfs_dir_leafblock_t	*leaf;
-	xfs_dir_leaf_entry_t	*entry;
-	xfs_dir_leaf_name_t	*namest;
-	int			entno, want_entno, i, nextentno;
-	xfs_mount_t		*mp;
-	xfs_dahash_t		cookhash;
-	xfs_dahash_t		nexthash = 0;
-#if (BITS_PER_LONG == 32)
-	xfs_dahash_t		lasthash = XFS_DA_MAXHASH;
-#endif
-	xfs_dir_put_args_t	p;
-
-	mp = dp->i_mount;
-	leaf = bp->data;
-	if (be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC) {
-		*eobp = 1;
-		return XFS_ERROR(ENOENT);	/* XXX wrong code */
-	}
-
-	want_entno = XFS_DA_COOKIE_ENTRY(mp, uio->uio_offset);
-
-	cookhash = XFS_DA_COOKIE_HASH(mp, uio->uio_offset);
-
-	xfs_dir_trace_g_dul("leaf: start", dp, uio, leaf);
-
-	/*
-	 * Re-find our place.
-	 */
-	for (i = entno = 0, entry = &leaf->entries[0];
-		     i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
-			     entry++, i++) {
-
-		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
-				    INT_GET(entry->nameidx, ARCH_CONVERT));
-
-		if (unlikely(
-		    ((char *)namest < (char *)leaf) ||
-		    ((char *)namest >= (char *)leaf + XFS_LBSIZE(mp)))) {
-			XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(1)",
-					     XFS_ERRLEVEL_LOW, mp, leaf);
-			xfs_dir_trace_g_du("leaf: corrupted", dp, uio);
-			return XFS_ERROR(EFSCORRUPTED);
-		}
-		if (INT_GET(entry->hashval, ARCH_CONVERT) >= cookhash) {
-			if (   entno < want_entno
-			    && INT_GET(entry->hashval, ARCH_CONVERT)
-							== cookhash) {
-				/*
-				 * Trying to get to a particular offset in a
-				 * run of equal-hashval entries.
-				 */
-				entno++;
-			} else if (   want_entno > 0
-				   && entno == want_entno
-				   && INT_GET(entry->hashval, ARCH_CONVERT)
-							== cookhash) {
-				break;
-			} else {
-				entno = 0;
-				break;
-			}
-		}
-	}
-
-	if (i == INT_GET(leaf->hdr.count, ARCH_CONVERT)) {
-		xfs_dir_trace_g_du("leaf: hash not found", dp, uio);
-		if (!leaf->hdr.info.forw)
-			uio->uio_offset =
-				XFS_DA_MAKE_COOKIE(mp, 0, 0, XFS_DA_MAXHASH);
-		/*
-		 * Don't set uio_offset if there's another block:
-		 * the node code will be setting uio_offset anyway.
-		 */
-		*eobp = 0;
-		return 0;
-	}
-	xfs_dir_trace_g_due("leaf: hash found", dp, uio, entry);
-
-	p.dbp = dbp;
-	p.put = put;
-	p.uio = uio;
-
-	/*
-	 * We're synchronized, start copying entries out to the user.
-	 */
-	for (; entno >= 0 && i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
-			     entry++, i++, (entno = nextentno)) {
-		int lastresid=0, retval;
-		xfs_dircook_t lastoffset;
-		xfs_dahash_t thishash;
-
-		/*
-		 * Check for a damaged directory leaf block and pick up
-		 * the inode number from this entry.
-		 */
-		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
-				    INT_GET(entry->nameidx, ARCH_CONVERT));
-
-		if (unlikely(
-		    ((char *)namest < (char *)leaf) ||
-		    ((char *)namest >= (char *)leaf + XFS_LBSIZE(mp)))) {
-			XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(2)",
-					     XFS_ERRLEVEL_LOW, mp, leaf);
-			xfs_dir_trace_g_du("leaf: corrupted", dp, uio);
-			return XFS_ERROR(EFSCORRUPTED);
-		}
-
-		xfs_dir_trace_g_duc("leaf: middle cookie  ",
-						   dp, uio, p.cook.o);
-
-		if (i < (INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1)) {
-			nexthash = INT_GET(entry[1].hashval, ARCH_CONVERT);
-
-			if (nexthash == INT_GET(entry->hashval, ARCH_CONVERT))
-				nextentno = entno + 1;
-			else
-				nextentno = 0;
-			XFS_PUT_COOKIE(p.cook, mp, bno, nextentno, nexthash);
-			xfs_dir_trace_g_duc("leaf: middle cookie  ",
-						   dp, uio, p.cook.o);
-
-		} else if ((thishash = be32_to_cpu(leaf->hdr.info.forw))) {
-			xfs_dabuf_t *bp2;
-			xfs_dir_leafblock_t *leaf2;
-
-			ASSERT(nextda != -1);
-
-			retval = xfs_da_read_buf(dp->i_transp, dp, thishash,
-						 nextda, &bp2, XFS_DATA_FORK);
-			if (retval)
-				return retval;
-
-			ASSERT(bp2 != NULL);
-
-			leaf2 = bp2->data;
-
-			if (unlikely(
-			       (be16_to_cpu(leaf2->hdr.info.magic)
-						!= XFS_DIR_LEAF_MAGIC)
-			    || (be32_to_cpu(leaf2->hdr.info.back)
-						!= bno))) {	/* GROT */
-				XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(3)",
-						     XFS_ERRLEVEL_LOW, mp,
-						     leaf2);
-				xfs_da_brelse(dp->i_transp, bp2);
-
-				return XFS_ERROR(EFSCORRUPTED);
-			}
-
-			nexthash = INT_GET(leaf2->entries[0].hashval,
-								ARCH_CONVERT);
-			nextentno = -1;
-			XFS_PUT_COOKIE(p.cook, mp, thishash, 0, nexthash);
-			xfs_da_brelse(dp->i_transp, bp2);
-			xfs_dir_trace_g_duc("leaf: next blk cookie",
-						   dp, uio, p.cook.o);
-		} else {
-			nextentno = -1;
-			XFS_PUT_COOKIE(p.cook, mp, 0, 0, XFS_DA_MAXHASH);
-		}
-
-		/*
-		 * Save off the cookie so we can fall back should the
-		 * 'put' into the outgoing buffer fails.  To handle a run
-		 * of equal-hashvals, the off_t structure on 64bit
-		 * builds has entno built into the cookie to ID the
-		 * entry.  On 32bit builds, we only have space for the
-		 * hashval so we can't ID specific entries within a group
-		 * of same hashval entries.   For this, lastoffset is set
-		 * to the first in the run of equal hashvals so we don't
-		 * include any entries unless we can include all entries
-		 * that share the same hashval.  Hopefully the buffer
-		 * provided is big enough to handle it (see pv763517).
-		 */
-#if (BITS_PER_LONG == 32)
-		if ((thishash = INT_GET(entry->hashval, ARCH_CONVERT))
-								!= lasthash) {
-			XFS_PUT_COOKIE(lastoffset, mp, bno, entno, thishash);
-			lastresid = uio->uio_resid;
-			lasthash = thishash;
-		} else {
-			xfs_dir_trace_g_duc("leaf: DUP COOKIES, skipped",
-						   dp, uio, p.cook.o);
-		}
-#else
-		thishash = INT_GET(entry->hashval, ARCH_CONVERT);
-		XFS_PUT_COOKIE(lastoffset, mp, bno, entno, thishash);
-		lastresid = uio->uio_resid;
-#endif /* BITS_PER_LONG == 32 */
-
-		/*
-		 * Put the current entry into the outgoing buffer.  If we fail
-		 * then restore the UIO to the first entry in the current
-		 * run of equal-hashval entries (probably one 1 entry long).
-		 */
-		p.ino = XFS_GET_DIR_INO8(namest->inumber);
-#if XFS_BIG_INUMS
-		p.ino += mp->m_inoadd;
-#endif
-		p.name = (char *)namest->name;
-		p.namelen = entry->namelen;
-
-		retval = p.put(&p);
-
-		if (!p.done) {
-			uio->uio_offset = lastoffset.o;
-			uio->uio_resid = lastresid;
-
-			*eobp = 1;
-
-			xfs_dir_trace_g_du("leaf: E-O-B", dp, uio);
-
-			return retval;
-		}
-	}
-
-	uio->uio_offset = p.cook.o;
-
-	*eobp = 0;
-
-	xfs_dir_trace_g_du("leaf: E-O-F", dp, uio);
-
-	return 0;
-}
-
-/*
- * Format a dirent64 structure and copy it out the the user's buffer.
- */
-int
-xfs_dir_put_dirent64_direct(xfs_dir_put_args_t *pa)
-{
-	iovec_t *iovp;
-	int reclen, namelen;
-	xfs_dirent_t *idbp;
-	uio_t *uio;
-
-	namelen = pa->namelen;
-	reclen = DIRENTSIZE(namelen);
-	uio = pa->uio;
-	if (reclen > uio->uio_resid) {
-		pa->done = 0;
-		return 0;
-	}
-	iovp = uio->uio_iov;
-	idbp = (xfs_dirent_t *)iovp->iov_base;
-	iovp->iov_base = (char *)idbp + reclen;
-	iovp->iov_len -= reclen;
-	uio->uio_resid -= reclen;
-	idbp->d_reclen = reclen;
-	idbp->d_ino = pa->ino;
-	idbp->d_off = pa->cook.o;
-	idbp->d_name[namelen] = '\0';
-	pa->done = 1;
-	memcpy(idbp->d_name, pa->name, namelen);
-	return 0;
-}
-
-/*
- * Format a dirent64 structure and copy it out the the user's buffer.
- */
-int
-xfs_dir_put_dirent64_uio(xfs_dir_put_args_t *pa)
-{
-	int		retval, reclen, namelen;
-	xfs_dirent_t	*idbp;
-	uio_t		*uio;
-
-	namelen = pa->namelen;
-	reclen = DIRENTSIZE(namelen);
-	uio = pa->uio;
-	if (reclen > uio->uio_resid) {
-		pa->done = 0;
-		return 0;
-	}
-	idbp = pa->dbp;
-	idbp->d_reclen = reclen;
-	idbp->d_ino = pa->ino;
-	idbp->d_off = pa->cook.o;
-	idbp->d_name[namelen] = '\0';
-	memcpy(idbp->d_name, pa->name, namelen);
-	retval = uio_read((caddr_t)idbp, reclen, uio);
-	pa->done = (retval == 0);
-	return retval;
-}
diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h
deleted file mode 100644
index eb8cd9a4667..00000000000
--- a/fs/xfs/xfs_dir_leaf.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DIR_LEAF_H__
-#define	__XFS_DIR_LEAF_H__
-
-/*
- * Directory layout, internal structure, access macros, etc.
- *
- * Large directories are structured around Btrees where all the data
- * elements are in the leaf nodes.  Filenames are hashed into an int,
- * then that int is used as the index into the Btree.  Since the hashval
- * of a filename may not be unique, we may have duplicate keys.  The
- * internal links in the Btree are logical block offsets into the file.
- */
-
-struct uio;
-struct xfs_bmap_free;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_da_state;
-struct xfs_da_state_blk;
-struct xfs_dir_put_args;
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_trans;
-
-/*========================================================================
- * Directory Structure when equal to XFS_LBSIZE(mp) bytes.
- *========================================================================*/
-
-/*
- * This is the structure of the leaf nodes in the Btree.
- *
- * Struct leaf_entry's are packed from the top.  Names grow from the bottom
- * but are not packed.  The freemap contains run-length-encoded entries
- * for the free bytes after the leaf_entry's, but only the N largest such,
- * smaller runs are dropped.  When the freemap doesn't show enough space
- * for an allocation, we compact the namelist area and try again.  If we
- * still don't have enough space, then we have to split the block.
- *
- * Since we have duplicate hash keys, for each key that matches, compare
- * the actual string.  The root and intermediate node search always takes
- * the first-in-the-block key match found, so we should only have to work
- * "forw"ard.  If none matches, continue with the "forw"ard leaf nodes
- * until the hash key changes or the filename is found.
- *
- * The parent directory and the self-pointer are explicitly represented
- * (ie: there are entries for "." and "..").
- *
- * Note that the count being a __uint16_t limits us to something like a
- * blocksize of 1.3MB in the face of worst case (short) filenames.
- */
-#define XFS_DIR_LEAF_MAPSIZE	3	/* how many freespace slots */
-
-typedef struct xfs_dir_leaf_map {	/* RLE map of free bytes */
-	__uint16_t	base;	 	/* base of free region */
-	__uint16_t	size; 		/* run length of free region */
-} xfs_dir_leaf_map_t;
-
-typedef struct xfs_dir_leaf_hdr {	/* constant-structure header block */
-	xfs_da_blkinfo_t info;		/* block type, links, etc. */
-	__uint16_t	count;		/* count of active leaf_entry's */
-	__uint16_t	namebytes;	/* num bytes of name strings stored */
-	__uint16_t	firstused;	/* first used byte in name area */
-	__uint8_t	holes;		/* != 0 if blk needs compaction */
-	__uint8_t	pad1;
-	xfs_dir_leaf_map_t freemap[XFS_DIR_LEAF_MAPSIZE];
-} xfs_dir_leaf_hdr_t;
-
-typedef struct xfs_dir_leaf_entry {	/* sorted on key, not name */
-	xfs_dahash_t	hashval;	/* hash value of name */
-	__uint16_t	nameidx;	/* index into buffer of name */
-	__uint8_t	namelen;	/* length of name string */
-	__uint8_t	pad2;
-} xfs_dir_leaf_entry_t;
-
-typedef struct xfs_dir_leaf_name {
-	xfs_dir_ino_t	inumber;	/* inode number for this key */
-	__uint8_t	name[1];	/* name string itself */
-} xfs_dir_leaf_name_t;
-
-typedef struct xfs_dir_leafblock {
-	xfs_dir_leaf_hdr_t	hdr;	/* constant-structure header block */
-	xfs_dir_leaf_entry_t	entries[1];	/* var sized array */
-	xfs_dir_leaf_name_t	namelist[1];	/* grows from bottom of buf */
-} xfs_dir_leafblock_t;
-
-/*
- * Length of name for which a 512-byte block filesystem
- * can get a double split.
- */
-#define	XFS_DIR_LEAF_CAN_DOUBLE_SPLIT_LEN	\
-	(512 - (uint)sizeof(xfs_dir_leaf_hdr_t) - \
-	 (uint)sizeof(xfs_dir_leaf_entry_t) * 2 - \
-	 (uint)sizeof(xfs_dir_leaf_name_t) * 2 - (MAXNAMELEN - 2) + 1 + 1)
-
-typedef int (*xfs_dir_put_t)(struct xfs_dir_put_args *pa);
-
-typedef union {
-	xfs_off_t		o;		/* offset (cookie) */
-	/*
-	 * Watch the order here (endian-ness dependent).
-	 */
-	struct {
-#ifndef XFS_NATIVE_HOST
-		xfs_dahash_t	h;	/* hash value */
-		__uint32_t	be;	/* block and entry */
-#else
-		__uint32_t	be;	/* block and entry */
-		xfs_dahash_t	h;	/* hash value */
-#endif /* XFS_NATIVE_HOST */
-	} s;
-} xfs_dircook_t;
-
-#define	XFS_PUT_COOKIE(c,mp,bno,entry,hash)	\
-	((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash))
-
-typedef struct xfs_dir_put_args {
-	xfs_dircook_t	cook;		/* cookie of (next) entry */
-	xfs_intino_t	ino;		/* inode number */
-	struct xfs_dirent *dbp;		/* buffer pointer */
-	char		*name;		/* directory entry name */
-	int		namelen;	/* length of name */
-	int		done;		/* output: set if value was stored */
-	xfs_dir_put_t	put;		/* put function ptr (i/o) */
-	struct uio	*uio;		/* uio control structure */
-} xfs_dir_put_args_t;
-
-#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len)	\
-	xfs_dir_leaf_entsize_byname(len)
-static inline int xfs_dir_leaf_entsize_byname(int len)
-{
-	return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len;
-}
-
-#define XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)	\
-	xfs_dir_leaf_entsize_byentry(entry)
-static inline int xfs_dir_leaf_entsize_byentry(xfs_dir_leaf_entry_t *entry)
-{
-	return (uint)sizeof(xfs_dir_leaf_name_t)-1 + (entry)->namelen;
-}
-
-#define XFS_DIR_LEAF_NAMESTRUCT(leafp,offset)	\
-	xfs_dir_leaf_namestruct(leafp,offset)
-static inline xfs_dir_leaf_name_t *
-xfs_dir_leaf_namestruct(xfs_dir_leafblock_t *leafp, int offset)
-{
-	return (xfs_dir_leaf_name_t *)&((char *)(leafp))[offset];
-}
-
-/*========================================================================
- * Function prototypes for the kernel.
- *========================================================================*/
-
-/*
- * Internal routines when dirsize < XFS_LITINO(mp).
- */
-int xfs_dir_shortform_create(struct xfs_da_args *args, xfs_ino_t parent);
-int xfs_dir_shortform_addname(struct xfs_da_args *args);
-int xfs_dir_shortform_lookup(struct xfs_da_args *args);
-int xfs_dir_shortform_to_leaf(struct xfs_da_args *args);
-int xfs_dir_shortform_removename(struct xfs_da_args *args);
-int xfs_dir_shortform_getdents(struct xfs_inode *dp, struct uio *uio, int *eofp,
-			       struct xfs_dirent *dbp, xfs_dir_put_t put);
-int xfs_dir_shortform_replace(struct xfs_da_args *args);
-
-/*
- * Internal routines when dirsize == XFS_LBSIZE(mp).
- */
-int xfs_dir_leaf_to_node(struct xfs_da_args *args);
-int xfs_dir_leaf_to_shortform(struct xfs_da_args *args);
-
-/*
- * Routines used for growing the Btree.
- */
-int	xfs_dir_leaf_split(struct xfs_da_state *state,
-				  struct xfs_da_state_blk *oldblk,
-				  struct xfs_da_state_blk *newblk);
-int	xfs_dir_leaf_add(struct xfs_dabuf *leaf_buffer,
-				struct xfs_da_args *args, int insertion_index);
-int	xfs_dir_leaf_addname(struct xfs_da_args *args);
-int	xfs_dir_leaf_lookup_int(struct xfs_dabuf *leaf_buffer,
-				       struct xfs_da_args *args,
-				       int *index_found_at);
-int	xfs_dir_leaf_remove(struct xfs_trans *trans,
-				   struct xfs_dabuf *leaf_buffer,
-				   int index_to_remove);
-int	xfs_dir_leaf_getdents_int(struct xfs_dabuf *bp, struct xfs_inode *dp,
-					 xfs_dablk_t bno, struct uio *uio,
-					 int *eobp, struct xfs_dirent *dbp,
-					 xfs_dir_put_t put, xfs_daddr_t nextda);
-
-/*
- * Routines used for shrinking the Btree.
- */
-int	xfs_dir_leaf_toosmall(struct xfs_da_state *state, int *retval);
-void	xfs_dir_leaf_unbalance(struct xfs_da_state *state,
-					     struct xfs_da_state_blk *drop_blk,
-					     struct xfs_da_state_blk *save_blk);
-
-/*
- * Utility routines.
- */
-uint	xfs_dir_leaf_lasthash(struct xfs_dabuf *bp, int *count);
-int	xfs_dir_leaf_order(struct xfs_dabuf *leaf1_bp,
-				  struct xfs_dabuf *leaf2_bp);
-int	xfs_dir_put_dirent64_direct(xfs_dir_put_args_t *pa);
-int	xfs_dir_put_dirent64_uio(xfs_dir_put_args_t *pa);
-int	xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
-
-/*
- * Global data.
- */
-extern xfs_dahash_t	xfs_dir_hash_dot, xfs_dir_hash_dotdot;
-
-#endif /* __XFS_DIR_LEAF_H__ */
diff --git a/fs/xfs/xfs_dir_sf.h b/fs/xfs/xfs_dir_sf.h
deleted file mode 100644
index 5b20b4d3f57..00000000000
--- a/fs/xfs/xfs_dir_sf.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DIR_SF_H__
-#define	__XFS_DIR_SF_H__
-
-/*
- * Directory layout when stored internal to an inode.
- *
- * Small directories are packed as tightly as possible so as to
- * fit into the literal area of the inode.
- */
-
-typedef struct { __uint8_t i[sizeof(xfs_ino_t)]; } xfs_dir_ino_t;
-
-/*
- * The parent directory has a dedicated field, and the self-pointer must
- * be calculated on the fly.
- *
- * Entries are packed toward the top as tight as possible.  The header
- * and the elements much be memcpy'd out into a work area to get correct
- * alignment for the inode number fields.
- */
-typedef struct xfs_dir_sf_hdr {		/* constant-structure header block */
-	xfs_dir_ino_t	parent;		/* parent dir inode number */
-	__uint8_t	count;		/* count of active entries */
-} xfs_dir_sf_hdr_t;
-
-typedef struct xfs_dir_sf_entry {
-	xfs_dir_ino_t	inumber;	/* referenced inode number */
-	__uint8_t	namelen;	/* actual length of name (no NULL) */
-	__uint8_t	name[1];	/* name */
-} xfs_dir_sf_entry_t;
-
-typedef struct xfs_dir_shortform {
-	xfs_dir_sf_hdr_t	hdr;
-	xfs_dir_sf_entry_t	list[1];	/* variable sized array */
-} xfs_dir_shortform_t;
-
-/*
- * We generate this then sort it, so that readdirs are returned in
- * hash-order.  Else seekdir won't work.
- */
-typedef struct xfs_dir_sf_sort {
-	__uint8_t	entno;		/* .=0, ..=1, else entry# + 2 */
-	__uint8_t	seqno;		/* sequence # with same hash value */
-	__uint8_t	namelen;	/* length of name value (no null) */
-	xfs_dahash_t	hash;		/* this entry's hash value */
-	xfs_intino_t	ino;		/* this entry's inode number */
-	char		*name;		/* name value, pointer into buffer */
-} xfs_dir_sf_sort_t;
-
-#define	XFS_DIR_SF_GET_DIRINO(from,to)	xfs_dir_sf_get_dirino(from, to)
-static inline void xfs_dir_sf_get_dirino(xfs_dir_ino_t *from, xfs_ino_t *to)
-{
-	*(to) = XFS_GET_DIR_INO8(*from);
-}
-
-#define	XFS_DIR_SF_PUT_DIRINO(from,to)	xfs_dir_sf_put_dirino(from, to)
-static inline void xfs_dir_sf_put_dirino(xfs_ino_t *from, xfs_dir_ino_t *to)
-{
-	XFS_PUT_DIR_INO8(*(from), *(to));
-}
-
-#define XFS_DIR_SF_ENTSIZE_BYNAME(len)	xfs_dir_sf_entsize_byname(len)
-static inline int xfs_dir_sf_entsize_byname(int len)
-{
-	return (uint)sizeof(xfs_dir_sf_entry_t)-1 + (len);
-}
-
-#define XFS_DIR_SF_ENTSIZE_BYENTRY(sfep)	xfs_dir_sf_entsize_byentry(sfep)
-static inline int xfs_dir_sf_entsize_byentry(xfs_dir_sf_entry_t *sfep)
-{
-	return (uint)sizeof(xfs_dir_sf_entry_t)-1 + (sfep)->namelen;
-}
-
-#define XFS_DIR_SF_NEXTENTRY(sfep)		xfs_dir_sf_nextentry(sfep)
-static inline xfs_dir_sf_entry_t *xfs_dir_sf_nextentry(xfs_dir_sf_entry_t *sfep)
-{
-	return (xfs_dir_sf_entry_t *) \
-		((char *)(sfep) + XFS_DIR_SF_ENTSIZE_BYENTRY(sfep));
-}
-
-#define XFS_DIR_SF_ALLFIT(count,totallen)	\
-	xfs_dir_sf_allfit(count,totallen)
-static inline int xfs_dir_sf_allfit(int count, int totallen)
-{
-	return ((uint)sizeof(xfs_dir_sf_hdr_t) + \
-	       ((uint)sizeof(xfs_dir_sf_entry_t)-1)*(count) + (totallen));
-}
-
-#if defined(XFS_DIR_TRACE)
-
-/*
- * Kernel tracing support for directories.
- */
-struct uio;
-struct xfs_inode;
-struct xfs_da_intnode;
-struct xfs_dinode;
-struct xfs_dir_leafblock;
-struct xfs_dir_leaf_entry;
-
-#define	XFS_DIR_TRACE_SIZE	4096	/* size of global trace buffer */
-extern ktrace_t	*xfs_dir_trace_buf;
-
-/*
- * Trace record types.
- */
-#define	XFS_DIR_KTRACE_G_DU	1	/* dp, uio */
-#define	XFS_DIR_KTRACE_G_DUB	2	/* dp, uio, bno */
-#define	XFS_DIR_KTRACE_G_DUN	3	/* dp, uio, node */
-#define	XFS_DIR_KTRACE_G_DUL	4	/* dp, uio, leaf */
-#define	XFS_DIR_KTRACE_G_DUE	5	/* dp, uio, leaf entry */
-#define	XFS_DIR_KTRACE_G_DUC	6	/* dp, uio, cookie */
-
-void xfs_dir_trace_g_du(char *where, struct xfs_inode *dp, struct uio *uio);
-void xfs_dir_trace_g_dub(char *where, struct xfs_inode *dp, struct uio *uio,
-			      xfs_dablk_t bno);
-void xfs_dir_trace_g_dun(char *where, struct xfs_inode *dp, struct uio *uio,
-			      struct xfs_da_intnode *node);
-void xfs_dir_trace_g_dul(char *where, struct xfs_inode *dp, struct uio *uio,
-			      struct xfs_dir_leafblock *leaf);
-void xfs_dir_trace_g_due(char *where, struct xfs_inode *dp, struct uio *uio,
-			      struct xfs_dir_leaf_entry *entry);
-void xfs_dir_trace_g_duc(char *where, struct xfs_inode *dp, struct uio *uio,
-			      xfs_off_t cookie);
-void xfs_dir_trace_enter(int type, char *where,
-			     void *a0, void *a1, void *a2, void *a3,
-			     void *a4, void *a5, void *a6, void *a7,
-			     void *a8, void *a9, void *a10, void *a11);
-#else
-#define	xfs_dir_trace_g_du(w,d,u)
-#define	xfs_dir_trace_g_dub(w,d,u,b)
-#define	xfs_dir_trace_g_dun(w,d,u,n)
-#define	xfs_dir_trace_g_dul(w,d,u,l)
-#define	xfs_dir_trace_g_due(w,d,u,e)
-#define	xfs_dir_trace_g_duc(w,d,u,c)
-#endif /* DEBUG */
-
-#endif	/* __XFS_DIR_SF_H__ */
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index 00b1540f810..4e7865ad6f0 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -189,6 +189,6 @@ typedef enum {
 #define AT_DELAY_FLAG(f) ((f&ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
 
 
-extern struct bhv_vfsops xfs_dmops;
+extern struct bhv_module_vfsops xfs_dmops;
 
 #endif  /* __XFS_DMAPI_H__ */
diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c
index 629795b3b3d..1e4a35ddf7f 100644
--- a/fs/xfs/xfs_dmops.c
+++ b/fs/xfs/xfs_dmops.c
@@ -23,7 +23,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 2a21c502401..b95681b03d8 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -22,12 +22,10 @@
 #include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index f19282ec854..6cf6d8769b9 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -23,7 +23,6 @@
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_sb.h"
-#include "xfs_dir.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_trans_priv.h"
@@ -294,6 +293,62 @@ xfs_efi_init(xfs_mount_t	*mp,
 }
 
 /*
+ * Copy the EFI format structure held in @buf into @dst_efi_fmt.
+ * The buffer may hold the 32 bit or the 64 bit layout (which differ in
+ * padding); one of the two is the native layout of this kernel.  The layout
+ * is identified from buf->i_len and converted field by field if needed.
+ * Returns 0 on success, or EFSCORRUPTED if i_len matches no known layout.
+ */
+int
+xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
+{
+	xfs_efi_log_format_t *src_efi_fmt = (xfs_efi_log_format_t *)buf->i_addr;
+	uint i;
+	uint len = sizeof(xfs_efi_log_format_t) +
+		(src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t);
+	uint len32 = sizeof(xfs_efi_log_format_32_t) +
+		(src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_32_t);
+	uint len64 = sizeof(xfs_efi_log_format_64_t) +
+		(src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_64_t);
+
+	if (buf->i_len == len) {		/* native layout: plain copy */
+		memcpy(dst_efi_fmt, src_efi_fmt, len);
+		return 0;
+	} else if (buf->i_len == len32) {	/* packed 32 bit layout */
+		xfs_efi_log_format_32_t *src_efi_fmt_32 =
+			(xfs_efi_log_format_32_t *)buf->i_addr;
+
+		dst_efi_fmt->efi_type     = src_efi_fmt_32->efi_type;
+		dst_efi_fmt->efi_size     = src_efi_fmt_32->efi_size;
+		dst_efi_fmt->efi_nextents = src_efi_fmt_32->efi_nextents;
+		dst_efi_fmt->efi_id       = src_efi_fmt_32->efi_id;
+		for (i = 0; i < dst_efi_fmt->efi_nextents; i++) {
+			dst_efi_fmt->efi_extents[i].ext_start =
+				src_efi_fmt_32->efi_extents[i].ext_start;
+			dst_efi_fmt->efi_extents[i].ext_len =
+				src_efi_fmt_32->efi_extents[i].ext_len;
+		}
+		return 0;
+	} else if (buf->i_len == len64) {	/* padded 64 bit layout */
+		xfs_efi_log_format_64_t *src_efi_fmt_64 =
+			(xfs_efi_log_format_64_t *)buf->i_addr;
+
+		dst_efi_fmt->efi_type     = src_efi_fmt_64->efi_type;
+		dst_efi_fmt->efi_size     = src_efi_fmt_64->efi_size;
+		dst_efi_fmt->efi_nextents = src_efi_fmt_64->efi_nextents;
+		dst_efi_fmt->efi_id       = src_efi_fmt_64->efi_id;
+		for (i = 0; i < dst_efi_fmt->efi_nextents; i++) {
+			dst_efi_fmt->efi_extents[i].ext_start =
+				src_efi_fmt_64->efi_extents[i].ext_start;
+			dst_efi_fmt->efi_extents[i].ext_len =
+				src_efi_fmt_64->efi_extents[i].ext_len;
+		}
+		return 0;
+	}
+	return EFSCORRUPTED;	/* i_len matches no known layout */
+}
+
+/*
  * This is called by the efd item code below to release references to
  * the given efi item.  Each efd calls this with the number of
  * extents that it has logged, and when the sum of these reaches
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 5bf681708fe..0ea45edaab0 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -27,6 +27,24 @@ typedef struct xfs_extent {
 } xfs_extent_t;
 
 /*
+ * An xfs_extent_t holds a 64 bit start and a 32 bit length, so its
+ * alignment differs between 32 bit and 64 bit kernels.
+ * The packed 32 bit variant and the padded 64 bit variant below are
+ * provided for use by a conversion routine.
+ */
+
+typedef struct xfs_extent_32 {
+	xfs_dfsbno_t	ext_start;	/* 64 bits */
+	xfs_extlen_t	ext_len;	/* 32 bits */
+} __attribute__((packed)) xfs_extent_32_t;
+
+typedef struct xfs_extent_64 {
+	xfs_dfsbno_t	ext_start;	/* 64 bits */
+	xfs_extlen_t	ext_len;	/* 32 bits */
+	__uint32_t	ext_pad;	/* 32 bits of explicit padding */
+} xfs_extent_64_t;
+
+/*
  * This is the structure used to lay out an efi log item in the
  * log.  The efi_extents field is a variable size array whose
  * size is given by efi_nextents.
@@ -39,6 +57,22 @@ typedef struct xfs_efi_log_format {
 	xfs_extent_t		efi_extents[1];	/* array of extents to free */
 } xfs_efi_log_format_t;
 
+typedef struct xfs_efi_log_format_32 {
+	unsigned short		efi_type;	/* efi log item type */
+	unsigned short		efi_size;	/* size of this item */
+	uint			efi_nextents;	/* # extents to free */
+	__uint64_t		efi_id;		/* efi identifier */
+	xfs_extent_32_t		efi_extents[1];	/* array of extents to free */
+} __attribute__((packed)) xfs_efi_log_format_32_t;	/* no padding */
+
+typedef struct xfs_efi_log_format_64 {
+	unsigned short		efi_type;	/* efi log item type */
+	unsigned short		efi_size;	/* size of this item */
+	uint			efi_nextents;	/* # extents to free */
+	__uint64_t		efi_id;		/* efi identifier */
+	xfs_extent_64_t		efi_extents[1];	/* array of extents to free */
+} xfs_efi_log_format_64_t;	/* extents carry explicit padding */
+
 /*
  * This is the structure used to lay out an efd log item in the
  * log.  The efd_extents array is a variable size array whose
@@ -52,6 +86,22 @@ typedef struct xfs_efd_log_format {
 	xfs_extent_t		efd_extents[1];	/* array of extents freed */
 } xfs_efd_log_format_t;
 
+typedef struct xfs_efd_log_format_32 {
+	unsigned short		efd_type;	/* efd log item type */
+	unsigned short		efd_size;	/* size of this item */
+	uint			efd_nextents;	/* # of extents freed */
+	__uint64_t		efd_efi_id;	/* id of corresponding efi */
+	xfs_extent_32_t		efd_extents[1];	/* array of extents freed */
+} __attribute__((packed)) xfs_efd_log_format_32_t;	/* no padding */
+
+typedef struct xfs_efd_log_format_64 {
+	unsigned short		efd_type;	/* efd log item type */
+	unsigned short		efd_size;	/* size of this item */
+	uint			efd_nextents;	/* # of extents freed */
+	__uint64_t		efd_efi_id;	/* id of corresponding efi */
+	xfs_extent_64_t		efd_extents[1];	/* array of extents freed */
+} xfs_efd_log_format_64_t;	/* extents carry explicit padding */
+
 
 #ifdef __KERNEL__
 
@@ -103,7 +153,8 @@ extern struct kmem_zone	*xfs_efd_zone;
 xfs_efi_log_item_t	*xfs_efi_init(struct xfs_mount *, uint);
 xfs_efd_log_item_t	*xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *,
 				      uint);
-
+int			xfs_efi_copy_format(xfs_log_iovec_t *buf,
+					    xfs_efi_log_format_t *dst_efi_fmt);
 void			xfs_efi_item_free(xfs_efi_log_item_t *);
 
 #endif	/* __KERNEL__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 14010f1fa82..0f0ad153595 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -67,14 +67,15 @@ struct fsxattr {
 #define XFS_XFLAG_NOSYMLINKS	0x00000400	/* disallow symlink creation */
 #define XFS_XFLAG_EXTSIZE	0x00000800	/* extent size allocator hint */
 #define XFS_XFLAG_EXTSZINHERIT	0x00001000	/* inherit inode extent size */
+#define XFS_XFLAG_NODEFRAG	0x00002000	/* do not defragment */
 #define XFS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this	*/
 
 /*
  * Structure for XFS_IOC_GETBMAP.
  * On input, fill in bmv_offset and bmv_length of the first structure
- * to indicate the area of interest in the file, and bmv_entry with the
- * number of array elements given.  The first structure is updated on
- * return to give the offset and length for the next call.
+ * to indicate the area of interest in the file, and bmv_entries with
+ * the number of array elements given back.  The first structure is
+ * updated on return to give the offset and length for the next call.
  */
 #ifndef HAVE_GETBMAP
 struct getbmap {
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index dfa3527b20a..077629bab53 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -542,14 +540,13 @@ xfs_reserve_blocks(
 }
 
 void
-xfs_fs_log_dummy(xfs_mount_t *mp)
+xfs_fs_log_dummy(
+	xfs_mount_t	*mp)
 {
-	xfs_trans_t *tp;
-	xfs_inode_t *ip;
-
+	xfs_trans_t	*tp;
+	xfs_inode_t	*ip;
 
 	tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
-	atomic_inc(&mp->m_active_trans);
 	if (xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0)) {
 		xfs_trans_cancel(tp, 0);
 		return;
@@ -574,21 +571,22 @@ xfs_fs_goingdown(
 {
 	switch (inflags) {
 	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
-		struct vfs *vfsp = XFS_MTOVFS(mp);
+		struct bhv_vfs *vfsp = XFS_MTOVFS(mp);
 		struct super_block *sb = freeze_bdev(vfsp->vfs_super->s_bdev);
 
 		if (sb && !IS_ERR(sb)) {
-			xfs_force_shutdown(mp, XFS_FORCE_UMOUNT);
+			xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
 			thaw_bdev(sb->s_bdev, sb);
 		}
 	
 		break;
 	}
 	case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
-		xfs_force_shutdown(mp, XFS_FORCE_UMOUNT);
+		xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
 		break;
 	case XFS_FSOP_GOING_FLAGS_NOLOGFLUSH:
-		xfs_force_shutdown(mp, XFS_FORCE_UMOUNT|XFS_LOG_IO_ERROR);
+		xfs_force_shutdown(mp,
+				SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR);
 		break;
 	default:
 		return XFS_ERROR(EINVAL);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index deddbd03c16..33164a85aa9 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -1174,6 +1172,9 @@ xfs_dilocate(
 	if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
 	    ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
 #ifdef DEBUG
+		/* no diagnostics for bulkstat, ino comes from userspace */
+		if (flags & XFS_IMAP_BULKSTAT)
+			return XFS_ERROR(EINVAL);
 		if (agno >= mp->m_sb.sb_agcount) {
 			xfs_fs_cmn_err(CE_ALERT, mp,
 					"xfs_dilocate: agno (%d) >= "
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 60c65683462..616eeeb6953 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index b5385432526..0724df7fabb 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -186,7 +184,7 @@ xfs_ihash_promote(
  */
 STATIC int
 xfs_iget_core(
-	vnode_t		*vp,
+	bhv_vnode_t	*vp,
 	xfs_mount_t	*mp,
 	xfs_trans_t	*tp,
 	xfs_ino_t	ino,
@@ -198,7 +196,7 @@ xfs_iget_core(
 	xfs_ihash_t	*ih;
 	xfs_inode_t	*ip;
 	xfs_inode_t	*iq;
-	vnode_t		*inode_vp;
+	bhv_vnode_t	*inode_vp;
 	ulong		version;
 	int		error;
 	/* REFERENCED */
@@ -468,7 +466,7 @@ finish_inode:
 	 * If we have a real type for an on-disk inode, we can set ops(&unlock)
 	 * now.	 If it's a new inode being created, xfs_ialloc will handle it.
 	 */
-	VFS_INIT_VNODE(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1);
+	bhv_vfs_init_vnode(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1);
 
 	return 0;
 }
@@ -489,7 +487,7 @@ xfs_iget(
 	xfs_daddr_t	bno)
 {
 	struct inode	*inode;
-	vnode_t		*vp = NULL;
+	bhv_vnode_t	*vp = NULL;
 	int		error;
 
 	XFS_STATS_INC(xs_ig_attempts);
@@ -543,7 +541,7 @@ retry:
 void
 xfs_inode_lock_init(
 	xfs_inode_t	*ip,
-	vnode_t		*vp)
+	bhv_vnode_t	*vp)
 {
 	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
 		     "xfsino", (long)vp->v_number);
@@ -603,12 +601,10 @@ void
 xfs_iput(xfs_inode_t	*ip,
 	 uint		lock_flags)
 {
-	vnode_t	*vp = XFS_ITOV(ip);
+	bhv_vnode_t	*vp = XFS_ITOV(ip);
 
 	vn_trace_entry(vp, "xfs_iput", (inst_t *)__return_address);
-
 	xfs_iunlock(ip, lock_flags);
-
 	VN_RELE(vp);
 }
 
@@ -619,7 +615,7 @@ void
 xfs_iput_new(xfs_inode_t	*ip,
 	     uint		lock_flags)
 {
-	vnode_t		*vp = XFS_ITOV(ip);
+	bhv_vnode_t	*vp = XFS_ITOV(ip);
 	struct inode	*inode = vn_to_inode(vp);
 
 	vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address);
@@ -645,7 +641,7 @@ xfs_iput_new(xfs_inode_t	*ip,
 void
 xfs_ireclaim(xfs_inode_t *ip)
 {
-	vnode_t		*vp;
+	bhv_vnode_t	*vp;
 
 	/*
 	 * Remove from old hash list and mount list.
@@ -1033,6 +1029,6 @@ xfs_iflock_nowait(xfs_inode_t *ip)
 void
 xfs_ifunlock(xfs_inode_t *ip)
 {
-	ASSERT(valusema(&(ip->i_flock)) <= 0);
+	ASSERT(issemalocked(&(ip->i_flock)));	/* caller must hold i_flock */
 	vsema(&(ip->i_flock));
 }
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 94b60dd0380..5fa0adb7e17 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -26,14 +26,12 @@
 #include "xfs_trans_priv.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -256,13 +254,11 @@ xfs_itobp(
 	xfs_daddr_t	bno,
 	uint		imap_flags)
 {
+	xfs_imap_t	imap;
 	xfs_buf_t	*bp;
 	int		error;
-	xfs_imap_t	imap;
-#ifdef __KERNEL__
 	int		i;
 	int		ni;
-#endif
 
 	if (ip->i_blkno == (xfs_daddr_t)0) {
 		/*
@@ -319,7 +315,6 @@ xfs_itobp(
 	 */
 	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
 				   (int)imap.im_len, XFS_BUF_LOCK, &bp);
-
 	if (error) {
 #ifdef DEBUG
 		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
@@ -330,17 +325,21 @@ xfs_itobp(
 #endif /* DEBUG */
 		return error;
 	}
-#ifdef __KERNEL__
+
 	/*
 	 * Validate the magic number and version of every inode in the buffer
 	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
+	 * No validation is done here in userspace (xfs_repair).
 	 */
-#ifdef DEBUG
+#if !defined(__KERNEL__)
+	ni = 0;
+#elif defined(DEBUG)
 	ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 :
 		(BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog);
-#else
+#else	/* usual case */
 	ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : 1;
 #endif
+
 	for (i = 0; i < ni; i++) {
 		int		di_ok;
 		xfs_dinode_t	*dip;
@@ -352,8 +351,11 @@ xfs_itobp(
 		if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP,
 				 XFS_RANDOM_ITOBP_INOTOBP))) {
 #ifdef DEBUG
-			prdev("bad inode magic/vsn daddr %lld #%d (magic=%x)",
-				mp->m_ddev_targp,
+			if (!(imap_flags & XFS_IMAP_BULKSTAT))
+				cmn_err(CE_ALERT,
+					"Device %s - bad inode magic/vsn "
+					"daddr %lld #%d (magic=%x)",
+				XFS_BUFTARG_NAME(mp->m_ddev_targp),
 				(unsigned long long)imap.im_blkno, i,
 				INT_GET(dip->di_core.di_magic, ARCH_CONVERT));
 #endif
@@ -363,7 +365,6 @@ xfs_itobp(
 			return XFS_ERROR(EFSCORRUPTED);
 		}
 	}
-#endif	/* __KERNEL__ */
 
 	xfs_inobp_check(mp, bp);
 
@@ -782,7 +783,6 @@ xfs_xlate_dinode_core(
 
 STATIC uint
 _xfs_dic2xflags(
-	xfs_dinode_core_t	*dic,
 	__uint16_t		di_flags)
 {
 	uint			flags = 0;
@@ -812,6 +812,8 @@ _xfs_dic2xflags(
 			flags |= XFS_XFLAG_EXTSIZE;
 		if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
 			flags |= XFS_XFLAG_EXTSZINHERIT;
+		if (di_flags & XFS_DIFLAG_NODEFRAG)
+			flags |= XFS_XFLAG_NODEFRAG;
 	}
 
 	return flags;
@@ -823,16 +825,16 @@ xfs_ip2xflags(
 {
 	xfs_dinode_core_t	*dic = &ip->i_d;
 
-	return _xfs_dic2xflags(dic, dic->di_flags) |
-		(XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0);
+	return _xfs_dic2xflags(dic->di_flags) |
+				(XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0);
 }
 
 uint
 xfs_dic2xflags(
 	xfs_dinode_core_t	*dic)
 {
-	return _xfs_dic2xflags(dic, INT_GET(dic->di_flags, ARCH_CONVERT)) |
-		(XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0);
+	return _xfs_dic2xflags(INT_GET(dic->di_flags, ARCH_CONVERT)) |
+				(XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0);
 }
 
 /*
@@ -1083,7 +1085,7 @@ xfs_ialloc(
 {
 	xfs_ino_t	ino;
 	xfs_inode_t	*ip;
-	vnode_t		*vp;
+	bhv_vnode_t	*vp;
 	uint		flags;
 	int		error;
 
@@ -1221,6 +1223,9 @@ xfs_ialloc(
 				di_flags |= XFS_DIFLAG_NOSYMLINKS;
 			if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
 				di_flags |= XFS_DIFLAG_PROJINHERIT;
+			if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
+			    xfs_inherit_nodefrag)
+				di_flags |= XFS_DIFLAG_NODEFRAG;
 			ip->i_d.di_flags |= di_flags;
 		}
 		/* FALLTHROUGH */
@@ -1244,8 +1249,8 @@ xfs_ialloc(
 	 */
 	xfs_trans_log_inode(tp, ip, flags);
 
-	/* now that we have an i_mode  we can set Linux inode ops (& unlock) */
-	VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1);
+	/* now that we have an i_mode we can set up inode ops and unlock */
+	bhv_vfs_init_vnode(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1);
 
 	*ipp = ip;
 	return 0;
@@ -1285,7 +1290,7 @@ xfs_isize_check(
 				       (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) -
 			  map_first),
 			 XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps,
-			 NULL))
+			 NULL, NULL))
 	    return;
 	ASSERT(nimaps == 1);
 	ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
@@ -1421,7 +1426,7 @@ xfs_itruncate_start(
 	xfs_fsize_t	last_byte;
 	xfs_off_t	toss_start;
 	xfs_mount_t	*mp;
-	vnode_t		*vp;
+	bhv_vnode_t	*vp;
 
 	ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
 	ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size));
@@ -1434,9 +1439,9 @@ xfs_itruncate_start(
 	vn_iowait(vp);  /* wait for the completion of any pending DIOs */
 	
 	/*
-	 * Call VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES() to get rid of pages and buffers
+	 * Call toss_pages or flushinval_pages to get rid of pages
 	 * overlapping the region being removed.  We have to use
-	 * the less efficient VOP_FLUSHINVAL_PAGES() in the case that the
+	 * the less efficient flushinval_pages in the case that the
 	 * caller may not be able to finish the truncate without
 	 * dropping the inode's I/O lock.  Make sure
 	 * to catch any pages brought in by buffers overlapping
@@ -1445,10 +1450,10 @@ xfs_itruncate_start(
 	 * so that we don't toss things on the same block as
 	 * new_size but before it.
 	 *
-	 * Before calling VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES(), make sure to
+	 * Before calling toss_pages or flushinval_pages, make sure to
 	 * call remapf() over the same region if the file is mapped.
 	 * This frees up mapped file references to the pages in the
-	 * given range and for the VOP_FLUSHINVAL_PAGES() case it ensures
+	 * given range and for the flushinval_pages case it ensures
 	 * that we get the latest mapped changes flushed out.
 	 */
 	toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
@@ -1466,9 +1471,9 @@ xfs_itruncate_start(
 			 last_byte);
 	if (last_byte > toss_start) {
 		if (flags & XFS_ITRUNC_DEFINITE) {
-			VOP_TOSS_PAGES(vp, toss_start, -1, FI_REMAPF_LOCKED);
+			bhv_vop_toss_pages(vp, toss_start, -1, FI_REMAPF_LOCKED);
 		} else {
-			VOP_FLUSHINVAL_PAGES(vp, toss_start, -1, FI_REMAPF_LOCKED);
+			bhv_vop_flushinval_pages(vp, toss_start, -1, FI_REMAPF_LOCKED);
 		}
 	}
 
@@ -1666,12 +1671,13 @@ xfs_itruncate_finish(
 		 * runs.
 		 */
 		XFS_BMAP_INIT(&free_list, &first_block);
-		error = xfs_bunmapi(ntp, ip, first_unmap_block,
-				    unmap_len,
+		error = XFS_BUNMAPI(mp, ntp, &ip->i_iocore,
+				    first_unmap_block, unmap_len,
 				    XFS_BMAPI_AFLAG(fork) |
 				      (sync ? 0 : XFS_BMAPI_ASYNC),
 				    XFS_ITRUNC_MAX_EXTENTS,
-				    &first_block, &free_list, &done);
+				    &first_block, &free_list,
+				    NULL, &done);
 		if (error) {
 			/*
 			 * If the bunmapi call encounters an error,
@@ -2745,13 +2751,14 @@ xfs_iunpin(
 		 * the inode to become unpinned.
 		 */
 		if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) {
-			vnode_t	*vp = XFS_ITOV_NULL(ip);
+			bhv_vnode_t	*vp = XFS_ITOV_NULL(ip);
 
 			/* make sync come back and flush this inode */
 			if (vp) {
 				struct inode	*inode = vn_to_inode(vp);
 
-				if (!(inode->i_state & I_NEW))
+				if (!(inode->i_state &
+						(I_NEW|I_FREEING|I_CLEAR)))
 					mark_inode_dirty_sync(inode);
 			}
 		}
@@ -2916,13 +2923,6 @@ xfs_iflush_fork(
 			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
 			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
 		}
-		if (whichfork == XFS_DATA_FORK) {
-			if (unlikely(XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp, dip))) {
-				XFS_ERROR_REPORT("xfs_iflush_fork",
-						 XFS_ERRLEVEL_LOW, mp);
-				return XFS_ERROR(EFSCORRUPTED);
-			}
-		}
 		break;
 
 	case XFS_DINODE_FMT_EXTENTS:
@@ -3006,7 +3006,7 @@ xfs_iflush(
 	XFS_STATS_INC(xs_iflush_count);
 
 	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
-	ASSERT(valusema(&ip->i_flock) <= 0);
+	ASSERT(issemalocked(&(ip->i_flock)));
 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
 	       ip->i_d.di_nextents > ip->i_df.if_ext_max);
 
@@ -3199,7 +3199,7 @@ xfs_iflush(
 
 corrupt_out:
 	xfs_buf_relse(bp);
-	xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
+	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 	xfs_iflush_abort(ip);
 	/*
 	 * Unlocks the flush lock
@@ -3221,7 +3221,7 @@ cluster_corrupt_out:
 		xfs_buf_relse(bp);
 	}
 
-	xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
+	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 
 	if(!bufwasdelwri)  {
 		/*
@@ -3264,7 +3264,7 @@ xfs_iflush_int(
 	SPLDECL(s);
 
 	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
-	ASSERT(valusema(&ip->i_flock) <= 0);
+	ASSERT(issemalocked(&(ip->i_flock)));
 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
 	       ip->i_d.di_nextents > ip->i_df.if_ext_max);
 
@@ -3504,7 +3504,7 @@ xfs_iflush_all(
 	xfs_mount_t	*mp)
 {
 	xfs_inode_t	*ip;
-	vnode_t		*vp;
+	bhv_vnode_t	*vp;
 
  again:
 	XFS_MOUNT_ILOCK(mp);
@@ -4180,7 +4180,7 @@ xfs_iext_direct_to_inline(
 	 */
 	memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
 		nextents * sizeof(xfs_bmbt_rec_t));
-	kmem_free(ifp->if_u1.if_extents, KM_SLEEP);
+	kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
 	ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
 	ifp->if_real_bytes = 0;
 }
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 3b544db1790..d10b76ed1e5 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -102,9 +102,9 @@ typedef struct xfs_ifork {
 
 #ifdef __KERNEL__
 struct bhv_desc;
+struct bhv_vnode;
 struct cred;
 struct ktrace;
-struct vnode;
 struct xfs_buf;
 struct xfs_bmap_free;
 struct xfs_bmbt_irec;
@@ -400,7 +400,7 @@ void		xfs_chash_init(struct xfs_mount *);
 void		xfs_chash_free(struct xfs_mount *);
 xfs_inode_t	*xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
 				  struct xfs_trans *);
-void            xfs_inode_lock_init(xfs_inode_t *, struct vnode *);
+void            xfs_inode_lock_init(xfs_inode_t *, struct bhv_vnode *);
 int		xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
 			 uint, uint, xfs_inode_t **, xfs_daddr_t);
 void		xfs_iput(xfs_inode_t *, uint);
@@ -461,7 +461,7 @@ void		xfs_ichgtime(xfs_inode_t *, int);
 xfs_fsize_t	xfs_file_last_byte(xfs_inode_t *);
 void		xfs_lock_inodes(xfs_inode_t **, int, int, uint);
 
-xfs_inode_t	*xfs_vtoi(struct vnode *vp);
+xfs_inode_t	*xfs_vtoi(struct bhv_vnode *vp);
 
 void		xfs_synchronize_atime(xfs_inode_t *);
 
@@ -509,7 +509,6 @@ extern struct kmem_zone	*xfs_chashlist_zone;
 extern struct kmem_zone	*xfs_ifork_zone;
 extern struct kmem_zone	*xfs_inode_zone;
 extern struct kmem_zone	*xfs_ili_zone;
-extern struct vnodeops	xfs_vnodeops;
 
 #endif	/* __KERNEL__ */
 
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7497a481b2f..f8e80d8e723 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -25,7 +25,6 @@
 #include "xfs_buf_item.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
@@ -33,7 +32,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -794,7 +792,7 @@ xfs_inode_item_pushbuf(
 	 * inode flush completed and the inode was taken off the AIL.
 	 * So, just get out.
 	 */
-	if ((valusema(&(ip->i_flock)) > 0)  ||
+	if (!issemalocked(&(ip->i_flock)) ||
 	    ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
 		iip->ili_pushbuf_flag = 0;
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -816,7 +814,7 @@ xfs_inode_item_pushbuf(
 			 * If not, we can flush it async.
 			 */
 			dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
-				  (valusema(&(ip->i_flock)) <= 0));
+				  issemalocked(&(ip->i_flock)));
 			iip->ili_pushbuf_flag = 0;
 			xfs_iunlock(ip, XFS_ILOCK_SHARED);
 			xfs_buftrace("INODE ITEM PUSH", bp);
@@ -864,7 +862,7 @@ xfs_inode_item_push(
 	ip = iip->ili_inode;
 
 	ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS));
-	ASSERT(valusema(&(ip->i_flock)) <= 0);
+	ASSERT(issemalocked(&(ip->i_flock)));
 	/*
 	 * Since we were able to lock the inode's flush lock and
 	 * we found it on the AIL, the inode must be dirty.  This
@@ -1084,3 +1082,52 @@ xfs_istale_done(
 {
 	xfs_iflush_abort(iip->ili_inode);
 }
+
+/*
+ * Convert the inode log format structure in @buf, which may be in either the
+ * 32 bit or the 64 bit layout (the field alignments can differ), to the native
+ * layout in @in_f.  The layout is recognised from buf->i_len.
+ * Returns 0 on success, or EFSCORRUPTED if i_len matches neither layout.
+ */
+int
+xfs_inode_item_format_convert(
+	xfs_log_iovec_t		*buf,
+	xfs_inode_log_format_t	*in_f)
+{
+	if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) {
+		xfs_inode_log_format_32_t *in_f32;
+
+		in_f32 = (xfs_inode_log_format_32_t *)buf->i_addr;
+		in_f->ilf_type = in_f32->ilf_type;
+		in_f->ilf_size = in_f32->ilf_size;
+		in_f->ilf_fields = in_f32->ilf_fields;
+		in_f->ilf_asize = in_f32->ilf_asize;
+		in_f->ilf_dsize = in_f32->ilf_dsize;
+		in_f->ilf_ino = in_f32->ilf_ino;
+		/* copy biggest field of ilf_u, which covers the whole union */
+		memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
+		       in_f32->ilf_u.ilfu_uuid.__u_bits, sizeof(uuid_t));
+		in_f->ilf_blkno = in_f32->ilf_blkno;
+		in_f->ilf_len = in_f32->ilf_len;
+		in_f->ilf_boffset = in_f32->ilf_boffset;
+		return 0;
+	} else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)) {
+		xfs_inode_log_format_64_t *in_f64;
+
+		in_f64 = (xfs_inode_log_format_64_t *)buf->i_addr;
+		in_f->ilf_type = in_f64->ilf_type;
+		in_f->ilf_size = in_f64->ilf_size;
+		in_f->ilf_fields = in_f64->ilf_fields;
+		in_f->ilf_asize = in_f64->ilf_asize;
+		in_f->ilf_dsize = in_f64->ilf_dsize;
+		in_f->ilf_ino = in_f64->ilf_ino;
+		/* copy biggest field of ilf_u, which covers the whole union */
+		memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
+		       in_f64->ilf_u.ilfu_uuid.__u_bits, sizeof(uuid_t));
+		in_f->ilf_blkno = in_f64->ilf_blkno;
+		in_f->ilf_len = in_f64->ilf_len;
+		in_f->ilf_boffset = in_f64->ilf_boffset;
+		return 0;
+	}
+	return EFSCORRUPTED;
+}
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index c5dbf93b666..5db6cd1b4cf 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -23,25 +23,6 @@
  * log.  The size of the inline data/extents/b-tree root to be logged
  * (if any) is indicated in the ilf_dsize field.  Changes to this structure
  * must be added on to the end.
- *
- * Convention for naming inode log item versions :  The current version
- * is always named XFS_LI_INODE.  When an inode log item gets superseded,
- * add the latest version of IRIX that will generate logs with that item
- * to the version name.
- *
- * -Version 1 of this structure (XFS_LI_5_3_INODE) included up to the first
- *	union (ilf_u) field.  This was released with IRIX 5.3-XFS.
- * -Version 2 of this structure (XFS_LI_6_1_INODE) is currently the entire
- *	structure.  This was released with IRIX 6.0.1-XFS and IRIX 6.1.
- * -Version 3 of this structure (XFS_LI_INODE) is the same as version 2
- *	so a new structure definition wasn't necessary.  However, we had
- *	to add a new type because the inode cluster size changed from 4K
- *	to 8K and the version number had to be rev'ved to keep older kernels
- *	from trying to recover logs with the 8K buffers in them.  The logging
- *	code can handle recovery on different-sized clusters now so hopefully
- *	this'll be the last time we need to change the inode log item just
- *	for a change in the inode cluster size.  This new version was
- *	released with IRIX 6.2.
  */
 typedef struct xfs_inode_log_format {
 	unsigned short		ilf_type;	/* inode log item type */
@@ -59,18 +40,38 @@ typedef struct xfs_inode_log_format {
 	int			ilf_boffset;	/* off of inode in buffer */
 } xfs_inode_log_format_t;
 
-/* Initial version shipped with IRIX 5.3-XFS */
-typedef struct xfs_inode_log_format_v1 {
-	unsigned short		ilf_type;	/* inode log item type */
-	unsigned short		ilf_size;	/* size of this item */
-	uint			ilf_fields;	/* flags for fields logged */
-	uint			ilf_dsize;	/* size of data/ext/root */
-	xfs_ino_t		ilf_ino;	/* inode number */
+typedef struct xfs_inode_log_format_32 {
+	unsigned short		ilf_type;	/* 16: inode log item type */
+	unsigned short		ilf_size;	/* 16: size of this item */
+	uint			ilf_fields;	/* 32: flags for fields logged */
+	ushort			ilf_asize;	/* 16: size of attr d/ext/root */
+	ushort			ilf_dsize;	/* 16: size of data/ext/root */
+	xfs_ino_t		ilf_ino;	/* 64: inode number */
 	union {
-		xfs_dev_t	ilfu_rdev;	/* rdev value for dev inode*/
-		uuid_t		ilfu_uuid;	/* mount point value */
+		xfs_dev_t	ilfu_rdev;	/* 32: rdev value for dev inode*/
+		uuid_t		ilfu_uuid;	/* 128: mount point value */
+	} ilf_u;
+	__int64_t		ilf_blkno;	/* 64: blkno of inode buffer */
+	int			ilf_len;	/* 32: len of inode buffer */
+	int			ilf_boffset;	/* 32: off of inode in buffer */
+} __attribute__((packed)) xfs_inode_log_format_32_t;
+
+typedef struct xfs_inode_log_format_64 {
+	unsigned short		ilf_type;	/* 16: inode log item type */
+	unsigned short		ilf_size;	/* 16: size of this item */
+	uint			ilf_fields;	/* 32: flags for fields logged */
+	ushort			ilf_asize;	/* 16: size of attr d/ext/root */
+	ushort			ilf_dsize;	/* 16: size of data/ext/root */
+	__uint32_t		ilf_pad;	/* 32: pad for 64 bit boundary */
+	xfs_ino_t		ilf_ino;	/* 64: inode number */
+	union {
+		xfs_dev_t	ilfu_rdev;	/* 32: rdev value for dev inode*/
+		uuid_t		ilfu_uuid;	/* 128: mount point value */
 	} ilf_u;
-} xfs_inode_log_format_t_v1;
+	__int64_t		ilf_blkno;	/* 64: blkno of inode buffer */
+	int			ilf_len;	/* 32: len of inode buffer */
+	int			ilf_boffset;	/* 32: off of inode in buffer */
+} xfs_inode_log_format_64_t;
 
 /*
  * Flags for xfs_trans_log_inode flags field.
@@ -172,6 +173,8 @@ extern void xfs_inode_item_destroy(struct xfs_inode *);
 extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *);
 extern void xfs_istale_done(struct xfs_buf *, xfs_inode_log_item_t *);
 extern void xfs_iflush_abort(struct xfs_inode *);
+extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
+					 xfs_inode_log_format_t *);
 
 #endif	/* __KERNEL__ */
 
diff --git a/fs/xfs/xfs_iocore.c b/fs/xfs/xfs_iocore.c
index a07815661a8..06d710c9ce4 100644
--- a/fs/xfs/xfs_iocore.c
+++ b/fs/xfs/xfs_iocore.c
@@ -24,14 +24,13 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
+#include "xfs_dfrag.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -58,7 +57,7 @@ xfs_size_fn(
 
 STATIC int
 xfs_ioinit(
-	struct vfs		*vfsp,
+	struct bhv_vfs		*vfsp,
 	struct xfs_mount_args	*mntargs,
 	int			flags)
 {
@@ -68,6 +67,7 @@ xfs_ioinit(
 xfs_ioops_t	xfs_iocore_xfs = {
 	.xfs_ioinit		= (xfs_ioinit_t) xfs_ioinit,
 	.xfs_bmapi_func		= (xfs_bmapi_t) xfs_bmapi,
+	.xfs_bunmapi_func	= (xfs_bunmapi_t) xfs_bunmapi,
 	.xfs_bmap_eof_func	= (xfs_bmap_eof_t) xfs_bmap_eof,
 	.xfs_iomap_write_direct =
 			(xfs_iomap_write_direct_t) xfs_iomap_write_direct,
@@ -84,6 +84,7 @@ xfs_ioops_t	xfs_iocore_xfs = {
 	.xfs_unlock		= (xfs_unlk_t) xfs_iunlock,
 	.xfs_size_func		= (xfs_size_t) xfs_size_fn,
 	.xfs_iodone		= (xfs_iodone_t) fs_noerr,
+	.xfs_swap_extents_func	= (xfs_swap_extents_t) xfs_swap_extents,
 };
 
 void
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index d5dfedcb892..f1949c16df1 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -23,7 +23,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_dmapi.h"
@@ -32,7 +31,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -252,7 +250,7 @@ xfs_iomap(
 	error = XFS_BMAPI(mp, NULL, io, offset_fsb,
 			(xfs_filblks_t)(end_fsb - offset_fsb),
 			bmapi_flags,  NULL, 0, &imap,
-			&nimaps, NULL);
+			&nimaps, NULL, NULL);
 
 	if (error)
 		goto out;
@@ -519,8 +517,8 @@ xfs_iomap_write_direct(
 	 */
 	XFS_BMAP_INIT(&free_list, &firstfsb);
 	nimaps = 1;
-	error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
-		bmapi_flag, &firstfsb, 0, &imap, &nimaps, &free_list);
+	error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb, bmapi_flag,
+		&firstfsb, 0, &imap, &nimaps, &free_list, NULL);
 	if (error)
 		goto error0;
 
@@ -610,8 +608,8 @@ xfs_iomap_eof_want_preallocate(
 	while (count_fsb > 0) {
 		imaps = nimaps;
 		firstblock = NULLFSBLOCK;
-		error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
-				  0, &firstblock, 0, imap, &imaps, NULL);
+		error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb, 0,
+				  &firstblock, 0, imap, &imaps, NULL, NULL);
 		if (error)
 			return error;
 		for (n = 0; n < imaps; n++) {
@@ -695,11 +693,11 @@ retry:
 
 	nimaps = XFS_WRITE_IMAPS;
 	firstblock = NULLFSBLOCK;
-	error = xfs_bmapi(NULL, ip, offset_fsb,
+	error = XFS_BMAPI(mp, NULL, io, offset_fsb,
 			  (xfs_filblks_t)(last_fsb - offset_fsb),
 			  XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
 			  XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
-			  &nimaps, NULL);
+			  &nimaps, NULL, NULL);
 	if (error && (error != ENOSPC))
 		return XFS_ERROR(error);
 
@@ -832,9 +830,9 @@ xfs_iomap_write_allocate(
 			}
 
 			/* Go get the actual blocks */
-			error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
+			error = XFS_BMAPI(mp, tp, io, map_start_fsb, count_fsb,
 					XFS_BMAPI_WRITE, &first_block, 1,
-					imap, &nimaps, &free_list);
+					imap, &nimaps, &free_list, NULL);
 			if (error)
 				goto trans_cancel;
 
@@ -955,9 +953,9 @@ xfs_iomap_write_unwritten(
 		 */
 		XFS_BMAP_INIT(&free_list, &firstfsb);
 		nimaps = 1;
-		error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
+		error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb,
 				  XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
-				  1, &imap, &nimaps, &free_list);
+				  1, &imap, &nimaps, &free_list, NULL);
 		if (error)
 			goto error_on_bmapi_transaction;
 
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 94068d014f2..46249e4d1fe 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -41,11 +39,6 @@
 #include "xfs_error.h"
 #include "xfs_btree.h"
 
-#ifndef HAVE_USERACC
-#define useracc(ubuffer, size, flags, foo) (0)
-#define unuseracc(ubuffer, size, flags)
-#endif
-
 STATIC int
 xfs_bulkstat_one_iget(
 	xfs_mount_t	*mp,		/* mount point for filesystem */
@@ -56,7 +49,7 @@ xfs_bulkstat_one_iget(
 {
 	xfs_dinode_core_t *dic;		/* dinode core info pointer */
 	xfs_inode_t	*ip;		/* incore inode pointer */
-	vnode_t		*vp;
+	bhv_vnode_t	*vp;
 	int		error;
 
 	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno);
@@ -336,15 +329,6 @@ xfs_bulkstat(
 	nimask = ~(nicluster - 1);
 	nbcluster = nicluster >> mp->m_sb.sb_inopblog;
 	/*
-	 * Lock down the user's buffer. If a buffer was not sent, as in the case
-	 * disk quota code calls here, we skip this.
-	 */
-	if (ubuffer &&
-	    (error = useracc(ubuffer, ubcount * statstruct_size,
-			(B_READ|B_PHYS), NULL))) {
-		return error;
-	}
-	/*
 	 * Allocate a page-sized buffer for inode btree records.
 	 * We could try allocating something smaller, but for normal
 	 * calls we'll always (potentially) need the whole page.
@@ -650,8 +634,6 @@ xfs_bulkstat(
 	 * Done, we're either out of filesystem or space to put the data.
 	 */
 	kmem_free(irbuf, NBPC);
-	if (ubuffer)
-		unuseracc(ubuffer, ubcount * statstruct_size, (B_READ|B_PHYS));
 	*ubcountp = ubelem;
 	if (agno >= mp->m_sb.sb_agcount) {
 		/*
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 11eb4e1b18c..be5f12e07d2 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -45,7 +45,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount	*mp,
  */
 #define	BULKSTAT_FG_IGET	0x1	/* Go through the buffer cache */
 #define	BULKSTAT_FG_QUICK	0x2	/* No iget, walk the dinode cluster */
-#define BULKSTAT_FG_VFSLOCKED	0x4	/* Already have vfs lock */
 
 /*
  * Return stat information in bulk (by-inode) for the filesystem.
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 32e841d2f26..d8f5d4cbe8b 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -24,7 +24,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
@@ -36,7 +35,6 @@
 #include "xfs_ialloc_btree.h"
 #include "xfs_log_recover.h"
 #include "xfs_trans_priv.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -402,7 +400,7 @@ xfs_log_release_iclog(xfs_mount_t *mp,
 	xlog_in_core_t	  *iclog = (xlog_in_core_t *)iclog_hndl;
 
 	if (xlog_state_release_iclog(log, iclog)) {
-		xfs_force_shutdown(mp, XFS_LOG_IO_ERROR);
+		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
 		return EIO;
 	}
 
@@ -498,9 +496,8 @@ xfs_log_mount(xfs_mount_t	*mp,
 	 * just worked.
 	 */
 	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
-		int	error;
-		vfs_t	*vfsp = XFS_MTOVFS(mp);
-		int	readonly = (vfsp->vfs_flag & VFS_RDONLY);
+		bhv_vfs_t	*vfsp = XFS_MTOVFS(mp);
+		int		error, readonly = (vfsp->vfs_flag & VFS_RDONLY);
 
 		if (readonly)
 			vfsp->vfs_flag &= ~VFS_RDONLY;
@@ -726,7 +723,7 @@ xfs_log_write(xfs_mount_t *	mp,
 		return XFS_ERROR(EIO);
 
 	if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) {
-		xfs_force_shutdown(mp, XFS_LOG_IO_ERROR);
+		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
 	}
 	return error;
 }	/* xfs_log_write */
@@ -816,9 +813,9 @@ xfs_log_need_covered(xfs_mount_t *mp)
 	SPLDECL(s);
 	int		needed = 0, gen;
 	xlog_t		*log = mp->m_log;
-	vfs_t		*vfsp = XFS_MTOVFS(mp);
+	bhv_vfs_t	*vfsp = XFS_MTOVFS(mp);
 
-	if (fs_frozen(vfsp) || XFS_FORCED_SHUTDOWN(mp) ||
+	if (vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) ||
 	    (vfsp->vfs_flag & VFS_RDONLY))
 		return 0;
 
@@ -956,7 +953,7 @@ xlog_iodone(xfs_buf_t *bp)
 			XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
 		xfs_ioerror_alert("xlog_iodone", l->l_mp, bp, XFS_BUF_ADDR(bp));
 		XFS_BUF_STALE(bp);
-		xfs_force_shutdown(l->l_mp, XFS_LOG_IO_ERROR);
+		xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
 		/*
 		 * This flag will be propagated to the trans-committed
 		 * callback routines to let them know that the log-commit
@@ -1261,7 +1258,7 @@ xlog_commit_record(xfs_mount_t  *mp,
 	ASSERT_ALWAYS(iclog);
 	if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
 			       iclog, XLOG_COMMIT_TRANS))) {
-		xfs_force_shutdown(mp, XFS_LOG_IO_ERROR);
+		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
 	}
 	return error;
 }	/* xlog_commit_record */
@@ -1790,7 +1787,7 @@ xlog_write(xfs_mount_t *	mp,
 	xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp,
 		"xfs_log_write: reservation ran out. Need to up reservation");
 	/* If we did not panic, shutdown the filesystem */
-	xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
+	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 #endif
     } else
 	ticket->t_curr_res -= len;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 1f0016b0b4e..55b4237c215 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -24,7 +24,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
@@ -32,7 +31,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -193,14 +191,14 @@ xlog_header_check_dump(
 {
 	int			b;
 
-	printk("%s:  SB : uuid = ", __FUNCTION__);
+	cmn_err(CE_DEBUG, "%s:  SB : uuid = ", __FUNCTION__);
 	for (b = 0; b < 16; b++)
-		printk("%02x",((unsigned char *)&mp->m_sb.sb_uuid)[b]);
-	printk(", fmt = %d\n", XLOG_FMT);
-	printk("    log : uuid = ");
+		cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]);
+	cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT);
+	cmn_err(CE_DEBUG, "    log : uuid = ");
 	for (b = 0; b < 16; b++)
-		printk("%02x",((unsigned char *)&head->h_fs_uuid)[b]);
-	printk(", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT));
+		cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]);
+	cmn_err(CE_DEBUG, ", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT));
 }
 #else
 #define xlog_header_check_dump(mp, head)
@@ -282,7 +280,7 @@ xlog_recover_iodone(
 		mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *);
 		xfs_ioerror_alert("xlog_recover_iodone",
 				  mp, bp, XFS_BUF_ADDR(bp));
-		xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR);
+		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
 	}
 	XFS_BUF_SET_FSPRIVATE(bp, NULL);
 	XFS_BUF_CLR_IODONE_FUNC(bp);
@@ -1889,7 +1887,7 @@ xlog_recover_do_inode_buffer(
 
 		buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
 					      next_unlinked_offset);
-		INT_SET(*buffer_nextp, ARCH_CONVERT, *logged_nextp);
+		*buffer_nextp = *logged_nextp;
 	}
 
 	return 0;
@@ -2292,12 +2290,22 @@ xlog_recover_do_inode_trans(
 	int			attr_index;
 	uint			fields;
 	xfs_dinode_core_t	*dicp;
+	int			need_free = 0;
 
 	if (pass == XLOG_RECOVER_PASS1) {
 		return 0;
 	}
 
-	in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr;
+	if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
+		in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr;
+	} else {
+		in_f = (xfs_inode_log_format_t *)kmem_alloc(
+			sizeof(xfs_inode_log_format_t), KM_SLEEP);
+		need_free = 1;
+		error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
+		if (error)
+			goto error;
+	}
 	ino = in_f->ilf_ino;
 	mp = log->l_mp;
 	if (ITEM_TYPE(item) == XFS_LI_INODE) {
@@ -2323,8 +2331,10 @@ xlog_recover_do_inode_trans(
 	 * Inode buffers can be freed, look out for it,
 	 * and do not replay the inode.
 	 */
-	if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0))
-		return 0;
+	if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0)) {
+		error = 0;
+		goto error;
+	}
 
 	bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len,
 								XFS_BUF_LOCK);
@@ -2333,7 +2343,7 @@ xlog_recover_do_inode_trans(
 				  bp, imap.im_blkno);
 		error = XFS_BUF_GETERROR(bp);
 		xfs_buf_relse(bp);
-		return error;
+		goto error;
 	}
 	error = 0;
 	ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
@@ -2350,7 +2360,8 @@ xlog_recover_do_inode_trans(
 			dip, bp, ino);
 		XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)",
 				 XFS_ERRLEVEL_LOW, mp);
-		return XFS_ERROR(EFSCORRUPTED);
+		error = EFSCORRUPTED;
+		goto error;
 	}
 	dicp = (xfs_dinode_core_t*)(item->ri_buf[1].i_addr);
 	if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
@@ -2360,7 +2371,8 @@ xlog_recover_do_inode_trans(
 			item, ino);
 		XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)",
 				 XFS_ERRLEVEL_LOW, mp);
-		return XFS_ERROR(EFSCORRUPTED);
+		error = EFSCORRUPTED;
+		goto error;
 	}
 
 	/* Skip replay when the on disk inode is newer than the log one */
@@ -2376,7 +2388,8 @@ xlog_recover_do_inode_trans(
 			/* do nothing */
 		} else {
 			xfs_buf_relse(bp);
-			return 0;
+			error = 0;
+			goto error;
 		}
 	}
 	/* Take the opportunity to reset the flush iteration count */
@@ -2391,7 +2404,8 @@ xlog_recover_do_inode_trans(
 			xfs_fs_cmn_err(CE_ALERT, mp,
 				"xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
 				item, dip, bp, ino);
-			return XFS_ERROR(EFSCORRUPTED);
+			error = EFSCORRUPTED;
+			goto error;
 		}
 	} else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) {
 		if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
@@ -2403,7 +2417,8 @@ xlog_recover_do_inode_trans(
 			xfs_fs_cmn_err(CE_ALERT, mp,
 				"xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
 				item, dip, bp, ino);
-			return XFS_ERROR(EFSCORRUPTED);
+			error = EFSCORRUPTED;
+			goto error;
 		}
 	}
 	if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
@@ -2415,7 +2430,8 @@ xlog_recover_do_inode_trans(
 			item, dip, bp, ino,
 			dicp->di_nextents + dicp->di_anextents,
 			dicp->di_nblocks);
-		return XFS_ERROR(EFSCORRUPTED);
+		error = EFSCORRUPTED;
+		goto error;
 	}
 	if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
 		XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)",
@@ -2424,7 +2440,8 @@ xlog_recover_do_inode_trans(
 		xfs_fs_cmn_err(CE_ALERT, mp,
 			"xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x",
 			item, dip, bp, ino, dicp->di_forkoff);
-		return XFS_ERROR(EFSCORRUPTED);
+		error = EFSCORRUPTED;
+		goto error;
 	}
 	if (unlikely(item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t))) {
 		XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)",
@@ -2433,7 +2450,8 @@ xlog_recover_do_inode_trans(
 		xfs_fs_cmn_err(CE_ALERT, mp,
 			"xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p",
 			item->ri_buf[1].i_len, item);
-		return XFS_ERROR(EFSCORRUPTED);
+		error = EFSCORRUPTED;
+		goto error;
 	}
 
 	/* The core is in in-core format */
@@ -2521,7 +2539,8 @@ xlog_recover_do_inode_trans(
 			xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag");
 			ASSERT(0);
 			xfs_buf_relse(bp);
-			return XFS_ERROR(EIO);
+			error = EIO;
+			goto error;
 		}
 	}
 
@@ -2537,7 +2556,10 @@ write_inode_buffer:
 		error = xfs_bwrite(mp, bp);
 	}
 
-	return (error);
+error:
+	if (need_free)
+		kmem_free(in_f, sizeof(*in_f));
+	return XFS_ERROR(error);
 }
 
 /*
@@ -2674,32 +2696,32 @@ xlog_recover_do_dquot_trans(
  * structure into it, and adds the efi to the AIL with the given
  * LSN.
  */
-STATIC void
+STATIC int
 xlog_recover_do_efi_trans(
 	xlog_t			*log,
 	xlog_recover_item_t	*item,
 	xfs_lsn_t		lsn,
 	int			pass)
 {
+	int			error;
 	xfs_mount_t		*mp;
 	xfs_efi_log_item_t	*efip;
 	xfs_efi_log_format_t	*efi_formatp;
 	SPLDECL(s);
 
 	if (pass == XLOG_RECOVER_PASS1) {
-		return;
+		return 0;
 	}
 
 	efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr;
-	ASSERT(item->ri_buf[0].i_len ==
-	       (sizeof(xfs_efi_log_format_t) +
-		((efi_formatp->efi_nextents - 1) * sizeof(xfs_extent_t))));
 
 	mp = log->l_mp;
 	efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
-	memcpy((char *)&(efip->efi_format), (char *)efi_formatp,
-	      sizeof(xfs_efi_log_format_t) +
-	      ((efi_formatp->efi_nextents - 1) * sizeof(xfs_extent_t)));
+	if ((error = xfs_efi_copy_format(&(item->ri_buf[0]),
+					 &(efip->efi_format)))) {
+		xfs_efi_item_free(efip);
+		return error;
+	}
 	efip->efi_next_extent = efi_formatp->efi_nextents;
 	efip->efi_flags |= XFS_EFI_COMMITTED;
 
@@ -2708,6 +2730,7 @@ xlog_recover_do_efi_trans(
 	 * xfs_trans_update_ail() drops the AIL lock.
 	 */
 	xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn, s);
+	return 0;
 }
 
 
@@ -2738,9 +2761,10 @@ xlog_recover_do_efd_trans(
 	}
 
 	efd_formatp = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr;
-	ASSERT(item->ri_buf[0].i_len ==
-	       (sizeof(xfs_efd_log_format_t) +
-		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_t))));
+	ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
+		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
+	       (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
+		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
 	efi_id = efd_formatp->efd_efi_id;
 
 	/*
@@ -2810,15 +2834,14 @@ xlog_recover_do_trans(
 			if  ((error = xlog_recover_do_buffer_trans(log, item,
 								 pass)))
 				break;
-		} else if ((ITEM_TYPE(item) == XFS_LI_INODE) ||
-			   (ITEM_TYPE(item) == XFS_LI_6_1_INODE) ||
-			   (ITEM_TYPE(item) == XFS_LI_5_3_INODE)) {
+		} else if ((ITEM_TYPE(item) == XFS_LI_INODE)) {
 			if ((error = xlog_recover_do_inode_trans(log, item,
 								pass)))
 				break;
 		} else if (ITEM_TYPE(item) == XFS_LI_EFI) {
-			xlog_recover_do_efi_trans(log, item, trans->r_lsn,
-						  pass);
+			if ((error = xlog_recover_do_efi_trans(log, item, trans->r_lsn,
+						  pass)))
+				break;
 		} else if (ITEM_TYPE(item) == XFS_LI_EFD) {
 			xlog_recover_do_efd_trans(log, item, pass);
 		} else if (ITEM_TYPE(item) == XFS_LI_DQUOT) {
@@ -3419,13 +3442,13 @@ xlog_unpack_data_checksum(
 	    if (rhead->h_chksum ||
 		((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
 		    cmn_err(CE_DEBUG,
-			"XFS: LogR chksum mismatch: was (0x%x) is (0x%x)",
+			"XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n",
 			    INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum);
 		    cmn_err(CE_DEBUG,
 "XFS: Disregard message if filesystem was created with non-DEBUG kernel");
 		    if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
 			    cmn_err(CE_DEBUG,
-				"XFS: LogR this is a LogV2 filesystem");
+				"XFS: LogR this is a LogV2 filesystem\n");
 		    }
 		    log->l_flags |= XLOG_CHKSUM_MISMATCH;
 	    }
@@ -3798,7 +3821,7 @@ xlog_do_log_recovery(
 	error = xlog_do_recovery_pass(log, head_blk, tail_blk,
 				      XLOG_RECOVER_PASS2);
 #ifdef DEBUG
-	{
+	if (!error) {
 		int	i;
 
 		for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
@@ -3974,7 +3997,7 @@ xlog_recover_finish(
 		log->l_flags &= ~XLOG_RECOVERY_NEEDED;
 	} else {
 		cmn_err(CE_DEBUG,
-			"!Ending clean XFS mount for filesystem: %s",
+			"!Ending clean XFS mount for filesystem: %s\n",
 			log->l_mp->m_fsname);
 	}
 	return 0;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index c0b1c290688..10dbf203c62 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -196,7 +194,7 @@ xfs_mount_free(
 		kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);
 
 	if (remove_bhv) {
-		struct vfs	*vfsp = XFS_MTOVFS(mp);
+		struct bhv_vfs	*vfsp = XFS_MTOVFS(mp);
 
 		bhv_remove_all_vfsops(vfsp, 0);
 		VFS_REMOVEBHV(vfsp, &mp->m_bhv);
@@ -337,7 +335,7 @@ xfs_mount_validate_sb(
 
 xfs_agnumber_t
 xfs_initialize_perag(
-	struct vfs	*vfs,
+	bhv_vfs_t	*vfs,
 	xfs_mount_t	*mp,
 	xfs_agnumber_t	agcount)
 {
@@ -651,14 +649,14 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
  */
 int
 xfs_mountfs(
-	vfs_t		*vfsp,
+	bhv_vfs_t	*vfsp,
 	xfs_mount_t	*mp,
 	int		mfsi_flags)
 {
 	xfs_buf_t	*bp;
 	xfs_sb_t	*sbp = &(mp->m_sb);
 	xfs_inode_t	*rip;
-	vnode_t		*rvp = NULL;
+	bhv_vnode_t	*rvp = NULL;
 	int		readio_log, writeio_log;
 	xfs_daddr_t	d;
 	__uint64_t	ret64;
@@ -934,18 +932,7 @@ xfs_mountfs(
 	vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid;
 	mp->m_dmevmask = 0;	/* not persistent; set after each mount */
 
-	/*
-	 * Select the right directory manager.
-	 */
-	mp->m_dirops =
-		XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ?
-			xfsv2_dirops :
-			xfsv1_dirops;
-
-	/*
-	 * Initialize directory manager's entries.
-	 */
-	XFS_DIR_MOUNT(mp);
+	xfs_dir_mount(mp);
 
 	/*
 	 * Initialize the attribute manager's entries.
@@ -1006,8 +993,9 @@ xfs_mountfs(
 
 	if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
 		cmn_err(CE_WARN, "XFS: corrupted root inode");
-		prdev("Root inode %llu is not a directory",
-		      mp->m_ddev_targp, (unsigned long long)rip->i_ino);
+		cmn_err(CE_WARN, "Device %s - root %llu is not a directory",
+			XFS_BUFTARG_NAME(mp->m_ddev_targp),
+			(unsigned long long)rip->i_ino);
 		xfs_iunlock(rip, XFS_ILOCK_EXCL);
 		XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
 				 mp);
@@ -1094,7 +1082,7 @@ xfs_mountfs(
 int
 xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 {
-	struct vfs	*vfsp = XFS_MTOVFS(mp);
+	struct bhv_vfs	*vfsp = XFS_MTOVFS(mp);
 #if defined(DEBUG) || defined(INDUCE_IO_ERROR)
 	int64_t		fsid;
 #endif
@@ -1254,6 +1242,26 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
 
 	xfs_trans_log_buf(tp, bp, first, last);
 }
+
+/*
+ * In order to avoid ENOSPC-related deadlock caused by
+ * out-of-order locking of AGF buffer (PV 947395), we place
+ * constraints on the relationship among actual allocations for
+ * data blocks, freelist blocks, and potential file data bmap
+ * btree blocks. However, these restrictions may result in no
+ * actual space allocated for a delayed extent, for example, a data
+ * block in a certain AG is allocated but there is no additional
+ * block for the additional bmap btree block due to a split of the
+ * bmap btree of the file. The result of this may lead to an
+ * infinite loop in xfssyncd when the file gets flushed to disk and
+ * all delayed extents need to be actually allocated. To get around
+ * this, we explicitly set aside a few blocks which will not be
+ * reserved in delayed allocation. The minimum number of needed
+ * freelist blocks is 4 fsbs, and a potential split of the file's bmap
+ * btree requires 1 fsb, so we set the number of set-aside blocks to 8.
+ */
+#define SET_ASIDE_BLOCKS 8
+
 /*
  * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
  * a delta to a specified field in the in-core superblock.  Simply
@@ -1298,7 +1306,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
 		return 0;
 	case XFS_SBS_FDBLOCKS:
 
-		lcounter = (long long)mp->m_sb.sb_fdblocks;
+		lcounter = (long long)mp->m_sb.sb_fdblocks - SET_ASIDE_BLOCKS;
 		res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
 
 		if (delta > 0) {		/* Putting blocks back */
@@ -1332,7 +1340,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
 			}
 		}
 
-		mp->m_sb.sb_fdblocks = lcounter;
+		mp->m_sb.sb_fdblocks = lcounter + SET_ASIDE_BLOCKS;
 		return 0;
 	case XFS_SBS_FREXTENTS:
 		lcounter = (long long)mp->m_sb.sb_frextents;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 668ad23fd37..b2bd4be4200 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -53,8 +53,8 @@ typedef struct xfs_trans_reservations {
 #else
 struct cred;
 struct log;
-struct vfs;
-struct vnode;
+struct bhv_vfs;
+struct bhv_vnode;
 struct xfs_mount_args;
 struct xfs_ihash;
 struct xfs_chash;
@@ -63,9 +63,11 @@ struct xfs_perag;
 struct xfs_iocore;
 struct xfs_bmbt_irec;
 struct xfs_bmap_free;
+struct xfs_extdelta;
+struct xfs_swapext;
 
-extern struct vfsops xfs_vfsops;
-extern struct vnodeops xfs_vnodeops;
+extern struct bhv_vfsops xfs_vfsops;
+extern struct bhv_vnodeops xfs_vnodeops;
 
 #define	AIL_LOCK_T		lock_t
 #define	AIL_LOCKINIT(x,y)	spinlock_init(x,y)
@@ -78,15 +80,15 @@ extern struct vnodeops xfs_vnodeops;
  * Prototypes and functions for the Data Migration subsystem.
  */
 
-typedef int	(*xfs_send_data_t)(int, struct vnode *,
-			xfs_off_t, size_t, int, vrwlock_t *);
+typedef int	(*xfs_send_data_t)(int, struct bhv_vnode *,
+			xfs_off_t, size_t, int, bhv_vrwlock_t *);
 typedef int	(*xfs_send_mmap_t)(struct vm_area_struct *, uint);
-typedef int	(*xfs_send_destroy_t)(struct vnode *, dm_right_t);
-typedef int	(*xfs_send_namesp_t)(dm_eventtype_t, struct vfs *,
-			struct vnode *,
-			dm_right_t, struct vnode *, dm_right_t,
+typedef int	(*xfs_send_destroy_t)(struct bhv_vnode *, dm_right_t);
+typedef int	(*xfs_send_namesp_t)(dm_eventtype_t, struct bhv_vfs *,
+			struct bhv_vnode *,
+			dm_right_t, struct bhv_vnode *, dm_right_t,
 			char *, char *, mode_t, int, int);
-typedef void	(*xfs_send_unmount_t)(struct vfs *, struct vnode *,
+typedef void	(*xfs_send_unmount_t)(struct bhv_vfs *, struct bhv_vnode *,
 			dm_right_t, mode_t, int, int);
 
 typedef struct xfs_dmops {
@@ -188,13 +190,18 @@ typedef struct xfs_qmops {
  * Prototypes and functions for I/O core modularization.
  */
 
-typedef int		(*xfs_ioinit_t)(struct vfs *,
+typedef int		(*xfs_ioinit_t)(struct bhv_vfs *,
 				struct xfs_mount_args *, int);
 typedef int		(*xfs_bmapi_t)(struct xfs_trans *, void *,
 				xfs_fileoff_t, xfs_filblks_t, int,
 				xfs_fsblock_t *, xfs_extlen_t,
 				struct xfs_bmbt_irec *, int *,
-				struct xfs_bmap_free *);
+				struct xfs_bmap_free *, struct xfs_extdelta *);
+typedef int		(*xfs_bunmapi_t)(struct xfs_trans *,
+				void *, xfs_fileoff_t,
+				xfs_filblks_t, int, xfs_extnum_t,
+				xfs_fsblock_t *, struct xfs_bmap_free *,
+				struct xfs_extdelta *, int *);
 typedef int		(*xfs_bmap_eof_t)(void *, xfs_fileoff_t, int, int *);
 typedef int		(*xfs_iomap_write_direct_t)(
 				void *, xfs_off_t, size_t, int,
@@ -213,11 +220,14 @@ typedef void		(*xfs_lock_demote_t)(void *, uint);
 typedef int		(*xfs_lock_nowait_t)(void *, uint);
 typedef void		(*xfs_unlk_t)(void *, unsigned int);
 typedef xfs_fsize_t	(*xfs_size_t)(void *);
-typedef xfs_fsize_t	(*xfs_iodone_t)(struct vfs *);
+typedef xfs_fsize_t	(*xfs_iodone_t)(struct bhv_vfs *);
+typedef int		(*xfs_swap_extents_t)(void *, void *,
+				struct xfs_swapext*);
 
 typedef struct xfs_ioops {
 	xfs_ioinit_t			xfs_ioinit;
 	xfs_bmapi_t			xfs_bmapi_func;
+	xfs_bunmapi_t			xfs_bunmapi_func;
 	xfs_bmap_eof_t			xfs_bmap_eof_func;
 	xfs_iomap_write_direct_t	xfs_iomap_write_direct;
 	xfs_iomap_write_delay_t		xfs_iomap_write_delay;
@@ -230,13 +240,17 @@ typedef struct xfs_ioops {
 	xfs_unlk_t			xfs_unlock;
 	xfs_size_t			xfs_size_func;
 	xfs_iodone_t			xfs_iodone;
+	xfs_swap_extents_t		xfs_swap_extents_func;
 } xfs_ioops_t;
 
 #define XFS_IOINIT(vfsp, args, flags) \
 	(*(mp)->m_io_ops.xfs_ioinit)(vfsp, args, flags)
-#define XFS_BMAPI(mp, trans,io,bno,len,f,first,tot,mval,nmap,flist)	\
+#define XFS_BMAPI(mp, trans,io,bno,len,f,first,tot,mval,nmap,flist,delta) \
 	(*(mp)->m_io_ops.xfs_bmapi_func) \
-		(trans,(io)->io_obj,bno,len,f,first,tot,mval,nmap,flist)
+		(trans,(io)->io_obj,bno,len,f,first,tot,mval,nmap,flist,delta)
+#define XFS_BUNMAPI(mp, trans,io,bno,len,f,nexts,first,flist,delta,done) \
+	(*(mp)->m_io_ops.xfs_bunmapi_func) \
+		(trans,(io)->io_obj,bno,len,f,nexts,first,flist,delta,done)
 #define XFS_BMAP_EOF(mp, io, endoff, whichfork, eof) \
 	(*(mp)->m_io_ops.xfs_bmap_eof_func) \
 		((io)->io_obj, endoff, whichfork, eof)
@@ -266,6 +280,9 @@ typedef struct xfs_ioops {
 	(*(mp)->m_io_ops.xfs_size_func)((io)->io_obj)
 #define XFS_IODONE(vfsp) \
 	(*(mp)->m_io_ops.xfs_iodone)(vfsp)
+#define XFS_SWAP_EXTENTS(mp, io, tio, sxp) \
+	(*(mp)->m_io_ops.xfs_swap_extents_func) \
+		((io)->io_obj, (tio)->io_obj, sxp)
 
 #ifdef HAVE_PERCPU_SB
 
@@ -386,8 +403,6 @@ typedef struct xfs_mount {
 	__uint8_t		m_inode_quiesce;/* call quiesce on new inodes.
 						   field governed by m_ilock */
 	__uint8_t		m_sectbb_log;	/* sectlog - BBSHIFT */
-	__uint8_t		m_dirversion;	/* 1 or 2 */
-	xfs_dirops_t		m_dirops;	/* table of dir funcs */
 	int			m_dirblksize;	/* directory block sz--bytes */
 	int			m_dirblkfsbs;	/* directory block sz--fsbs */
 	xfs_dablk_t		m_dirdatablk;	/* blockno of dir data v2 */
@@ -494,16 +509,7 @@ xfs_preferred_iosize(xfs_mount_t *mp)
 
 #define XFS_FORCED_SHUTDOWN(mp)	((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
 #define xfs_force_shutdown(m,f)	\
-	VFS_FORCE_SHUTDOWN((XFS_MTOVFS(m)), f, __FILE__, __LINE__)
-
-/*
- * Flags sent to xfs_force_shutdown.
- */
-#define XFS_METADATA_IO_ERROR	0x1
-#define XFS_LOG_IO_ERROR	0x2
-#define XFS_FORCE_UMOUNT	0x4
-#define XFS_CORRUPT_INCORE	0x8	/* Corrupt in-memory data structures */
-#define XFS_SHUTDOWN_REMOTE_REQ 0x10	/* Shutdown came from remote cell */
+	bhv_vfs_force_shutdown((XFS_MTOVFS(m)), f, __FILE__, __LINE__)
 
 /*
  * Flags for xfs_mountfs
@@ -521,7 +527,7 @@ xfs_preferred_iosize(xfs_mount_t *mp)
  * Macros for getting from mount to vfs and back.
  */
 #define	XFS_MTOVFS(mp)		xfs_mtovfs(mp)
-static inline struct vfs *xfs_mtovfs(xfs_mount_t *mp)
+static inline struct bhv_vfs *xfs_mtovfs(xfs_mount_t *mp)
 {
 	return bhvtovfs(&mp->m_bhv);
 }
@@ -533,7 +539,7 @@ static inline xfs_mount_t *xfs_bhvtom(bhv_desc_t *bdp)
 }
 
 #define XFS_VFSTOM(vfs) xfs_vfstom(vfs)
-static inline xfs_mount_t *xfs_vfstom(vfs_t *vfs)
+static inline xfs_mount_t *xfs_vfstom(bhv_vfs_t *vfs)
 {
 	return XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfs), &xfs_vfsops));
 }
@@ -571,7 +577,7 @@ typedef struct xfs_mod_sb {
 extern xfs_mount_t *xfs_mount_init(void);
 extern void	xfs_mod_sb(xfs_trans_t *, __int64_t);
 extern void	xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
-extern int	xfs_mountfs(struct vfs *, xfs_mount_t *mp, int);
+extern int	xfs_mountfs(struct bhv_vfs *, xfs_mount_t *mp, int);
 extern void	xfs_mountfs_check_barriers(xfs_mount_t *mp);
 
 extern int	xfs_unmountfs(xfs_mount_t *, struct cred *);
@@ -589,7 +595,7 @@ extern void	xfs_freesb(xfs_mount_t *);
 extern void	xfs_do_force_shutdown(bhv_desc_t *, int, char *, int);
 extern int	xfs_syncsub(xfs_mount_t *, int, int, int *);
 extern int	xfs_sync_inodes(xfs_mount_t *, int, int, int *);
-extern xfs_agnumber_t	xfs_initialize_perag(struct vfs *, xfs_mount_t *,
+extern xfs_agnumber_t	xfs_initialize_perag(struct bhv_vfs *, xfs_mount_t *,
 						xfs_agnumber_t);
 extern void	xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t);
 
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
index 1408a32eef8..320d63ff9ca 100644
--- a/fs/xfs/xfs_qmops.c
+++ b/fs/xfs/xfs_qmops.c
@@ -23,7 +23,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 7fbef974bce..acb853b33eb 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -365,7 +365,7 @@ typedef struct xfs_dqtrxops {
 extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *);
 extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
 
-extern struct bhv_vfsops xfs_qmops;
+extern struct bhv_module_vfsops xfs_qmops;
 
 #endif	/* __KERNEL__ */
 
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 1f148762eb2..d98171deaa1 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -22,13 +22,11 @@
 #include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -40,7 +38,6 @@
 #include "xfs_refcache.h"
 #include "xfs_utils.h"
 #include "xfs_trans_space.h"
-#include "xfs_dir_leaf.h"
 
 
 /*
@@ -87,8 +84,8 @@ STATIC int
 xfs_lock_for_rename(
 	xfs_inode_t	*dp1,	/* old (source) directory inode */
 	xfs_inode_t	*dp2,	/* new (target) directory inode */
-	vname_t		*vname1,/* old entry name */
-	vname_t		*vname2,/* new entry name */
+	bhv_vname_t	*vname1,/* old entry name */
+	bhv_vname_t	*vname2,/* new entry name */
 	xfs_inode_t	**ipp1,	/* inode of old entry */
 	xfs_inode_t	**ipp2,	/* inode of new entry, if it
 				   already exists, NULL otherwise. */
@@ -225,9 +222,9 @@ xfs_lock_for_rename(
 int
 xfs_rename(
 	bhv_desc_t	*src_dir_bdp,
-	vname_t		*src_vname,
-	vnode_t		*target_dir_vp,
-	vname_t		*target_vname,
+	bhv_vname_t	*src_vname,
+	bhv_vnode_t	*target_dir_vp,
+	bhv_vname_t	*target_vname,
 	cred_t		*credp)
 {
 	xfs_trans_t	*tp;
@@ -242,7 +239,7 @@ xfs_rename(
 	int		committed;
 	xfs_inode_t	*inodes[4];
 	int		target_ip_dropped = 0;	/* dropped target_ip link? */
-	vnode_t		*src_dir_vp;
+	bhv_vnode_t	*src_dir_vp;
 	int		spaceres;
 	int		target_link_zero = 0;
 	int		num_inodes;
@@ -398,34 +395,29 @@ xfs_rename(
 		 * fit before actually inserting it.
 		 */
 		if (spaceres == 0 &&
-		    (error = XFS_DIR_CANENTER(mp, tp, target_dp, target_name,
-				target_namelen))) {
+		    (error = xfs_dir_canenter(tp, target_dp, target_name,
+						target_namelen)))
 			goto error_return;
-		}
 		/*
 		 * If target does not exist and the rename crosses
 		 * directories, adjust the target directory link count
 		 * to account for the ".." reference from the new entry.
 		 */
-		error = XFS_DIR_CREATENAME(mp, tp, target_dp, target_name,
+		error = xfs_dir_createname(tp, target_dp, target_name,
 					   target_namelen, src_ip->i_ino,
 					   &first_block, &free_list, spaceres);
-		if (error == ENOSPC) {
+		if (error == ENOSPC)
 			goto error_return;
-		}
-		if (error) {
+		if (error)
 			goto abort_return;
-		}
 		xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 
 		if (new_parent && src_is_directory) {
 			error = xfs_bumplink(tp, target_dp);
-			if (error) {
+			if (error)
 				goto abort_return;
-			}
 		}
 	} else { /* target_ip != NULL */
-
 		/*
 		 * If target exists and it's a directory, check that both
 		 * target and source are directories and that target can be
@@ -435,7 +427,7 @@ xfs_rename(
 			/*
 			 * Make sure target dir is empty.
 			 */
-			if (!(XFS_DIR_ISEMPTY(target_ip->i_mount, target_ip)) ||
+			if (!(xfs_dir_isempty(target_ip)) ||
 			    (target_ip->i_d.di_nlink > 2)) {
 				error = XFS_ERROR(EEXIST);
 				goto error_return;
@@ -451,12 +443,11 @@ xfs_rename(
 		 * In case there is already an entry with the same
 		 * name at the destination directory, remove it first.
 		 */
-		error = XFS_DIR_REPLACE(mp, tp, target_dp, target_name,
-			target_namelen, src_ip->i_ino, &first_block,
-			&free_list, spaceres);
-		if (error) {
+		error = xfs_dir_replace(tp, target_dp, target_name,
+					target_namelen, src_ip->i_ino,
+					&first_block, &free_list, spaceres);
+		if (error)
 			goto abort_return;
-		}
 		xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 
 		/*
@@ -464,9 +455,8 @@ xfs_rename(
 		 * dir no longer points to it.
 		 */
 		error = xfs_droplink(tp, target_ip);
-		if (error) {
+		if (error)
 			goto abort_return;
-		}
 		target_ip_dropped = 1;
 
 		if (src_is_directory) {
@@ -474,9 +464,8 @@ xfs_rename(
 			 * Drop the link from the old "." entry.
 			 */
 			error = xfs_droplink(tp, target_ip);
-			if (error) {
+			if (error)
 				goto abort_return;
-			}
 		}
 
 		/* Do this test while we still hold the locks */
@@ -488,18 +477,15 @@ xfs_rename(
 	 * Remove the source.
 	 */
 	if (new_parent && src_is_directory) {
-
 		/*
 		 * Rewrite the ".." entry to point to the new
 		 * directory.
 		 */
-		error = XFS_DIR_REPLACE(mp, tp, src_ip, "..", 2,
-					target_dp->i_ino, &first_block,
-					&free_list, spaceres);
+		error = xfs_dir_replace(tp, src_ip, "..", 2, target_dp->i_ino,
+					&first_block, &free_list, spaceres);
 		ASSERT(error != EEXIST);
-		if (error) {
+		if (error)
 			goto abort_return;
-		}
 		xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 
 	} else {
@@ -527,16 +513,14 @@ xfs_rename(
 		 * entry that's moved no longer points to it.
 		 */
 		error = xfs_droplink(tp, src_dp);
-		if (error) {
+		if (error)
 			goto abort_return;
-		}
 	}
 
-	error = XFS_DIR_REMOVENAME(mp, tp, src_dp, src_name, src_namelen,
+	error = xfs_dir_removename(tp, src_dp, src_name, src_namelen,
 			src_ip->i_ino, &first_block, &free_list, spaceres);
-	if (error) {
+	if (error)
 		goto abort_return;
-	}
 	xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 
 	/*
@@ -609,7 +593,7 @@ xfs_rename(
 	 * Let interposed file systems know about removed links.
 	 */
 	if (target_ip_dropped) {
-		VOP_LINK_REMOVED(XFS_ITOV(target_ip), target_dir_vp,
+		bhv_vop_link_removed(XFS_ITOV(target_ip), target_dir_vp,
 					target_link_zero);
 		IRELE(target_ip);
 	}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 5b413946b1c..0c1e42b037e 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -141,7 +139,7 @@ xfs_growfs_rt_alloc(
 		cancelflags |= XFS_TRANS_ABORT;
 		error = xfs_bmapi(tp, ip, oblocks, nblocks - oblocks,
 			XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &firstblock,
-			resblks, &map, &nmap, &flist);
+			resblks, &map, &nmap, &flist, NULL);
 		if (!error && nmap < 1)
 			error = XFS_ERROR(ENOSPC);
 		if (error)
@@ -2404,10 +2402,10 @@ xfs_rtprint_range(
 {
 	xfs_extlen_t	i;		/* block number in the extent */
 
-	printk("%Ld: ", (long long)start);
+	cmn_err(CE_DEBUG, "%Ld: ", (long long)start);
 	for (i = 0; i < len; i++)
-		printk("%d", xfs_rtcheck_bit(mp, tp, start + i, 1));
-	printk("\n");
+		cmn_err(CE_DEBUG, "%d", xfs_rtcheck_bit(mp, tp, start + i, 1));
+	cmn_err(CE_DEBUG, "\n");
 }
 
 /*
@@ -2431,17 +2429,17 @@ xfs_rtprint_summary(
 			(void)xfs_rtget_summary(mp, tp, l, i, &sumbp, &sb, &c);
 			if (c) {
 				if (!p) {
-					printk("%Ld-%Ld:", 1LL << l,
+					cmn_err(CE_DEBUG, "%Ld-%Ld:", 1LL << l,
 						XFS_RTMIN((1LL << l) +
 							  ((1LL << l) - 1LL),
 							 mp->m_sb.sb_rextents));
 					p = 1;
 				}
-				printk(" %Ld:%d", (long long)i, c);
+				cmn_err(CE_DEBUG, " %Ld:%d", (long long)i, c);
 			}
 		}
 		if (p)
-			printk("\n");
+			cmn_err(CE_DEBUG, "\n");
 	}
 	if (sumbp)
 		xfs_trans_brelse(tp, sumbp);
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index a59c102cf21..defb2febaaf 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -92,6 +90,90 @@ xfs_write_clear_setuid(
 }
 
 /*
+ * Handle logging requirements of various synchronous types of write.
+ */
+int
+xfs_write_sync_logforce(
+	xfs_mount_t	*mp,
+	xfs_inode_t	*ip)
+{
+	int		error = 0;
+
+	/*
+	 * If we're treating this as O_DSYNC and we have not updated the
+	 * size, force the log.
+	 */
+	if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
+	    !(ip->i_update_size)) {
+		xfs_inode_log_item_t	*iip = ip->i_itemp;
+
+		/*
+		 * If an allocation transaction occurred
+		 * without extending the size, then we have to force
+		 * the log up to the proper point to ensure that the
+		 * allocation is permanent.  We can't count on
+		 * the fact that buffered writes lock out direct I/O
+		 * writes - the direct I/O write could have extended
+		 * the size nontransactionally, then finished before
+		 * we started.  xfs_write_file will think that the file
+		 * didn't grow but the update isn't safe unless the
+		 * size change is logged.
+		 *
+		 * Force the log if we've committed a transaction
+		 * against the inode or if someone else has and
+		 * the commit record hasn't gone to disk (e.g.
+		 * the inode is pinned).  This guarantees that
+		 * all changes affecting the inode are permanent
+		 * when we return.
+		 */
+		if (iip && iip->ili_last_lsn) {
+			xfs_log_force(mp, iip->ili_last_lsn,
+					XFS_LOG_FORCE | XFS_LOG_SYNC);
+		} else if (xfs_ipincount(ip) > 0) {
+			xfs_log_force(mp, (xfs_lsn_t)0,
+					XFS_LOG_FORCE | XFS_LOG_SYNC);
+		}
+
+	} else {
+		xfs_trans_t	*tp;
+
+		/*
+		 * O_SYNC or O_DSYNC _with_ a size update are handled
+		 * the same way.
+		 *
+		 * If the write was synchronous then we need to make
+		 * sure that the inode modification time is permanent.
+		 * The timestamp is presumably already updated by the caller
+		 * (TODO confirm), so here we use a synchronous transaction
+		 * to log the inode.  It's not fast, but it's necessary.
+		 *
+		 * If this is a dsync write and the size got changed
+		 * non-transactionally, then we need to ensure that
+		 * the size change gets logged in a synchronous
+		 * transaction.
+		 */
+		tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
+		if ((error = xfs_trans_reserve(tp, 0,
+						XFS_SWRITE_LOG_RES(mp),
+						0, 0, 0))) {
+			/* Transaction reserve failed */
+			xfs_trans_cancel(tp, 0);
+		} else {
+			/* Transaction reserve successful */
+			xfs_ilock(ip, XFS_ILOCK_EXCL);
+			xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+			xfs_trans_ihold(tp, ip);
+			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+			xfs_trans_set_sync(tp);
+			error = xfs_trans_commit(tp, 0, NULL);
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		}
+	}
+
+	return error;
+}
+
+/*
  * Force a shutdown of the filesystem instantly while keeping
  * the filesystem consistent. We don't do an unmount here; just shutdown
  * the shop, make sure that absolutely nothing persistent happens to
@@ -109,12 +191,12 @@ xfs_do_force_shutdown(
 	xfs_mount_t	*mp;
 
 	mp = XFS_BHVTOM(bdp);
-	logerror = flags & XFS_LOG_IO_ERROR;
+	logerror = flags & SHUTDOWN_LOG_IO_ERROR;
 
-	if (!(flags & XFS_FORCE_UMOUNT)) {
-		cmn_err(CE_NOTE,
-		"xfs_force_shutdown(%s,0x%x) called from line %d of file %s.  Return address = 0x%p",
-			mp->m_fsname,flags,lnnum,fname,__return_address);
+	if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
+		cmn_err(CE_NOTE, "xfs_force_shutdown(%s,0x%x) called from "
+				 "line %d of file %s.  Return address = 0x%p",
+			mp->m_fsname, flags, lnnum, fname, __return_address);
 	}
 	/*
 	 * No need to duplicate efforts.
@@ -125,33 +207,37 @@ xfs_do_force_shutdown(
 	/*
 	 * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
 	 * queue up anybody new on the log reservations, and wakes up
-	 * everybody who's sleeping on log reservations and tells
-	 * them the bad news.
+	 * everybody who's sleeping on log reservations to tell them
+	 * the bad news.
 	 */
 	if (xfs_log_force_umount(mp, logerror))
 		return;
 
-	if (flags & XFS_CORRUPT_INCORE) {
+	if (flags & SHUTDOWN_CORRUPT_INCORE) {
 		xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp,
     "Corruption of in-memory data detected.  Shutting down filesystem: %s",
 			mp->m_fsname);
 		if (XFS_ERRLEVEL_HIGH <= xfs_error_level) {
 			xfs_stack_trace();
 		}
-	} else if (!(flags & XFS_FORCE_UMOUNT)) {
+	} else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
 		if (logerror) {
 			xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp,
-			"Log I/O Error Detected.  Shutting down filesystem: %s",
+		"Log I/O Error Detected.  Shutting down filesystem: %s",
+				mp->m_fsname);
+		} else if (flags & SHUTDOWN_DEVICE_REQ) {
+			xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp,
+		"All device paths lost.  Shutting down filesystem: %s",
 				mp->m_fsname);
-		} else if (!(flags & XFS_SHUTDOWN_REMOTE_REQ)) {
+		} else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
 			xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp,
-				"I/O Error Detected.  Shutting down filesystem: %s",
+		"I/O Error Detected.  Shutting down filesystem: %s",
 				mp->m_fsname);
 		}
 	}
-	if (!(flags & XFS_FORCE_UMOUNT)) {
-		cmn_err(CE_ALERT,
-		"Please umount the filesystem, and rectify the problem(s)");
+	if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
+		cmn_err(CE_ALERT, "Please umount the filesystem, "
+				  "and rectify the problem(s)");
 	}
 }
 
@@ -335,7 +421,7 @@ xfs_bwrite(
 		 * from bwrite and we could be tracing a buffer that has
 		 * been reused.
 		 */
-		xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR);
+		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
 	}
 	return (error);
 }
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index e6379564447..188b296ff50 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -75,6 +75,7 @@ xfs_fsb_to_db_io(struct xfs_iocore *io, xfs_fsblock_t fsb)
  * Prototypes for functions in xfs_rw.c.
  */
 extern int xfs_write_clear_setuid(struct xfs_inode *ip);
+extern int xfs_write_sync_logforce(struct xfs_mount *mp, struct xfs_inode *ip);
 extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
 extern int xfs_bioerror(struct xfs_buf *bp);
 extern int xfs_bioerror_relse(struct xfs_buf *bp);
@@ -87,9 +88,10 @@ extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
 /*
  * Prototypes for functions in xfs_vnodeops.c.
  */
-extern int xfs_rwlock(bhv_desc_t *bdp, vrwlock_t write_lock);
-extern void xfs_rwunlock(bhv_desc_t *bdp, vrwlock_t write_lock);
-extern int xfs_setattr(bhv_desc_t *bdp, vattr_t *vap, int flags, cred_t *credp);
+extern int xfs_rwlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock);
+extern void xfs_rwunlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock);
+extern int xfs_setattr(bhv_desc_t *, bhv_vattr_t *vap, int flags,
+		       cred_t *credp);
 extern int xfs_change_file_space(bhv_desc_t *bdp, int cmd, xfs_flock64_t *bf,
 				 xfs_off_t offset, cred_t *credp, int flags);
 extern int xfs_set_dmattrs(bhv_desc_t *bdp, u_int evmask, u_int16_t state,
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 8d056cef5d1..ee2721e0de4 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -24,7 +24,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
@@ -33,7 +32,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -236,11 +234,8 @@ xfs_trans_alloc(
 	xfs_mount_t	*mp,
 	uint		type)
 {
-	fs_check_frozen(XFS_MTOVFS(mp), SB_FREEZE_TRANS);
-	atomic_inc(&mp->m_active_trans);
-
-	return (_xfs_trans_alloc(mp, type));
-
+	vfs_wait_for_freeze(XFS_MTOVFS(mp), SB_FREEZE_TRANS);
+	return _xfs_trans_alloc(mp, type);
 }
 
 xfs_trans_t *
@@ -250,12 +245,9 @@ _xfs_trans_alloc(
 {
 	xfs_trans_t	*tp;
 
-	ASSERT(xfs_trans_zone != NULL);
-	tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
+	atomic_inc(&mp->m_active_trans);
 
-	/*
-	 * Initialize the transaction structure.
-	 */
+	tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
 	tp->t_magic = XFS_TRANS_MAGIC;
 	tp->t_type = type;
 	tp->t_mountp = mp;
@@ -263,8 +255,7 @@ _xfs_trans_alloc(
 	tp->t_busy_free = XFS_LBC_NUM_SLOTS;
 	XFS_LIC_INIT(&(tp->t_items));
 	XFS_LBC_INIT(&(tp->t_busy));
-
-	return (tp);
+	return tp;
 }
 
 /*
@@ -303,7 +294,7 @@ xfs_trans_dup(
 	tp->t_blk_res = tp->t_blk_res_used;
 	ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
 	tp->t_rtx_res = tp->t_rtx_res_used;
-	PFLAGS_DUP(&tp->t_pflags, &ntp->t_pflags);
+	ntp->t_pflags = tp->t_pflags;
 
 	XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp);
 
@@ -335,14 +326,11 @@ xfs_trans_reserve(
 	uint		logcount)
 {
 	int		log_flags;
-	int		error;
-	int	rsvd;
-
-	error = 0;
-	rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+	int		error = 0;
+	int		rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
 
 	/* Mark this thread as being in a transaction */
-        PFLAGS_SET_FSTRANS(&tp->t_pflags);
+	current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
 
 	/*
 	 * Attempt to reserve the needed disk blocks by decrementing
@@ -353,7 +341,7 @@ xfs_trans_reserve(
 		error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS,
 					  -blocks, rsvd);
 		if (error != 0) {
-                        PFLAGS_RESTORE_FSTRANS(&tp->t_pflags);
+			current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
 			return (XFS_ERROR(ENOSPC));
 		}
 		tp->t_blk_res += blocks;
@@ -426,9 +414,9 @@ undo_blocks:
 		tp->t_blk_res = 0;
 	}
 
-        PFLAGS_RESTORE_FSTRANS(&tp->t_pflags);
+	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
 
-	return (error);
+	return error;
 }
 
 
@@ -819,7 +807,7 @@ shut_us_down:
 			if (commit_lsn == -1 && !shutdown)
 				shutdown = XFS_ERROR(EIO);
 		}
-                PFLAGS_RESTORE_FSTRANS(&tp->t_pflags);
+		current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
 		xfs_trans_free_items(tp, shutdown? XFS_TRANS_ABORT : 0);
 		xfs_trans_free_busy(tp);
 		xfs_trans_free(tp);
@@ -846,7 +834,7 @@ shut_us_down:
 	 */
 	nvec = xfs_trans_count_vecs(tp);
 	if (nvec == 0) {
-		xfs_force_shutdown(mp, XFS_LOG_IO_ERROR);
+		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
 		goto shut_us_down;
 	} else if (nvec <= XFS_TRANS_LOGVEC_COUNT) {
 		log_vector = log_vector_fast;
@@ -884,7 +872,7 @@ shut_us_down:
 	 * had pinned, clean up, free trans structure, and return error.
 	 */
 	if (error || commit_lsn == -1) {
-                PFLAGS_RESTORE_FSTRANS(&tp->t_pflags);
+		current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
 		xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT);
 		return XFS_ERROR(EIO);
 	}
@@ -926,7 +914,7 @@ shut_us_down:
 	/*
 	 * Mark this thread as no longer being in a transaction
 	 */
-	PFLAGS_RESTORE_FSTRANS(&tp->t_pflags);
+	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
 
 	/*
 	 * Once all the items of the transaction have been copied
@@ -1148,7 +1136,7 @@ xfs_trans_cancel(
 	 */
 	if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) {
 		XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
-		xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
+		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 	}
 #ifdef DEBUG
 	if (!(flags & XFS_TRANS_ABORT)) {
@@ -1182,7 +1170,7 @@ xfs_trans_cancel(
 	}
 
 	/* mark this thread as no longer being in a transaction */
-        PFLAGS_RESTORE_FSTRANS(&tp->t_pflags);
+	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
 
 	xfs_trans_free_items(tp, flags);
 	xfs_trans_free_busy(tp);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 100d9a4b38e..cb65c3a603f 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -805,12 +805,9 @@ typedef struct xfs_trans {
 	((mp)->m_sb.sb_inodesize + \
 	 (mp)->m_sb.sb_sectsize * 2 + \
 	 (mp)->m_dirblksize + \
-	 (XFS_DIR_IS_V1(mp) ? 0 : \
-	    XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1))) + \
+	 XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1)) + \
 	 XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-	 (128 * (4 + \
-		 (XFS_DIR_IS_V1(mp) ? 0 : \
-			 XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \
+	 (128 * (4 + (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \
 		 XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
 
 #define	XFS_ADDAFORK_LOG_RES(mp)	((mp)->m_reservations.tr_addafork)
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 19ab24af1c1..558c87ff0c4 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -22,7 +22,6 @@
 #include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
-#include "xfs_dir.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_trans_priv.h"
@@ -363,9 +362,10 @@ xfs_trans_delete_ail(
 			AIL_UNLOCK(mp, s);
 		else {
 			xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
-				"xfs_trans_delete_ail: attempting to delete a log item that is not in the AIL");
+		"%s: attempting to delete a log item that is not in the AIL",
+					__FUNCTION__);
 			AIL_UNLOCK(mp, s);
-			xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
+			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 		}
 	}
 }
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index c74c31ebc81..60b6b898022 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -320,7 +318,7 @@ xfs_trans_read_buf(
 			if (xfs_error_target == target) {
 				if (((xfs_req_num++) % xfs_error_mod) == 0) {
 					xfs_buf_relse(bp);
-					printk("Returning error!\n");
+					cmn_err(CE_DEBUG, "Returning error!\n");
 					return XFS_ERROR(EIO);
 				}
 			}
@@ -369,7 +367,7 @@ xfs_trans_read_buf(
 				 */
 				if (tp->t_flags & XFS_TRANS_DIRTY)
 					xfs_force_shutdown(tp->t_mountp,
-							   XFS_METADATA_IO_ERROR);
+							SHUTDOWN_META_IO_ERROR);
 				return error;
 			}
 		}
@@ -414,7 +412,7 @@ xfs_trans_read_buf(
 		xfs_ioerror_alert("xfs_trans_read_buf", mp,
 				  bp, blkno);
 		if (tp->t_flags & XFS_TRANS_DIRTY)
-			xfs_force_shutdown(tp->t_mountp, XFS_METADATA_IO_ERROR);
+			xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
 		xfs_buf_relse(bp);
 		return error;
 	}
@@ -423,9 +421,9 @@ xfs_trans_read_buf(
 		if (xfs_error_target == target) {
 			if (((xfs_req_num++) % xfs_error_mod) == 0) {
 				xfs_force_shutdown(tp->t_mountp,
-						   XFS_METADATA_IO_ERROR);
+						   SHUTDOWN_META_IO_ERROR);
 				xfs_buf_relse(bp);
-				printk("Returning error in trans!\n");
+				cmn_err(CE_DEBUG, "Returning trans error!\n");
 				return XFS_ERROR(EIO);
 			}
 		}
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index 7d7d627f25d..b290270dd4a 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -22,7 +22,6 @@
 #include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
-#include "xfs_dir.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_trans_priv.h"
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 7c5894d59f8..b8db1d5cde5 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -24,14 +24,12 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 1117d600d74..2912aac07c7 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -493,7 +493,7 @@ xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx)
 				break;
 			} else {
 				/* out-of-order vacancy */
-				printk("OOO vacancy lbcp 0x%p\n", lbcp);
+				cmn_err(CE_DEBUG, "OOO vacancy lbcp 0x%p\n", lbcp);
 				ASSERT(0);
 			}
 		}
diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h
index 7fe3792b18d..4ea2e5074bd 100644
--- a/fs/xfs/xfs_trans_space.h
+++ b/fs/xfs/xfs_trans_space.h
@@ -30,8 +30,7 @@
 	  XFS_EXTENTADD_SPACE_RES(mp,w))
 #define	XFS_DAENTER_1B(mp,w)	((w) == XFS_DATA_FORK ? (mp)->m_dirblkfsbs : 1)
 #define	XFS_DAENTER_DBS(mp,w)	\
-	(XFS_DA_NODE_MAXDEPTH + \
-	 ((XFS_DIR_IS_V2(mp) && (w) == XFS_DATA_FORK) ? 2 : 0))
+	(XFS_DA_NODE_MAXDEPTH + (((w) == XFS_DATA_FORK) ? 2 : 0))
 #define	XFS_DAENTER_BLOCKS(mp,w)	\
 	(XFS_DAENTER_1B(mp,w) * XFS_DAENTER_DBS(mp,w))
 #define	XFS_DAENTER_BMAP1B(mp,w)	\
@@ -41,10 +40,7 @@
 #define	XFS_DAENTER_SPACE_RES(mp,w)	\
 	(XFS_DAENTER_BLOCKS(mp,w) + XFS_DAENTER_BMAPS(mp,w))
 #define	XFS_DAREMOVE_SPACE_RES(mp,w)	XFS_DAENTER_BMAPS(mp,w)
-#define	XFS_DIRENTER_MAX_SPLIT(mp,nl)	\
-	(((mp)->m_sb.sb_blocksize == 512 && \
-	  XFS_DIR_IS_V1(mp) && \
-	  (nl) >= XFS_DIR_LEAF_CAN_DOUBLE_SPLIT_LEN) ? 2 : 1)
+#define	XFS_DIRENTER_MAX_SPLIT(mp,nl)	1
 #define	XFS_DIRENTER_SPACE_RES(mp,nl)	\
 	(XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK) * \
 	 XFS_DIRENTER_MAX_SPLIT(mp,nl))
@@ -57,8 +53,7 @@
  * Space reservation values for various transactions.
  */
 #define	XFS_ADDAFORK_SPACE_RES(mp)	\
-	((mp)->m_dirblkfsbs + \
-	 (XFS_DIR_IS_V1(mp) ? 0 : XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK)))
+	((mp)->m_dirblkfsbs + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK))
 #define	XFS_ATTRRM_SPACE_RES(mp)	\
 	XFS_DAREMOVE_SPACE_RES(mp, XFS_ATTR_FORK)
 /* This macro is not used - see inline code in xfs_attr_set */
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 34654ec6ae1..9014d7e4448 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -24,12 +24,10 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -51,10 +49,10 @@
  */
 int
 xfs_get_dir_entry(
-	vname_t		*dentry,
+	bhv_vname_t	*dentry,
 	xfs_inode_t	**ipp)
 {
-	vnode_t		*vp;
+	bhv_vnode_t	*vp;
 
 	vp = VNAME_TO_VNODE(dentry);
 
@@ -69,11 +67,11 @@ int
 xfs_dir_lookup_int(
 	bhv_desc_t	*dir_bdp,
 	uint		lock_mode,
-	vname_t		*dentry,
+	bhv_vname_t	*dentry,
 	xfs_ino_t	*inum,
 	xfs_inode_t	**ipp)
 {
-	vnode_t		*dir_vp;
+	bhv_vnode_t	*dir_vp;
 	xfs_inode_t	*dp;
 	int		error;
 
@@ -82,8 +80,7 @@ xfs_dir_lookup_int(
 
 	dp = XFS_BHVTOI(dir_bdp);
 
-	error = XFS_DIR_LOOKUP(dp->i_mount, NULL, dp,
-				VNAME(dentry), VNAMELEN(dentry), inum);
+	error = xfs_dir_lookup(NULL, dp, VNAME(dentry), VNAMELEN(dentry), inum);
 	if (!error) {
 		/*
 		 * Unlock the directory. We do this because we can't
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index 472661a3b6d..fe953e98afa 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -23,9 +23,10 @@
 #define	ITRACE(ip)	vn_trace_ref(XFS_ITOV(ip), __FILE__, __LINE__, \
 				(inst_t *)__return_address)
 
-extern int xfs_rename (bhv_desc_t *, vname_t *, vnode_t *, vname_t *, cred_t *);
-extern int xfs_get_dir_entry (vname_t *, xfs_inode_t **);
-extern int xfs_dir_lookup_int (bhv_desc_t *, uint, vname_t *, xfs_ino_t *,
+extern int xfs_rename (bhv_desc_t *, bhv_vname_t *, bhv_vnode_t *,
+			bhv_vname_t *, cred_t *);
+extern int xfs_get_dir_entry (bhv_vname_t *, xfs_inode_t **);
+extern int xfs_dir_lookup_int (bhv_desc_t *, uint, bhv_vname_t *, xfs_ino_t *,
 				xfs_inode_t **);
 extern int xfs_truncate_file (xfs_mount_t *, xfs_inode_t *);
 extern int xfs_dir_ialloc (xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 36ea1b2094f..6c96391f3f1 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -24,7 +24,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
@@ -32,7 +31,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_ialloc_btree.h"
 #include "xfs_alloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
@@ -131,9 +129,6 @@ xfs_init(void)
 #ifdef XFS_BMBT_TRACE
 	xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_SLEEP);
 #endif
-#ifdef XFS_DIR_TRACE
-	xfs_dir_trace_buf = ktrace_alloc(XFS_DIR_TRACE_SIZE, KM_SLEEP);
-#endif
 #ifdef XFS_ATTR_TRACE
 	xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_SLEEP);
 #endif
@@ -177,9 +172,6 @@ xfs_cleanup(void)
 #ifdef XFS_ATTR_TRACE
 	ktrace_free(xfs_attr_trace_buf);
 #endif
-#ifdef XFS_DIR_TRACE
-	ktrace_free(xfs_dir_trace_buf);
-#endif
 #ifdef XFS_BMBT_TRACE
 	ktrace_free(xfs_bmbt_trace_buf);
 #endif
@@ -212,7 +204,7 @@ xfs_cleanup(void)
  */
 STATIC int
 xfs_start_flags(
-	struct vfs		*vfs,
+	struct bhv_vfs		*vfs,
 	struct xfs_mount_args	*ap,
 	struct xfs_mount	*mp)
 {
@@ -337,7 +329,7 @@ xfs_start_flags(
  */
 STATIC int
 xfs_finish_flags(
-	struct vfs		*vfs,
+	struct bhv_vfs		*vfs,
 	struct xfs_mount_args	*ap,
 	struct xfs_mount	*mp)
 {
@@ -423,7 +415,7 @@ xfs_mount(
 	struct xfs_mount_args	*args,
 	cred_t			*credp)
 {
-	struct vfs		*vfsp = bhvtovfs(bhvp);
+	struct bhv_vfs		*vfsp = bhvtovfs(bhvp);
 	struct bhv_desc		*p;
 	struct xfs_mount	*mp = XFS_BHVTOM(bhvp);
 	struct block_device	*ddev, *logdev, *rtdev;
@@ -552,10 +544,10 @@ xfs_unmount(
 	int		flags,
 	cred_t		*credp)
 {
-	struct vfs	*vfsp = bhvtovfs(bdp);
+	bhv_vfs_t	*vfsp = bhvtovfs(bdp);
 	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
 	xfs_inode_t	*rip;
-	vnode_t		*rvp;
+	bhv_vnode_t	*rvp;
 	int		unmount_event_wanted = 0;
 	int		unmount_event_flags = 0;
 	int		xfs_unmountfs_needed = 0;
@@ -665,9 +657,8 @@ xfs_mntupdate(
 	int				*flags,
 	struct xfs_mount_args		*args)
 {
-	struct vfs	*vfsp = bhvtovfs(bdp);
+	bhv_vfs_t	*vfsp = bhvtovfs(bdp);
 	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
-	int		error;
 
 	if (!(*flags & MS_RDONLY)) {			/* rw/ro -> rw */
 		if (vfsp->vfs_flag & VFS_RDONLY)
@@ -679,7 +670,7 @@ xfs_mntupdate(
 			mp->m_flags &= ~XFS_MOUNT_BARRIER;
 		}
 	} else if (!(vfsp->vfs_flag & VFS_RDONLY)) {	/* rw -> ro */
-		VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error);
+		bhv_vfs_sync(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL);
 		xfs_quiesce_fs(mp);
 		xfs_log_unmount_write(mp);
 		xfs_unmountfs_writesb(mp);
@@ -702,7 +693,7 @@ xfs_unmount_flush(
 	xfs_inode_t	*rip = mp->m_rootip;
 	xfs_inode_t	*rbmip;
 	xfs_inode_t	*rsumip = NULL;
-	vnode_t		*rvp = XFS_ITOV(rip);
+	bhv_vnode_t	*rvp = XFS_ITOV(rip);
 	int		error;
 
 	xfs_ilock(rip, XFS_ILOCK_EXCL);
@@ -781,9 +772,9 @@ fscorrupt_out2:
 STATIC int
 xfs_root(
 	bhv_desc_t	*bdp,
-	vnode_t		**vpp)
+	bhv_vnode_t	**vpp)
 {
-	vnode_t		*vp;
+	bhv_vnode_t	*vp;
 
 	vp = XFS_ITOV((XFS_BHVTOM(bdp))->m_rootip);
 	VN_HOLD(vp);
@@ -801,8 +792,8 @@ xfs_root(
 STATIC int
 xfs_statvfs(
 	bhv_desc_t	*bdp,
-	xfs_statfs_t	*statp,
-	vnode_t		*vp)
+	bhv_statvfs_t	*statp,
+	bhv_vnode_t	*vp)
 {
 	__uint64_t	fakeinos;
 	xfs_extlen_t	lsize;
@@ -900,7 +891,7 @@ xfs_sync(
 /*
  * xfs sync routine for internal use
  *
- * This routine supports all of the flags defined for the generic VFS_SYNC
+ * This routine supports all of the flags defined for the generic vfs_sync
  * interface as explained above under xfs_sync.  In the interests of not
  * changing interfaces within the 6.5 family, additional internally-
  * required functions are specified within a separate xflags parameter,
@@ -917,7 +908,7 @@ xfs_sync_inodes(
 	xfs_inode_t	*ip = NULL;
 	xfs_inode_t	*ip_next;
 	xfs_buf_t	*bp;
-	vnode_t		*vp = NULL;
+	bhv_vnode_t	*vp = NULL;
 	int		error;
 	int		last_error;
 	uint64_t	fflag;
@@ -1156,9 +1147,9 @@ xfs_sync_inodes(
 			xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
 			if (XFS_FORCED_SHUTDOWN(mp)) {
-				VOP_TOSS_PAGES(vp, 0, -1, FI_REMAPF);
+				bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
 			} else {
-				VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_REMAPF);
+				bhv_vop_flushinval_pages(vp, 0, -1, FI_REMAPF);
 			}
 
 			xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -1178,8 +1169,8 @@ xfs_sync_inodes(
 				 * across calls to the buffer cache.
 				 */
 				xfs_iunlock(ip, XFS_ILOCK_SHARED);
-				VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1,
-							fflag, FI_NONE, error);
+				error = bhv_vop_flush_pages(vp, (xfs_off_t)0,
+							-1, fflag, FI_NONE);
 				xfs_ilock(ip, XFS_ILOCK_SHARED);
 			}
 
@@ -1231,9 +1222,7 @@ xfs_sync_inodes(
 						 * marker and free it.
 						 */
 						XFS_MOUNT_ILOCK(mp);
-
 						IPOINTER_REMOVE(ip, mp);
-
 						XFS_MOUNT_IUNLOCK(mp);
 
 						ASSERT(!(lock_flags &
@@ -1421,7 +1410,7 @@ xfs_sync_inodes(
 /*
  * xfs sync routine for internal use
  *
- * This routine supports all of the flags defined for the generic VFS_SYNC
+ * This routine supports all of the flags defined for the generic vfs_sync
  * interface as explained above under xfs_sync.  In the interests of not
  * changing interfaces within the 6.5 family, additional internally-
  * required functions are specified within a separate xflags parameter,
@@ -1574,7 +1563,7 @@ xfs_syncsub(
 STATIC int
 xfs_vget(
 	bhv_desc_t	*bdp,
-	vnode_t		**vpp,
+	bhv_vnode_t	**vpp,
 	fid_t		*fidp)
 {
 	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
@@ -1657,10 +1646,10 @@ xfs_vget(
 #define MNTOPT_NOATTR2	"noattr2"	/* do not use attr2 attribute format */
 
 STATIC unsigned long
-suffix_strtoul(const char *cp, char **endp, unsigned int base)
+suffix_strtoul(char *s, char **endp, unsigned int base)
 {
 	int	last, shift_left_factor = 0;
-	char	*value = (char *)cp;
+	char	*value = s;
 
 	last = strlen(value) - 1;
 	if (value[last] == 'K' || value[last] == 'k') {
@@ -1676,7 +1665,7 @@ suffix_strtoul(const char *cp, char **endp, unsigned int base)
 		value[last] = '\0';
 	}
 
-	return simple_strtoul(cp, endp, base) << shift_left_factor;
+	return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
 }
 
 STATIC int
@@ -1686,7 +1675,7 @@ xfs_parseargs(
 	struct xfs_mount_args	*args,
 	int			update)
 {
-	struct vfs		*vfsp = bhvtovfs(bhv);
+	bhv_vfs_t		*vfsp = bhvtovfs(bhv);
 	char			*this_char, *value, *eov;
 	int			dsunit, dswidth, vol_dsunit, vol_dswidth;
 	int			iosize;
@@ -1708,42 +1697,48 @@ xfs_parseargs(
 
 		if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
 			if (!value || !*value) {
-				printk("XFS: %s option requires an argument\n",
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
 			args->logbufs = simple_strtoul(value, &eov, 10);
 		} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
 			if (!value || !*value) {
-				printk("XFS: %s option requires an argument\n",
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
 			args->logbufsize = suffix_strtoul(value, &eov, 10);
 		} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
 			if (!value || !*value) {
-				printk("XFS: %s option requires an argument\n",
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
 			strncpy(args->logname, value, MAXNAMELEN);
 		} else if (!strcmp(this_char, MNTOPT_MTPT)) {
 			if (!value || !*value) {
-				printk("XFS: %s option requires an argument\n",
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
 			strncpy(args->mtpt, value, MAXNAMELEN);
 		} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
 			if (!value || !*value) {
-				printk("XFS: %s option requires an argument\n",
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
 			strncpy(args->rtname, value, MAXNAMELEN);
 		} else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
 			if (!value || !*value) {
-				printk("XFS: %s option requires an argument\n",
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
@@ -1752,7 +1747,8 @@ xfs_parseargs(
 			args->iosizelog = (uint8_t) iosize;
 		} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
 			if (!value || !*value) {
-				printk("XFS: %s option requires an argument\n",
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
@@ -1761,7 +1757,8 @@ xfs_parseargs(
 			args->iosizelog = ffs(iosize) - 1;
 		} else if (!strcmp(this_char, MNTOPT_IHASHSIZE)) {
 			if (!value || !*value) {
-				printk("XFS: %s option requires an argument\n",
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
@@ -1782,7 +1779,8 @@ xfs_parseargs(
 		} else if (!strcmp(this_char, MNTOPT_INO64)) {
 			args->flags |= XFSMNT_INO64;
 #if !XFS_BIG_INUMS
-			printk("XFS: %s option not allowed on this system\n",
+			cmn_err(CE_WARN,
+				"XFS: %s option not allowed on this system",
 				this_char);
 			return EINVAL;
 #endif
@@ -1792,14 +1790,16 @@ xfs_parseargs(
 			args->flags |= XFSMNT_SWALLOC;
 		} else if (!strcmp(this_char, MNTOPT_SUNIT)) {
 			if (!value || !*value) {
-				printk("XFS: %s option requires an argument\n",
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
 			dsunit = simple_strtoul(value, &eov, 10);
 		} else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
 			if (!value || !*value) {
-				printk("XFS: %s option requires an argument\n",
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
 					this_char);
 				return EINVAL;
 			}
@@ -1807,7 +1807,8 @@ xfs_parseargs(
 		} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
 			args->flags &= ~XFSMNT_32BITINODES;
 #if !XFS_BIG_INUMS
-			printk("XFS: %s option not allowed on this system\n",
+			cmn_err(CE_WARN,
+				"XFS: %s option not allowed on this system",
 				this_char);
 			return EINVAL;
 #endif
@@ -1831,36 +1832,41 @@ xfs_parseargs(
 			args->flags &= ~XFSMNT_ATTR2;
 		} else if (!strcmp(this_char, "osyncisdsync")) {
 			/* no-op, this is now the default */
-printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
+			cmn_err(CE_WARN,
+	"XFS: osyncisdsync is now the default, option is deprecated.");
 		} else if (!strcmp(this_char, "irixsgid")) {
-printk("XFS: irixsgid is now a sysctl(2) variable, option is deprecated.\n");
+			cmn_err(CE_WARN,
+	"XFS: irixsgid is now a sysctl(2) variable, option is deprecated.");
 		} else {
-			printk("XFS: unknown mount option [%s].\n", this_char);
+			cmn_err(CE_WARN,
+				"XFS: unknown mount option [%s].", this_char);
 			return EINVAL;
 		}
 	}
 
 	if (args->flags & XFSMNT_NORECOVERY) {
 		if ((vfsp->vfs_flag & VFS_RDONLY) == 0) {
-			printk("XFS: no-recovery mounts must be read-only.\n");
+			cmn_err(CE_WARN,
+				"XFS: no-recovery mounts must be read-only.");
 			return EINVAL;
 		}
 	}
 
 	if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) {
-		printk(
-	"XFS: sunit and swidth options incompatible with the noalign option\n");
+		cmn_err(CE_WARN,
+	"XFS: sunit and swidth options incompatible with the noalign option");
 		return EINVAL;
 	}
 
 	if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
-		printk("XFS: sunit and swidth must be specified together\n");
+		cmn_err(CE_WARN,
+			"XFS: sunit and swidth must be specified together");
 		return EINVAL;
 	}
 
 	if (dsunit && (dswidth % dsunit != 0)) {
-		printk(
-	"XFS: stripe width (%d) must be a multiple of the stripe unit (%d)\n",
+		cmn_err(CE_WARN,
+	"XFS: stripe width (%d) must be a multiple of the stripe unit (%d)",
 			dswidth, dsunit);
 		return EINVAL;
 	}
@@ -1907,7 +1913,7 @@ xfs_showargs(
 	};
 	struct proc_xfs_info	*xfs_infop;
 	struct xfs_mount	*mp = XFS_BHVTOM(bhv);
-	struct vfs		*vfsp = XFS_MTOVFS(mp);
+	struct bhv_vfs		*vfsp = XFS_MTOVFS(mp);
 
 	for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) {
 		if (mp->m_flags & xfs_infop->flag)
@@ -1967,7 +1973,7 @@ xfs_freeze(
 }
 
 
-vfsops_t xfs_vfsops = {
+bhv_vfsops_t xfs_vfsops = {
 	BHV_IDENTITY_INIT(VFS_BHV_XFS,VFS_POSITION_XFS),
 	.vfs_parseargs		= xfs_parseargs,
 	.vfs_showargs		= xfs_showargs,
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 7027ae68ee3..00a6b7dc24a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -16,8 +16,6 @@
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-#include <linux/capability.h>
-
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_types.h"
@@ -27,7 +25,6 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
@@ -35,13 +32,11 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
-#include "xfs_dir_leaf.h"
 #include "xfs_itable.h"
 #include "xfs_btree.h"
 #include "xfs_ialloc.h"
@@ -58,32 +53,14 @@
 #include "xfs_log_priv.h"
 #include "xfs_mac.h"
 
-
-/*
- * The maximum pathlen is 1024 bytes. Since the minimum file system
- * blocksize is 512 bytes, we can get a max of 2 extents back from
- * bmapi.
- */
-#define SYMLINK_MAPS 2
-
-/*
- * For xfs, we check that the file isn't too big to be opened by this kernel.
- * No other open action is required for regular files.  Devices are handled
- * through the specfs file system, pipes through fifofs.  Device and
- * fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively,
- * when a new vnode is first looked up or created.
- */
 STATIC int
 xfs_open(
 	bhv_desc_t	*bdp,
 	cred_t		*credp)
 {
 	int		mode;
-	vnode_t		*vp;
-	xfs_inode_t	*ip;
-
-	vp = BHV_TO_VNODE(bdp);
-	ip = XFS_BHVTOI(bdp);
+	bhv_vnode_t	*vp = BHV_TO_VNODE(bdp);
+	xfs_inode_t	*ip = XFS_BHVTOI(bdp);
 
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return XFS_ERROR(EIO);
@@ -101,6 +78,35 @@ xfs_open(
 	return 0;
 }
 
+STATIC int
+xfs_close(
+	bhv_desc_t	*bdp,
+	int		flags,
+	lastclose_t	lastclose,
+	cred_t		*credp)
+{
+	bhv_vnode_t	*vp = BHV_TO_VNODE(bdp);
+	xfs_inode_t	*ip = XFS_BHVTOI(bdp);
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return XFS_ERROR(EIO);
+
+	if (lastclose != L_TRUE || !VN_ISREG(vp))
+		return 0;
+
+	/*
+	 * If we previously truncated this file and removed old data in
+	 * the process, we want to initiate "early" writeout on the last
+	 * close.  This is an attempt to combat the notorious NULL files
+	 * problem which is particularly noticeable from a truncate down,
+	 * buffered (re-)write (delalloc), followed by a crash.  What we
+	 * are effectively doing here is significantly reducing the time
+	 * window where we'd otherwise be exposed to that problem.
+	 */
+	if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
+		return bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE);
+	return 0;
+}
 
 /*
  * xfs_getattr
@@ -108,13 +114,13 @@ xfs_open(
 STATIC int
 xfs_getattr(
 	bhv_desc_t	*bdp,
-	vattr_t		*vap,
+	bhv_vattr_t	*vap,
 	int		flags,
 	cred_t		*credp)
 {
 	xfs_inode_t	*ip;
 	xfs_mount_t	*mp;
-	vnode_t		*vp;
+	bhv_vnode_t	*vp;
 
 	vp  = BHV_TO_VNODE(bdp);
 	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
@@ -241,7 +247,7 @@ xfs_getattr(
 int
 xfs_setattr(
 	bhv_desc_t		*bdp,
-	vattr_t			*vap,
+	bhv_vattr_t		*vap,
 	int			flags,
 	cred_t			*credp)
 {
@@ -255,7 +261,7 @@ xfs_setattr(
 	uid_t			uid=0, iuid=0;
 	gid_t			gid=0, igid=0;
 	int			timeflags = 0;
-	vnode_t			*vp;
+	bhv_vnode_t		*vp;
 	xfs_prid_t		projid=0, iprojid=0;
 	int			mandlock_before, mandlock_after;
 	struct xfs_dquot	*udqp, *gdqp, *olddquot1, *olddquot2;
@@ -347,7 +353,6 @@ xfs_setattr(
 	 */
 	tp = NULL;
 	lock_flags = XFS_ILOCK_EXCL;
-	ASSERT(flags & ATTR_NOLOCK ? flags & ATTR_DMI : 1);
 	if (flags & ATTR_NOLOCK)
 		need_iolock = 0;
 	if (!(mask & XFS_AT_SIZE)) {
@@ -666,9 +671,17 @@ xfs_setattr(
 					    ((ip->i_d.di_nlink != 0 ||
 					      !(mp->m_flags & XFS_MOUNT_WSYNC))
 					     ? 1 : 0));
-			if (code) {
+			if (code)
 				goto abort_return;
-			}
+			/*
+			 * Truncated "down", so we're removing references
+			 * to old data here - if we now delay flushing for
+			 * a long time, we expose ourselves unduly to the
+			 * notorious NULL files problem.  So, we mark this
+			 * vnode and flush it when the file is closed, and
+			 * do not wait the usual (long) time for writeout.
+			 */
+			VTRUNCATE(vp);
 		}
 		/*
 		 * Have to do this even if the file's size doesn't change.
@@ -800,6 +813,8 @@ xfs_setattr(
 				di_flags |= XFS_DIFLAG_NODUMP;
 			if (vap->va_xflags & XFS_XFLAG_PROJINHERIT)
 				di_flags |= XFS_DIFLAG_PROJINHERIT;
+			if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
+				di_flags |= XFS_DIFLAG_NODEFRAG;
 			if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
 				if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
 					di_flags |= XFS_DIFLAG_RTINHERIT;
@@ -869,7 +884,7 @@ xfs_setattr(
 	 */
 	mandlock_after = MANDLOCK(vp, ip->i_d.di_mode);
 	if (mandlock_before != mandlock_after) {
-		VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING,
+		bhv_vop_vnode_change(vp, VCHANGE_FLAGS_ENF_LOCKING,
 				 mandlock_after);
 	}
 
@@ -936,6 +951,13 @@ xfs_access(
 
 
 /*
+ * The maximum pathlen is 1024 bytes. Since the minimum file system
+ * blocksize is 512 bytes, we can get a max of 2 extents back from
+ * bmapi.
+ */
+#define SYMLINK_MAPS 2
+
+/*
  * xfs_readlink
  *
  */
@@ -950,7 +972,7 @@ xfs_readlink(
 	int		count;
 	xfs_off_t	offset;
 	int		pathlen;
-	vnode_t		*vp;
+	bhv_vnode_t	*vp;
 	int		error = 0;
 	xfs_mount_t	*mp;
 	int             nmaps;
@@ -1000,7 +1022,7 @@ xfs_readlink(
 		nmaps = SYMLINK_MAPS;
 
 		error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen),
-				  0, NULL, 0, mval, &nmaps, NULL);
+				  0, NULL, 0, mval, &nmaps, NULL, NULL);
 
 		if (error) {
 			goto error_return;
@@ -1208,8 +1230,8 @@ xfs_inactive_free_eofblocks(
 
 	nimaps = 1;
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0,
-			  NULL, 0, &imap, &nimaps, NULL);
+	error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0,
+			  NULL, 0, &imap, &nimaps, NULL, NULL);
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
 	if (!error && (nimaps != 0) &&
@@ -1338,7 +1360,7 @@ xfs_inactive_symlink_rmt(
 	nmaps = ARRAY_SIZE(mval);
 	if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
 			XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
-			&free_list)))
+			&free_list, NULL)))
 		goto error0;
 	/*
 	 * Invalidate the block(s).
@@ -1353,7 +1375,7 @@ xfs_inactive_symlink_rmt(
 	 * Unmap the dead block(s) to the free_list.
 	 */
 	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
-			&first_block, &free_list, &done)))
+			&first_block, &free_list, NULL, &done)))
 		goto error1;
 	ASSERT(done);
 	/*
@@ -1469,9 +1491,6 @@ xfs_inactive_symlink_local(
 	return 0;
 }
 
-/*
- *
- */
 STATIC int
 xfs_inactive_attrs(
 	xfs_inode_t	*ip,
@@ -1524,16 +1543,16 @@ xfs_release(
 	bhv_desc_t	*bdp)
 {
 	xfs_inode_t	*ip;
-	vnode_t		*vp;
+	bhv_vnode_t	*vp;
 	xfs_mount_t	*mp;
 	int		error;
 
 	vp = BHV_TO_VNODE(bdp);
 	ip = XFS_BHVTOI(bdp);
+	mp = ip->i_mount;
 
-	if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) {
+	if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0))
 		return 0;
-	}
 
 	/* If this is a read-only mount, don't do this (would generate I/O) */
 	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
@@ -1545,8 +1564,6 @@ xfs_release(
 		return 0;
 #endif
 
-	mp = ip->i_mount;
-
 	if (ip->i_d.di_nlink != 0) {
 		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
 		     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
@@ -1579,8 +1596,8 @@ xfs_inactive(
 	cred_t		*credp)
 {
 	xfs_inode_t	*ip;
-	vnode_t		*vp;
-	xfs_bmap_free_t	free_list; 
+	bhv_vnode_t	*vp;
+	xfs_bmap_free_t	free_list;
 	xfs_fsblock_t	first_block;
 	int		committed;
 	xfs_trans_t	*tp;
@@ -1760,7 +1777,7 @@ xfs_inactive(
 			cmn_err(CE_NOTE,
 		"xfs_inactive:	xfs_ifree() returned an error = %d on %s",
 				error, mp->m_fsname);
-			xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR);
+			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
 		}
 		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
 	} else {
@@ -1795,17 +1812,17 @@ xfs_inactive(
 STATIC int
 xfs_lookup(
 	bhv_desc_t		*dir_bdp,
-	vname_t			*dentry,
-	vnode_t			**vpp,
+	bhv_vname_t		*dentry,
+	bhv_vnode_t		**vpp,
 	int			flags,
-	vnode_t			*rdir,
+	bhv_vnode_t		*rdir,
 	cred_t			*credp)
 {
 	xfs_inode_t		*dp, *ip;
 	xfs_ino_t		e_inum;
 	int			error;
 	uint			lock_mode;
-	vnode_t			*dir_vp;
+	bhv_vnode_t		*dir_vp;
 
 	dir_vp = BHV_TO_VNODE(dir_bdp);
 	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
@@ -1832,15 +1849,15 @@ xfs_lookup(
 STATIC int
 xfs_create(
 	bhv_desc_t		*dir_bdp,
-	vname_t			*dentry,
-	vattr_t			*vap,
-	vnode_t			**vpp,
+	bhv_vname_t		*dentry,
+	bhv_vattr_t		*vap,
+	bhv_vnode_t		**vpp,
 	cred_t			*credp)
 {
 	char			*name = VNAME(dentry);
-	vnode_t			*dir_vp;
+	bhv_vnode_t		*dir_vp;
 	xfs_inode_t		*dp, *ip;
-	vnode_t		        *vp=NULL;
+	bhv_vnode_t	        *vp = NULL;
 	xfs_trans_t		*tp;
 	xfs_mount_t	        *mp;
 	xfs_dev_t		rdev;
@@ -1938,8 +1955,7 @@ xfs_create(
 	if (error)
 		goto error_return;
 
-	if (resblks == 0 &&
-	    (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen)))
+	if (resblks == 0 && (error = xfs_dir_canenter(tp, dp, name, namelen)))
 		goto error_return;
 	rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0;
 	error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1,
@@ -1970,9 +1986,9 @@ xfs_create(
 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
 	dp_joined_to_trans = B_TRUE;
 
-	error = XFS_DIR_CREATENAME(mp, tp, dp, name, namelen, ip->i_ino,
-		&first_block, &free_list,
-		resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
+	error = xfs_dir_createname(tp, dp, name, namelen, ip->i_ino,
+					&first_block, &free_list, resblks ?
+					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
 	if (error) {
 		ASSERT(error != ENOSPC);
 		goto abort_return;
@@ -2026,7 +2042,7 @@ xfs_create(
 	 * Propagate the fact that the vnode changed after the
 	 * xfs_inode locks have been released.
 	 */
-	VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3);
+	bhv_vop_vnode_change(vp, VCHANGE_FLAGS_TRUNCATED, 3);
 
 	*vpp = vp;
 
@@ -2107,7 +2123,7 @@ int xfs_rm_attempts;
 STATIC int
 xfs_lock_dir_and_entry(
 	xfs_inode_t	*dp,
-	vname_t		*dentry,
+	bhv_vname_t	*dentry,
 	xfs_inode_t	*ip)	/* inode of entry 'name' */
 {
 	int		attempts;
@@ -2321,10 +2337,10 @@ int remove_which_error_return = 0;
 STATIC int
 xfs_remove(
 	bhv_desc_t		*dir_bdp,
-	vname_t			*dentry,
+	bhv_vname_t		*dentry,
 	cred_t			*credp)
 {
-	vnode_t			*dir_vp;
+	bhv_vnode_t		*dir_vp;
 	char			*name = VNAME(dentry);
 	xfs_inode_t             *dp, *ip;
 	xfs_trans_t             *tp = NULL;
@@ -2448,8 +2464,8 @@ xfs_remove(
 	 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
 	 */
 	XFS_BMAP_INIT(&free_list, &first_block);
-	error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, ip->i_ino,
-		&first_block, &free_list, 0);
+	error = xfs_dir_removename(tp, dp, name, namelen, ip->i_ino,
+					&first_block, &free_list, 0);
 	if (error) {
 		ASSERT(error != ENOENT);
 		REMOVE_DEBUG_TRACE(__LINE__);
@@ -2511,7 +2527,7 @@ xfs_remove(
 	/*
 	 * Let interposed file systems know about removed links.
 	 */
-	VOP_LINK_REMOVED(XFS_ITOV(ip), dir_vp, link_zero);
+	bhv_vop_link_removed(XFS_ITOV(ip), dir_vp, link_zero);
 
 	IRELE(ip);
 
@@ -2564,8 +2580,8 @@ xfs_remove(
 STATIC int
 xfs_link(
 	bhv_desc_t		*target_dir_bdp,
-	vnode_t			*src_vp,
-	vname_t			*dentry,
+	bhv_vnode_t		*src_vp,
+	bhv_vname_t		*dentry,
 	cred_t			*credp)
 {
 	xfs_inode_t		*tdp, *sip;
@@ -2577,7 +2593,7 @@ xfs_link(
 	xfs_fsblock_t           first_block;
 	int			cancel_flags;
 	int			committed;
-	vnode_t			*target_dir_vp;
+	bhv_vnode_t		*target_dir_vp;
 	int			resblks;
 	char			*target_name = VNAME(dentry);
 	int			target_namelen;
@@ -2668,13 +2684,12 @@ xfs_link(
 	}
 
 	if (resblks == 0 &&
-	    (error = XFS_DIR_CANENTER(mp, tp, tdp, target_name,
-			target_namelen)))
+	    (error = xfs_dir_canenter(tp, tdp, target_name, target_namelen)))
 		goto error_return;
 
 	XFS_BMAP_INIT(&free_list, &first_block);
 
-	error = XFS_DIR_CREATENAME(mp, tp, tdp, target_name, target_namelen,
+	error = xfs_dir_createname(tp, tdp, target_name, target_namelen,
 				   sip->i_ino, &first_block, &free_list,
 				   resblks);
 	if (error)
@@ -2734,15 +2749,15 @@ std_return:
 STATIC int
 xfs_mkdir(
 	bhv_desc_t		*dir_bdp,
-	vname_t			*dentry,
-	vattr_t			*vap,
-	vnode_t			**vpp,
+	bhv_vname_t		*dentry,
+	bhv_vattr_t		*vap,
+	bhv_vnode_t		**vpp,
 	cred_t			*credp)
 {
 	char			*dir_name = VNAME(dentry);
 	xfs_inode_t             *dp;
 	xfs_inode_t		*cdp;	/* inode of created dir */
-	vnode_t			*cvp;	/* vnode of created dir */
+	bhv_vnode_t		*cvp;	/* vnode of created dir */
 	xfs_trans_t		*tp;
 	xfs_mount_t		*mp;
 	int			cancel_flags;
@@ -2750,7 +2765,7 @@ xfs_mkdir(
 	int			committed;
 	xfs_bmap_free_t         free_list;
 	xfs_fsblock_t           first_block;
-	vnode_t			*dir_vp;
+	bhv_vnode_t		*dir_vp;
 	boolean_t		dp_joined_to_trans;
 	boolean_t		created = B_FALSE;
 	int			dm_event_sent = 0;
@@ -2840,7 +2855,7 @@ xfs_mkdir(
 		goto error_return;
 
 	if (resblks == 0 &&
-	    (error = XFS_DIR_CANENTER(mp, tp, dp, dir_name, dir_namelen)))
+	    (error = xfs_dir_canenter(tp, dp, dir_name, dir_namelen)))
 		goto error_return;
 	/*
 	 * create the directory inode.
@@ -2867,9 +2882,9 @@ xfs_mkdir(
 
 	XFS_BMAP_INIT(&free_list, &first_block);
 
-	error = XFS_DIR_CREATENAME(mp, tp, dp, dir_name, dir_namelen,
-			cdp->i_ino, &first_block, &free_list,
-			resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
+	error = xfs_dir_createname(tp, dp, dir_name, dir_namelen, cdp->i_ino,
+				   &first_block, &free_list, resblks ?
+				   resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
 	if (error) {
 		ASSERT(error != ENOSPC);
 		goto error1;
@@ -2883,16 +2898,14 @@ xfs_mkdir(
 	 */
 	dp->i_gen++;
 
-	error = XFS_DIR_INIT(mp, tp, cdp, dp);
-	if (error) {
+	error = xfs_dir_init(tp, cdp, dp);
+	if (error)
 		goto error2;
-	}
 
 	cdp->i_gen = 1;
 	error = xfs_bumplink(tp, dp);
-	if (error) {
+	if (error)
 		goto error2;
-	}
 
 	cvp = XFS_ITOV(cdp);
 
@@ -2969,7 +2982,7 @@ std_return:
 STATIC int
 xfs_rmdir(
 	bhv_desc_t		*dir_bdp,
-	vname_t			*dentry,
+	bhv_vname_t		*dentry,
 	cred_t			*credp)
 {
 	char			*name = VNAME(dentry);
@@ -2982,7 +2995,7 @@ xfs_rmdir(
 	xfs_fsblock_t           first_block;
 	int			cancel_flags;
 	int			committed;
-	vnode_t			*dir_vp;
+	bhv_vnode_t		*dir_vp;
 	int			dm_di_mode = 0;
 	int			last_cdp_link;
 	int			namelen;
@@ -3101,16 +3114,15 @@ xfs_rmdir(
 		error = XFS_ERROR(ENOTEMPTY);
 		goto error_return;
 	}
-	if (!XFS_DIR_ISEMPTY(mp, cdp)) {
+	if (!xfs_dir_isempty(cdp)) {
 		error = XFS_ERROR(ENOTEMPTY);
 		goto error_return;
 	}
 
-	error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, cdp->i_ino,
-		&first_block, &free_list, resblks);
-	if (error) {
+	error = xfs_dir_removename(tp, dp, name, namelen, cdp->i_ino,
+					&first_block, &free_list, resblks);
+	if (error)
 		goto error1;
-	}
 
 	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 
@@ -3181,7 +3193,7 @@ xfs_rmdir(
 	/*
 	 * Let interposed file systems know about removed links.
 	 */
-	VOP_LINK_REMOVED(XFS_ITOV(cdp), dir_vp, last_cdp_link);
+	bhv_vop_link_removed(XFS_ITOV(cdp), dir_vp, last_cdp_link);
 
 	IRELE(cdp);
 
@@ -3209,8 +3221,6 @@ xfs_rmdir(
 
 
 /*
- * xfs_readdir
- *
  * Read dp's entries starting at uiop->uio_offset and translate them into
  * bufsize bytes worth of struct dirents starting at bufbase.
  */
@@ -3230,28 +3240,23 @@ xfs_readdir(
 					       (inst_t *)__return_address);
 	dp = XFS_BHVTOI(dir_bdp);
 
-	if (XFS_FORCED_SHUTDOWN(dp->i_mount)) {
+	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
 		return XFS_ERROR(EIO);
-	}
 
 	lock_mode = xfs_ilock_map_shared(dp);
-	error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp);
+	error = xfs_dir_getdents(tp, dp, uiop, eofp);
 	xfs_iunlock_map_shared(dp, lock_mode);
 	return error;
 }
 
 
-/*
- * xfs_symlink
- *
- */
 STATIC int
 xfs_symlink(
 	bhv_desc_t		*dir_bdp,
-	vname_t			*dentry,
-	vattr_t			*vap,
+	bhv_vname_t		*dentry,
+	bhv_vattr_t		*vap,
 	char			*target_path,
-	vnode_t			**vpp,
+	bhv_vnode_t		**vpp,
 	cred_t			*credp)
 {
 	xfs_trans_t		*tp;
@@ -3263,7 +3268,7 @@ xfs_symlink(
 	xfs_bmap_free_t		free_list;
 	xfs_fsblock_t		first_block;
 	boolean_t		dp_joined_to_trans;
-	vnode_t			*dir_vp;
+	bhv_vnode_t		*dir_vp;
 	uint			cancel_flags;
 	int			committed;
 	xfs_fileoff_t		first_fsb;
@@ -3308,7 +3313,7 @@ xfs_symlink(
 		int len, total;
 		char *path;
 
-		for(total = 0, path = target_path; total < pathlen;) {
+		for (total = 0, path = target_path; total < pathlen;) {
 			/*
 			 * Skip any slashes.
 			 */
@@ -3402,7 +3407,7 @@ xfs_symlink(
 	 * Check for ability to enter directory entry, if no space reserved.
 	 */
 	if (resblks == 0 &&
-	    (error = XFS_DIR_CANENTER(mp, tp, dp, link_name, link_namelen)))
+	    (error = xfs_dir_canenter(tp, dp, link_name, link_namelen)))
 		goto error_return;
 	/*
 	 * Initialize the bmap freelist prior to calling either
@@ -3457,7 +3462,7 @@ xfs_symlink(
 		error = xfs_bmapi(tp, ip, first_fsb, fs_blocks,
 				  XFS_BMAPI_WRITE | XFS_BMAPI_METADATA,
 				  &first_block, resblks, mval, &nmaps,
-				  &free_list);
+				  &free_list, NULL);
 		if (error) {
 			goto error1;
 		}
@@ -3489,11 +3494,10 @@ xfs_symlink(
 	/*
 	 * Create the directory entry for the symlink.
 	 */
-	error = XFS_DIR_CREATENAME(mp, tp, dp, link_name, link_namelen,
-			ip->i_ino, &first_block, &free_list, resblks);
-	if (error) {
+	error = xfs_dir_createname(tp, dp, link_name, link_namelen, ip->i_ino,
+				   &first_block, &free_list, resblks);
+	if (error)
 		goto error1;
-	}
 	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
 
@@ -3541,7 +3545,7 @@ std_return:
 	}
 
 	if (!error) {
-		vnode_t *vp;
+		bhv_vnode_t *vp;
 
 		ASSERT(ip);
 		vp = XFS_ITOV(ip);
@@ -3606,10 +3610,10 @@ xfs_fid2(
 int
 xfs_rwlock(
 	bhv_desc_t	*bdp,
-	vrwlock_t	locktype)
+	bhv_vrwlock_t	locktype)
 {
 	xfs_inode_t	*ip;
-	vnode_t		*vp;
+	bhv_vnode_t	*vp;
 
 	vp = BHV_TO_VNODE(bdp);
 	if (VN_ISDIR(vp))
@@ -3637,10 +3641,10 @@ xfs_rwlock(
 void
 xfs_rwunlock(
 	bhv_desc_t	*bdp,
-	vrwlock_t	locktype)
+	bhv_vrwlock_t	locktype)
 {
 	xfs_inode_t     *ip;
-	vnode_t		*vp;
+	bhv_vnode_t	*vp;
 
 	vp = BHV_TO_VNODE(bdp);
 	if (VN_ISDIR(vp))
@@ -3744,7 +3748,6 @@ xfs_inode_flush(
 	return error;
 }
 
-
 int
 xfs_set_dmattrs (
 	bhv_desc_t	*bdp,
@@ -3785,16 +3788,12 @@ xfs_set_dmattrs (
 	return error;
 }
 
-
-/*
- * xfs_reclaim
- */
 STATIC int
 xfs_reclaim(
 	bhv_desc_t	*bdp)
 {
 	xfs_inode_t	*ip;
-	vnode_t		*vp;
+	bhv_vnode_t	*vp;
 
 	vp = BHV_TO_VNODE(bdp);
 	ip = XFS_BHVTOI(bdp);
@@ -3849,7 +3848,7 @@ xfs_finish_reclaim(
 	int		sync_mode)
 {
 	xfs_ihash_t	*ih = ip->i_hash;
-	vnode_t		*vp = XFS_ITOV_NULL(ip);
+	bhv_vnode_t	*vp = XFS_ITOV_NULL(ip);
 	int		error;
 
 	if (vp && VN_BAD(vp))
@@ -4116,10 +4115,10 @@ retry:
 		 * Issue the xfs_bmapi() call to allocate the blocks
 		 */
 		XFS_BMAP_INIT(&free_list, &firstfsb);
-		error = xfs_bmapi(tp, ip, startoffset_fsb,
+		error = XFS_BMAPI(mp, tp, &ip->i_iocore, startoffset_fsb,
 				  allocatesize_fsb, bmapi_flag,
 				  &firstfsb, 0, imapp, &nimaps,
-				  &free_list);
+				  &free_list, NULL);
 		if (error) {
 			goto error0;
 		}
@@ -4199,8 +4198,8 @@ xfs_zero_remaining_bytes(
 	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
 		offset_fsb = XFS_B_TO_FSBT(mp, offset);
 		nimap = 1;
-		error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, NULL, 0, &imap,
-			&nimap, NULL);
+		error = XFS_BMAPI(mp, NULL, &ip->i_iocore, offset_fsb, 1, 0,
+			NULL, 0, &imap, &nimap, NULL, NULL);
 		if (error || nimap < 1)
 			break;
 		ASSERT(imap.br_blockcount >= 1);
@@ -4259,7 +4258,7 @@ xfs_free_file_space(
 	xfs_off_t		len,
 	int			attr_flags)
 {
-	vnode_t			*vp;
+	bhv_vnode_t		*vp;
 	int			committed;
 	int			done;
 	xfs_off_t		end_dmi_offset;
@@ -4308,7 +4307,6 @@ xfs_free_file_space(
 			return error;
 	}
 
-	ASSERT(attr_flags & ATTR_NOLOCK ? attr_flags & ATTR_DMI : 1);
 	if (attr_flags & ATTR_NOLOCK)
 		need_iolock = 0;
 	if (need_iolock) {
@@ -4326,7 +4324,7 @@ xfs_free_file_space(
 	if (VN_CACHED(vp) != 0) {
 		xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1,
 				ctooff(offtoct(ioffset)), -1);
-		VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(ioffset)),
+		bhv_vop_flushinval_pages(vp, ctooff(offtoct(ioffset)),
 				-1, FI_REMAPF_LOCKED);
 	}
 
@@ -4338,8 +4336,8 @@ xfs_free_file_space(
 	 */
 	if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
 		nimap = 1;
-		error = xfs_bmapi(NULL, ip, startoffset_fsb, 1, 0, NULL, 0,
-			&imap, &nimap, NULL);
+		error = XFS_BMAPI(mp, NULL, &ip->i_iocore, startoffset_fsb,
+			1, 0, NULL, 0, &imap, &nimap, NULL, NULL);
 		if (error)
 			goto out_unlock_iolock;
 		ASSERT(nimap == 0 || nimap == 1);
@@ -4353,8 +4351,8 @@ xfs_free_file_space(
 				startoffset_fsb += mp->m_sb.sb_rextsize - mod;
 		}
 		nimap = 1;
-		error = xfs_bmapi(NULL, ip, endoffset_fsb - 1, 1, 0, NULL, 0,
-			&imap, &nimap, NULL);
+		error = XFS_BMAPI(mp, NULL, &ip->i_iocore, endoffset_fsb - 1,
+			1, 0, NULL, 0, &imap, &nimap, NULL, NULL);
 		if (error)
 			goto out_unlock_iolock;
 		ASSERT(nimap == 0 || nimap == 1);
@@ -4426,9 +4424,9 @@ xfs_free_file_space(
 		 * issue the bunmapi() call to free the blocks
 		 */
 		XFS_BMAP_INIT(&free_list, &firstfsb);
-		error = xfs_bunmapi(tp, ip, startoffset_fsb,
+		error = XFS_BUNMAPI(mp, tp, &ip->i_iocore, startoffset_fsb,
 				  endoffset_fsb - startoffset_fsb,
-				  0, 2, &firstfsb, &free_list, &done);
+				  0, 2, &firstfsb, &free_list, NULL, &done);
 		if (error) {
 			goto error0;
 		}
@@ -4488,8 +4486,8 @@ xfs_change_file_space(
 	xfs_off_t	startoffset;
 	xfs_off_t	llen;
 	xfs_trans_t	*tp;
-	vattr_t		va;
-	vnode_t		*vp;
+	bhv_vattr_t	va;
+	bhv_vnode_t	*vp;
 
 	vp = BHV_TO_VNODE(bdp);
 	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
@@ -4642,9 +4640,10 @@ xfs_change_file_space(
 	return error;
 }
 
-vnodeops_t xfs_vnodeops = {
+bhv_vnodeops_t xfs_vnodeops = {
 	BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS),
 	.vop_open		= xfs_open,
+	.vop_close		= xfs_close,
 	.vop_read		= xfs_read,
 #ifdef HAVE_SENDFILE
 	.vop_sendfile		= xfs_sendfile,