24 files changed, 364 insertions, 217 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 37534573960..045f98854f1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -884,6 +884,61 @@ void bd_set_size(struct block_device *bdev, loff_t size)
 }
 EXPORT_SYMBOL(bd_set_size);
 
+static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
+{
+	int ret = 0;
+	struct inode *bd_inode = bdev->bd_inode;
+	struct gendisk *disk = bdev->bd_disk;
+
+	mutex_lock_nested(&bdev->bd_mutex, subclass);
+	lock_kernel();
+	if (!--bdev->bd_openers) {
+		sync_blockdev(bdev);
+		kill_bdev(bdev);
+	}
+	if (bdev->bd_contains == bdev) {
+		if (disk->fops->release)
+			ret = disk->fops->release(bd_inode, NULL);
+	} else {
+		mutex_lock_nested(&bdev->bd_contains->bd_mutex,
+				  subclass + 1);
+		bdev->bd_contains->bd_part_count--;
+		mutex_unlock(&bdev->bd_contains->bd_mutex);
+	}
+	if (!bdev->bd_openers) {
+		struct module *owner = disk->fops->owner;
+
+		put_disk(disk);
+		module_put(owner);
+
+		if (bdev->bd_contains != bdev) {
+			kobject_put(&bdev->bd_part->kobj);
+			bdev->bd_part = NULL;
+		}
+		bdev->bd_disk = NULL;
+		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
+		if (bdev != bdev->bd_contains)
+			__blkdev_put(bdev->bd_contains, subclass + 1);
+		bdev->bd_contains = NULL;
+	}
+	unlock_kernel();
+	mutex_unlock(&bdev->bd_mutex);
+	bdput(bdev);
+	return ret;
+}
+
+int blkdev_put(struct block_device *bdev)
+{
+	return __blkdev_put(bdev, BD_MUTEX_NORMAL);
+}
+EXPORT_SYMBOL(blkdev_put);
+
+int blkdev_put_partition(struct block_device *bdev)
+{
+	return __blkdev_put(bdev, BD_MUTEX_PARTITION);
+}
+EXPORT_SYMBOL(blkdev_put_partition);
+
 static int
 blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags);
 
@@ -980,7 +1035,7 @@ out_first:
 	bdev->bd_disk = NULL;
 	bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 	if (bdev != bdev->bd_contains)
-		blkdev_put(bdev->bd_contains);
+		__blkdev_put(bdev->bd_contains, BD_MUTEX_WHOLE);
 	bdev->bd_contains = NULL;
 	put_disk(disk);
 	module_put(owner);
@@ -1079,63 +1134,6 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	return res;
 }
 
-static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
-{
-	int ret = 0;
-	struct inode *bd_inode = bdev->bd_inode;
-	struct gendisk *disk = bdev->bd_disk;
-
-	mutex_lock_nested(&bdev->bd_mutex, subclass);
-	lock_kernel();
-	if (!--bdev->bd_openers) {
-		sync_blockdev(bdev);
-		kill_bdev(bdev);
-	}
-	if (bdev->bd_contains == bdev) {
-		if (disk->fops->release)
-			ret = disk->fops->release(bd_inode, NULL);
-	} else {
-		mutex_lock_nested(&bdev->bd_contains->bd_mutex,
-				  subclass + 1);
-		bdev->bd_contains->bd_part_count--;
-		mutex_unlock(&bdev->bd_contains->bd_mutex);
-	}
-	if (!bdev->bd_openers) {
-		struct module *owner = disk->fops->owner;
-
-		put_disk(disk);
-		module_put(owner);
-
-		if (bdev->bd_contains != bdev) {
-			kobject_put(&bdev->bd_part->kobj);
-			bdev->bd_part = NULL;
-		}
-		bdev->bd_disk = NULL;
-		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
-		if (bdev != bdev->bd_contains)
-			__blkdev_put(bdev->bd_contains, subclass + 1);
-		bdev->bd_contains = NULL;
-	}
-	unlock_kernel();
-	mutex_unlock(&bdev->bd_mutex);
-	bdput(bdev);
-	return ret;
-}
-
-int blkdev_put(struct block_device *bdev)
-{
-	return __blkdev_put(bdev, BD_MUTEX_NORMAL);
-}
-
-EXPORT_SYMBOL(blkdev_put);
-
-int blkdev_put_partition(struct block_device *bdev)
-{
-	return __blkdev_put(bdev, BD_MUTEX_PARTITION);
-}
-
-EXPORT_SYMBOL(blkdev_put_partition);
-
 static int blkdev_close(struct inode * inode, struct file * filp)
 {
 	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 19ffb043abb..3a3567433b9 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1168,7 +1168,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
 eexit_1:
 
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n",
-		     current, ep, epi->file, error));
+		     current, ep, epi->ffd.file, error));
 
 	return error;
 }
@@ -1236,7 +1236,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 	struct eventpoll *ep = epi->ep;
 
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
-		     current, epi->file, epi, ep));
+		     current, epi->ffd.file, epi, ep));
 
 	write_lock_irqsave(&ep->lock, flags);
 
diff --git a/fs/exec.c b/fs/exec.c
index 8344ba73a2a..54135df2a96 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -486,8 +486,6 @@ struct file *open_exec(const char *name)
 		if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
 		    S_ISREG(inode->i_mode)) {
 			int err = vfs_permission(&nd, MAY_EXEC);
-			if (!err && !(inode->i_mode & 0111))
-				err = -EACCES;
 			file = ERR_PTR(err);
 			if (!err) {
 				file = nameidata_to_filp(&nd, O_RDONLY);
@@ -753,7 +751,7 @@ no_thread_group:
 
 		write_lock_irq(&tasklist_lock);
 		spin_lock(&oldsighand->siglock);
-		spin_lock(&newsighand->siglock);
+		spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING);
 
 		rcu_assign_pointer(current->sighand, newsighand);
 		recalc_sigpending();
@@ -922,12 +920,6 @@ int prepare_binprm(struct linux_binprm *bprm)
 	int retval;
 
 	mode = inode->i_mode;
-	/*
-	 * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
-	 * generic_permission lets a non-executable through
-	 */
-	if (!(mode & 0111))	/* with at least _one_ execute bit set */
-		return -EACCES;
 	if (bprm->file->f_op == NULL)
 		return -EACCES;
 
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f2702cda977..681dea8f953 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -775,7 +775,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	if (EXT2_INODE_SIZE(sb) == 0)
 		goto cantfind_ext2;
 	sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb);
-	if (sbi->s_inodes_per_block == 0)
+	if (sbi->s_inodes_per_block == 0 || sbi->s_inodes_per_group == 0)
 		goto cantfind_ext2;
 	sbi->s_itb_per_group = sbi->s_inodes_per_group /
 					sbi->s_inodes_per_block;
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index a504a40d6d2..063d994bda0 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1269,12 +1269,12 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
 		goal = le32_to_cpu(es->s_first_data_block);
 	group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
 			EXT3_BLOCKS_PER_GROUP(sb);
+	goal_group = group_no;
+retry_alloc:
 	gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
 	if (!gdp)
 		goto io_error;
 
-	goal_group = group_no;
-retry:
 	free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
 	/*
 	 * if there is not enough free blocks to make a new resevation
@@ -1349,7 +1349,7 @@ retry:
 	if (my_rsv) {
 		my_rsv = NULL;
 		group_no = goal_group;
-		goto retry;
+		goto retry_alloc;
 	}
 	/* No space left on the device */
 	*errp = -ENOSPC;
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 93aa5715f22..78b1deae3fa 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -44,6 +44,9 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
 	task->ioprio = ioprio;
 
 	ioc = task->io_context;
+	/* see wmb() in current_io_context() */
+	smp_read_barrier_depends();
+
 	if (ioc && ioc->set_ioprio)
 		ioc->set_ioprio(ioc, ioprio);
 
@@ -111,9 +114,9 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
 					continue;
 				ret = set_task_ioprio(p, ioprio);
 				if (ret)
-					break;
+					goto free_uid;
 			} while_each_thread(g, p);
-
+free_uid:
 			if (who)
 				free_uid(user);
 			break;
@@ -137,6 +140,29 @@ out:
 	return ret;
 }
 
+int ioprio_best(unsigned short aprio, unsigned short bprio)
+{
+	unsigned short aclass = IOPRIO_PRIO_CLASS(aprio);
+	unsigned short bclass = IOPRIO_PRIO_CLASS(bprio);
+
+	if (!ioprio_valid(aprio))
+		return bprio;
+	if (!ioprio_valid(bprio))
+		return aprio;
+
+	if (aclass == IOPRIO_CLASS_NONE)
+		aclass = IOPRIO_CLASS_BE;
+	if (bclass == IOPRIO_CLASS_NONE)
+		bclass = IOPRIO_CLASS_BE;
+
+	if (aclass == bclass)
+		return min(aprio, bprio);
+	if (aclass > bclass)
+		return bprio;
+	else
+		return aprio;
+}
+
 asmlinkage long sys_ioprio_get(int which, int who)
 {
 	struct task_struct *g, *p;
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 0971814c38b..42da6078431 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -261,7 +261,7 @@ void journal_commit_transaction(journal_t *journal)
 			struct buffer_head *bh = jh2bh(jh);
 
 			jbd_lock_bh_state(bh);
-			kfree(jh->b_committed_data);
+			jbd_slab_free(jh->b_committed_data, bh->b_size);
 			jh->b_committed_data = NULL;
 			jbd_unlock_bh_state(bh);
 		}
@@ -745,14 +745,14 @@ restart_loop:
 		 * Otherwise, we can just throw away the frozen data now.
 		 */
 		if (jh->b_committed_data) {
-			kfree(jh->b_committed_data);
+			jbd_slab_free(jh->b_committed_data, bh->b_size);
 			jh->b_committed_data = NULL;
 			if (jh->b_frozen_data) {
 				jh->b_committed_data = jh->b_frozen_data;
 				jh->b_frozen_data = NULL;
 			}
 		} else if (jh->b_frozen_data) {
-			kfree(jh->b_frozen_data);
+			jbd_slab_free(jh->b_frozen_data, bh->b_size);
 			jh->b_frozen_data = NULL;
 		}
 
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 8c9b28dff11..f66724ce443 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -84,6 +84,7 @@ EXPORT_SYMBOL(journal_force_commit);
 
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
+static int journal_create_jbd_slab(size_t slab_size);
 
 /*
  * Helper function used to manage commit timeouts
@@ -328,10 +329,10 @@ repeat:
 		char *tmp;
 
 		jbd_unlock_bh_state(bh_in);
-		tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS);
+		tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS);
 		jbd_lock_bh_state(bh_in);
 		if (jh_in->b_frozen_data) {
-			kfree(tmp);
+			jbd_slab_free(tmp, bh_in->b_size);
 			goto repeat;
 		}
 
@@ -1069,17 +1070,17 @@ static int load_superblock(journal_t *journal)
 int journal_load(journal_t *journal)
 {
 	int err;
+	journal_superblock_t *sb;
 
 	err = load_superblock(journal);
 	if (err)
 		return err;
 
+	sb = journal->j_superblock;
 	/* If this is a V2 superblock, then we have to check the
 	 * features flags on it. */
 
 	if (journal->j_format_version >= 2) {
-		journal_superblock_t *sb = journal->j_superblock;
-
 		if ((sb->s_feature_ro_compat &
 		     ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) ||
 		    (sb->s_feature_incompat &
@@ -1090,6 +1091,13 @@ int journal_load(journal_t *journal)
 		}
 	}
 
+	/*
+	 * Create a slab for this blocksize
+	 */
+	err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize));
+	if (err)
+		return err;
+
 	/* Let the recovery code check whether it needs to recover any
 	 * data from the journal. */
 	if (journal_recover(journal))
@@ -1612,6 +1620,77 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 }
 
 /*
+ * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
+ * and allocate frozen and commit buffers from these slabs.
+ *
+ * Reason for doing this is to avoid, SLAB_DEBUG - since it could
+ * cause bh to cross page boundary.
+ */
+
+#define JBD_MAX_SLABS 5
+#define JBD_SLAB_INDEX(size)  (size >> 11)
+
+static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+static const char *jbd_slab_names[JBD_MAX_SLABS] = {
+	"jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
+};
+
+static void journal_destroy_jbd_slabs(void)
+{
+	int i;
+
+	for (i = 0; i < JBD_MAX_SLABS; i++) {
+		if (jbd_slab[i])
+			kmem_cache_destroy(jbd_slab[i]);
+		jbd_slab[i] = NULL;
+	}
+}
+
+static int journal_create_jbd_slab(size_t slab_size)
+{
+	int i = JBD_SLAB_INDEX(slab_size);
+
+	BUG_ON(i >= JBD_MAX_SLABS);
+
+	/*
+	 * Check if we already have a slab created for this size
+	 */
+	if (jbd_slab[i])
+		return 0;
+
+	/*
+	 * Create a slab and force alignment to be same as slabsize -
+	 * this will make sure that allocations won't cross the page
+	 * boundary.
+	 */
+	jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
+				slab_size, slab_size, 0, NULL, NULL);
+	if (!jbd_slab[i]) {
+		printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void * jbd_slab_alloc(size_t size, gfp_t flags)
+{
+	int idx;
+
+	idx = JBD_SLAB_INDEX(size);
+	BUG_ON(jbd_slab[idx] == NULL);
+	return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
+}
+
+void jbd_slab_free(void *ptr,  size_t size)
+{
+	int idx;
+
+	idx = JBD_SLAB_INDEX(size);
+	BUG_ON(jbd_slab[idx] == NULL);
+	kmem_cache_free(jbd_slab[idx], ptr);
+}
+
+/*
  * Journal_head storage management
  */
 static kmem_cache_t *journal_head_cache;
@@ -1799,13 +1878,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
 				printk(KERN_WARNING "%s: freeing "
 						"b_frozen_data\n",
 						__FUNCTION__);
-				kfree(jh->b_frozen_data);
+				jbd_slab_free(jh->b_frozen_data, bh->b_size);
 			}
 			if (jh->b_committed_data) {
 				printk(KERN_WARNING "%s: freeing "
 						"b_committed_data\n",
 						__FUNCTION__);
-				kfree(jh->b_committed_data);
+				jbd_slab_free(jh->b_committed_data, bh->b_size);
 			}
 			bh->b_private = NULL;
 			jh->b_bh = NULL;	/* debug, really */
@@ -1961,6 +2040,7 @@ static void journal_destroy_caches(void)
 	journal_destroy_revoke_caches();
 	journal_destroy_journal_head_cache();
 	journal_destroy_handle_cache();
+	journal_destroy_jbd_slabs();
 }
 
 static int __init journal_init(void)
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 508b2ea91f4..de2e4cbbf79 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -666,8 +666,9 @@ repeat:
 			if (!frozen_buffer) {
 				JBUFFER_TRACE(jh, "allocate memory for buffer");
 				jbd_unlock_bh_state(bh);
-				frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size,
-							    GFP_NOFS);
+				frozen_buffer =
+					jbd_slab_alloc(jh2bh(jh)->b_size,
+							 GFP_NOFS);
 				if (!frozen_buffer) {
 					printk(KERN_EMERG
 					       "%s: OOM for frozen_buffer\n",
@@ -879,7 +880,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 
 repeat:
 	if (!jh->b_committed_data) {
-		committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS);
+		committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
 		if (!committed_data) {
 			printk(KERN_EMERG "%s: No memory for committed data\n",
 				__FUNCTION__);
@@ -906,7 +907,7 @@ repeat:
 out:
 	journal_put_journal_head(jh);
 	if (unlikely(committed_data))
-		kfree(committed_data);
+		jbd_slab_free(committed_data, bh->b_size);
 	return err;
 }
 
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 2a4df9b3779..01b4db9e546 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -237,19 +237,22 @@ static int
 nlm_traverse_files(struct nlm_host *host, int action)
 {
 	struct nlm_file	*file, **fp;
-	int		i;
+	int i, ret = 0;
 
 	mutex_lock(&nlm_file_mutex);
 	for (i = 0; i < FILE_NRHASH; i++) {
 		fp = nlm_files + i;
 		while ((file = *fp) != NULL) {
+			file->f_count++;
+			mutex_unlock(&nlm_file_mutex);
+
 			/* Traverse locks, blocks and shares of this file
 			 * and update file->f_locks count */
-			if (nlm_inspect_file(host, file, action)) {
-				mutex_unlock(&nlm_file_mutex);
-				return 1;
-			}
+			if (nlm_inspect_file(host, file, action))
+				ret = 1;
 
+			mutex_lock(&nlm_file_mutex);
+			file->f_count--;
 			/* No more references to this file. Let go of it. */
 			if (!file->f_blocks && !file->f_locks
 			 && !file->f_shares && !file->f_count) {
@@ -262,7 +265,7 @@ nlm_traverse_files(struct nlm_host *host, int action)
 		}
 	}
 	mutex_unlock(&nlm_file_mutex);
-	return 0;
+	return ret;
 }
 
 /*
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 9ea91c5eeb7..330ff9fc7cf 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -204,6 +204,8 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 	/*
 	 * Allocate the buffer map to keep the superblock small.
 	 */
+	if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
+		goto out_illegal_sb;
 	i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh);
 	map = kmalloc(i, GFP_KERNEL);
 	if (!map)
@@ -263,7 +265,7 @@ out_no_root:
 
 out_no_bitmap:
 	printk("MINIX-fs: bad superblock or unable to read bitmaps\n");
-    out_freemap:
+out_freemap:
 	for (i = 0; i < sbi->s_imap_blocks; i++)
 		brelse(sbi->s_imap[i]);
 	for (i = 0; i < sbi->s_zmap_blocks; i++)
@@ -276,11 +278,16 @@ out_no_map:
 		printk("MINIX-fs: can't allocate map\n");
 	goto out_release;
 
+out_illegal_sb:
+	if (!silent)
+		printk("MINIX-fs: bad superblock\n");
+	goto out_release;
+
 out_no_fs:
 	if (!silent)
 		printk("VFS: Can't find a Minix or Minix V2 filesystem "
 			"on device %s\n", s->s_id);
-    out_release:
+out_release:
 	brelse(bh);
 	goto out;
 
@@ -290,7 +297,7 @@ out_bad_hblock:
 
 out_bad_sb:
 	printk("MINIX-fs: unable to read superblock\n");
- out:
+out:
 	s->s_fs_info = NULL;
 	kfree(sbi);
 	return -EINVAL;
diff --git a/fs/namei.c b/fs/namei.c
index 55a131230f9..432d6bc6fab 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -227,10 +227,10 @@ int generic_permission(struct inode *inode, int mask,
 
 int permission(struct inode *inode, int mask, struct nameidata *nd)
 {
+	umode_t mode = inode->i_mode;
 	int retval, submask;
 
 	if (mask & MAY_WRITE) {
-		umode_t mode = inode->i_mode;
 
 		/*
 		 * Nobody gets write access to a read-only fs.
@@ -247,6 +247,13 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 	}
 
 
+	/*
+	 * MAY_EXEC on regular files requires special handling: We override
+	 * filesystem execute permissions if the mode bits aren't set.
+	 */
+	if ((mask & MAY_EXEC) && S_ISREG(mode) && !(mode & S_IXUGO))
+		return -EACCES;
+
 	/* Ordinary permission routines do not understand MAY_APPEND. */
 	submask = mask & ~MAY_APPEND;
 	if (inode->i_op && inode->i_op->permission)
@@ -1767,6 +1774,8 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 	if (nd->last_type != LAST_NORM)
 		goto fail;
 	nd->flags &= ~LOOKUP_PARENT;
+	nd->flags |= LOOKUP_CREATE;
+	nd->intent.open.flags = O_EXCL;
 
 	/*
 	 * Do the final lookup.
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index cc2b874ad5a..48e892880d5 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -312,7 +312,13 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
 
 static int nfs_release_page(struct page *page, gfp_t gfp)
 {
-	return !nfs_wb_page(page->mapping->host, page);
+	if (gfp & __GFP_FS)
+		return !nfs_wb_page(page->mapping->host, page);
+	else
+		/*
+		 * Avoid deadlock on nfs_wait_on_request().
+		 */
+		return 0;
 }
 
 const struct address_space_operations nfs_file_aops = {
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index b81e7ed3c90..07a5dd57646 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -130,9 +130,7 @@ nfs_idmap_delete(struct nfs4_client *clp)
 
 	if (!idmap)
 		return;
-	dput(idmap->idmap_dentry);
-	idmap->idmap_dentry = NULL;
-	rpc_unlink(idmap->idmap_path);
+	rpc_unlink(idmap->idmap_dentry);
 	clp->cl_idmap = NULL;
 	kfree(idmap);
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e6ee97f19d8..153898e1331 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2668,7 +2668,7 @@ out:
 	nfs4_set_cached_acl(inode, acl);
 }
 
-static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
 {
 	struct page *pages[NFS4ACL_MAXPAGES];
 	struct nfs_getaclargs args = {
@@ -2721,6 +2721,19 @@ out_free:
 	return ret;
 }
 
+static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+{
+	struct nfs4_exception exception = { };
+	ssize_t ret;
+	do {
+		ret = __nfs4_get_acl_uncached(inode, buf, buflen);
+		if (ret >= 0)
+			break;
+		ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception);
+	} while (exception.retry);
+	return ret;
+}
+
 static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
@@ -2737,7 +2750,7 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
 	return nfs4_get_acl_uncached(inode, buf, buflen);
 }
 
-static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
+static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct page *pages[NFS4ACL_MAXPAGES];
@@ -2763,6 +2776,18 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 	return ret;
 }
 
+static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(inode),
+				__nfs4_proc_set_acl(inode, buf, buflen),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
 static int
 nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1750d996f49..730ec8fb31c 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3355,7 +3355,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 	struct kvec	*iov = rcvbuf->head;
 	unsigned int	nr, pglen = rcvbuf->page_len;
 	uint32_t	*end, *entry, *p, *kaddr;
-	uint32_t	len, attrlen;
+	uint32_t	len, attrlen, xlen;
 	int 		hdrlen, recvd, status;
 
 	status = decode_op_hdr(xdr, OP_READDIR);
@@ -3377,10 +3377,10 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 
 	BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE);
 	kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0);
-	end = (uint32_t *) ((char *)p + pglen + readdir->pgbase);
+	end = p + ((pglen + readdir->pgbase) >> 2);
 	entry = p;
 	for (nr = 0; *p++; nr++) {
-		if (p + 3 > end)
+		if (end - p < 3)
 			goto short_pkt;
 		dprintk("cookie = %Lu, ", *((unsigned long long *)p));
 		p += 2;			/* cookie */
@@ -3389,18 +3389,19 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 			printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
 			goto err_unmap;
 		}
-		dprintk("filename = %*s\n", len, (char *)p);
-		p += XDR_QUADLEN(len);
-		if (p + 1 > end)
+		xlen = XDR_QUADLEN(len);
+		if (end - p < xlen + 1)
 			goto short_pkt;
+		dprintk("filename = %*s\n", len, (char *)p);
+		p += xlen;
 		len = ntohl(*p++);	/* bitmap length */
-		p += len;
-		if (p + 1 > end)
+		if (end - p < len + 1)
 			goto short_pkt;
+		p += len;
 		attrlen = XDR_QUADLEN(ntohl(*p++));
-		p += attrlen;		/* attributes */
-		if (p + 2 > end)
+		if (end - p < attrlen + 2)
 			goto short_pkt;
+		p += attrlen;		/* attributes */
 		entry = p;
 	}
 	if (!nr && (entry[0] != 0 || entry[1] == 0))
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 65c0c5b3235..da9cf11c326 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -116,10 +116,17 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
 	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
 	base &= ~PAGE_CACHE_MASK;
 	pglen = PAGE_CACHE_SIZE - base;
-	if (pglen < remainder)
+	for (;;) {
+		if (remainder <= pglen) {
+			memclear_highpage_flush(*pages, base, remainder);
+			break;
+		}
 		memclear_highpage_flush(*pages, base, pglen);
-	else
-		memclear_highpage_flush(*pages, base, remainder);
+		pages++;
+		remainder -= pglen;
+		pglen = PAGE_CACHE_SIZE;
+		base = 0;
+	}
 }
 
 /*
@@ -476,6 +483,8 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
 	unsigned int base = data->args.pgbase;
 	struct page **pages;
 
+	if (data->res.eof)
+		count = data->args.count;
 	if (unlikely(count == 0))
 		return;
 	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
@@ -483,11 +492,7 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
 	count += base;
 	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
 		SetPageUptodate(*pages);
-	/*
-	 * Was this an eof or a short read? If the latter, don't mark the page
-	 * as uptodate yet.
-	 */
-	if (count > 0 && (data->res.eof || data->args.count == data->res.count))
+	if (count != 0)
 		SetPageUptodate(*pages);
 }
 
@@ -502,6 +507,8 @@ static void nfs_readpage_set_pages_error(struct nfs_read_data *data)
 	count += base;
 	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
 		SetPageError(*pages);
+	if (count != 0)
+		SetPageError(*pages);
 }
 
 /*
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c
index abe91ca03ed..0a5927c806c 100644
--- a/fs/partitions/sun.c
+++ b/fs/partitions/sun.c
@@ -74,7 +74,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev)
 	spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect);
 	for (i = 0; i < 8; i++, p++) {
 		unsigned long st_sector;
-		int num_sectors;
+		unsigned int num_sectors;
 
 		st_sector = be32_to_cpu(p->start_cylinder) * spc;
 		num_sectors = be32_to_cpu(p->num_sectors);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 9f2cfc30f9c..94215622544 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -169,7 +169,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 		"Mapped:       %8lu kB\n"
 		"Slab:         %8lu kB\n"
 		"PageTables:   %8lu kB\n"
-		"NFS Unstable: %8lu kB\n"
+		"NFS_Unstable: %8lu kB\n"
 		"Bounce:       %8lu kB\n"
 		"CommitLimit:  %8lu kB\n"
 		"Committed_AS: %8lu kB\n"
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 39fedaa88a0..d935fb9394e 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -424,7 +424,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf)
 	int res = -ENOTDIR;
 	if (!file->f_op || !file->f_op->readdir)
 		goto out;
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR);
 //        down(&inode->i_zombie);
 	res = -ENOENT;
 	if (!IS_DEADDIR(inode)) {
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 7de172efa08..fcce1a21a51 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1659,7 +1659,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 		iput(inode);
 		goto error_out;
 	}
-	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_maxbytes = 1<<30;
 	return 0;
 
 error_out:
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index e1b0e8cfecb..0abd66ce36e 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -239,37 +239,51 @@ void udf_truncate_extents(struct inode * inode)
 	{
 		if (offset)
 		{
-			extoffset -= adsize;
-			etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1);
-			if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))
-			{
-				extoffset -= adsize;
-				elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset);
-				udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0);
+			/*
+			 *  OK, there is not extent covering inode->i_size and
+			 *  no extent above inode->i_size => truncate is
+			 *  extending the file by 'offset'.
+			 */
+			if ((!bh && extoffset == udf_file_entry_alloc_offset(inode)) ||
+			    (bh && extoffset == sizeof(struct allocExtDesc))) {
+				/* File has no extents at all! */
+				memset(&eloc, 0x00, sizeof(kernel_lb_addr));
+				elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
+				udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
 			}
-			else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30))
-			{
-				kernel_lb_addr neloc = { 0, 0 };
+			else {
 				extoffset -= adsize;
-				nelen = EXT_NOT_RECORDED_NOT_ALLOCATED |
-					((elen + offset + inode->i_sb->s_blocksize - 1) &
-					~(inode->i_sb->s_blocksize - 1));
-				udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1);
-				udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1);
-			}
-			else
-			{
-				if (elen & (inode->i_sb->s_blocksize - 1))
+				etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1);
+				if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))
+				{
+					extoffset -= adsize;
+					elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset);
+					udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0);
+				}
+				else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30))
 				{
+					kernel_lb_addr neloc = { 0, 0 };
 					extoffset -= adsize;
-					elen = EXT_RECORDED_ALLOCATED |
-						((elen + inode->i_sb->s_blocksize - 1) &
+					nelen = EXT_NOT_RECORDED_NOT_ALLOCATED |
+						((elen + offset + inode->i_sb->s_blocksize - 1) &
 						~(inode->i_sb->s_blocksize - 1));
-					udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1);
+					udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1);
+					udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1);
+				}
+				else
+				{
+					if (elen & (inode->i_sb->s_blocksize - 1))
+					{
+						extoffset -= adsize;
+						elen = EXT_RECORDED_ALLOCATED |
+							((elen + inode->i_sb->s_blocksize - 1) &
+							~(inode->i_sb->s_blocksize - 1));
+						udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1);
+					}
+					memset(&eloc, 0x00, sizeof(kernel_lb_addr));
+					elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
+					udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
 				}
-				memset(&eloc, 0x00, sizeof(kernel_lb_addr));
-				elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
-				udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
 			}
 		}
 	}
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index e7c8615beb6..30c6e8a9446 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -169,18 +169,20 @@ static void ufs_clear_frag(struct inode *inode, struct buffer_head *bh)
 
 static struct buffer_head *
 ufs_clear_frags(struct inode *inode, sector_t beg,
-		unsigned int n)
+		unsigned int n, sector_t want)
 {
-	struct buffer_head *res, *bh;
+	struct buffer_head *res = NULL, *bh;
 	sector_t end = beg + n;
 
-	res = sb_getblk(inode->i_sb, beg);
-	ufs_clear_frag(inode, res);
-	for (++beg; beg < end; ++beg) {
+	for (; beg < end; ++beg) {
 		bh = sb_getblk(inode->i_sb, beg);
 		ufs_clear_frag(inode, bh);
-		brelse(bh);
+		if (want != beg)
+			brelse(bh);
+		else
+			res = bh;
 	}
+	BUG_ON(!res);
 	return res;
 }
 
@@ -265,7 +267,9 @@ repeat:
 			lastfrag = ufsi->i_lastfrag;
 			
 		}
-		goal = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]) + uspi->s_fpb;
+		tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]);
+		if (tmp)
+			goal = tmp + uspi->s_fpb;
 		tmp = ufs_new_fragments (inode, p, fragment - blockoff, 
 					 goal, required + blockoff,
 					 err, locked_page);
@@ -277,13 +281,15 @@ repeat:
 		tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff),
 					fs32_to_cpu(sb, *p), required +  (blockoff - lastblockoff),
 					err, locked_page);
-	}
+	} else /* (lastblock > block) */ {
 	/*
 	 * We will allocate new block before last allocated block
 	 */
-	else /* (lastblock > block) */ {
-		if (lastblock && (tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock-1])))
-			goal = tmp + uspi->s_fpb;
+		if (block) {
+			tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[block-1]);
+			if (tmp)
+				goal = tmp + uspi->s_fpb;
+		}
 		tmp = ufs_new_fragments(inode, p, fragment - blockoff,
 					goal, uspi->s_fpb, err, locked_page);
 	}
@@ -296,7 +302,7 @@ repeat:
 	}
 
 	if (!phys) {
-		result = ufs_clear_frags(inode, tmp + blockoff, required);
+		result = ufs_clear_frags(inode, tmp, required, tmp + blockoff);
 	} else {
 		*phys = tmp + blockoff;
 		result = NULL;
@@ -383,7 +389,7 @@ repeat:
 		}
 	}
 
-	if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1]) + uspi->s_fpb))
+	if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1])))
 		goal = tmp + uspi->s_fpb;
 	else
 		goal = bh->b_blocknr + uspi->s_fpb;
@@ -397,7 +403,8 @@ repeat:
 
 
 	if (!phys) {
-		result = ufs_clear_frags(inode, tmp + blockoff, uspi->s_fpb);
+		result = ufs_clear_frags(inode, tmp, uspi->s_fpb,
+					 tmp + blockoff);
 	} else {
 		*phys = tmp + blockoff;
 		*new = 1;
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index c9b55872079..ea11d04c41a 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -375,17 +375,15 @@ static int ufs_alloc_lastblock(struct inode *inode)
 	int err = 0;
 	struct address_space *mapping = inode->i_mapping;
 	struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi;
-	struct ufs_inode_info *ufsi = UFS_I(inode);
 	unsigned lastfrag, i, end;
 	struct page *lastpage;
 	struct buffer_head *bh;
 
 	lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift;
 
-	if (!lastfrag) {
-		ufsi->i_lastfrag = 0;
+	if (!lastfrag)
 		goto out;
-	}
+
 	lastfrag--;
 
 	lastpage = ufs_get_locked_page(mapping, lastfrag >>
@@ -400,25 +398,25 @@ static int ufs_alloc_lastblock(struct inode *inode)
        for (i = 0; i < end; ++i)
                bh = bh->b_this_page;
 
-       if (!buffer_mapped(bh)) {
-               err = ufs_getfrag_block(inode, lastfrag, bh, 1);
-
-               if (unlikely(err))
-                       goto out_unlock;
-
-               if (buffer_new(bh)) {
-                       clear_buffer_new(bh);
-                       unmap_underlying_metadata(bh->b_bdev,
-						 bh->b_blocknr);
-		       /*
-			* we do not zeroize fragment, because of
-			* if it maped to hole, it already contains zeroes
-			*/
-                       set_buffer_uptodate(bh);
-                       mark_buffer_dirty(bh);
-                       set_page_dirty(lastpage);
-               }
+
+       err = ufs_getfrag_block(inode, lastfrag, bh, 1);
+
+       if (unlikely(err))
+	       goto out_unlock;
+
+       if (buffer_new(bh)) {
+	       clear_buffer_new(bh);
+	       unmap_underlying_metadata(bh->b_bdev,
+					 bh->b_blocknr);
+	       /*
+		* we do not zeroize fragment, because of
+		* if it maped to hole, it already contains zeroes
+		*/
+	       set_buffer_uptodate(bh);
+	       mark_buffer_dirty(bh);
+	       set_page_dirty(lastpage);
        }
+
 out_unlock:
        ufs_put_locked_page(lastpage);
 out:
@@ -440,23 +438,11 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
 	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
 		return -EPERM;
 
-	if (inode->i_size > old_i_size) {
-		/*
-		 * if we expand file we should care about
-		 * allocation of block for last byte first of all
-		 */
-		err = ufs_alloc_lastblock(inode);
+	err = ufs_alloc_lastblock(inode);
 
-		if (err) {
-			i_size_write(inode, old_i_size);
-			goto out;
-		}
-		/*
-		 * go away, because of we expand file, and we do not
-		 * need free blocks, and zeroizes page
-		 */
-		lock_kernel();
-		goto almost_end;
+	if (err) {
+		i_size_write(inode, old_i_size);
+		goto out;
 	}
 
 	block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block);
@@ -477,21 +463,8 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
 		yield();
 	}
 
-	if (inode->i_size < old_i_size) {
-		/*
-		 * now we should have enough space
-		 * to allocate block for last byte
-		 */
-		err = ufs_alloc_lastblock(inode);
-		if (err)
-			/*
-			 * looks like all the same - we have no space,
-			 * but we truncate file already
-			 */
-			inode->i_size = (ufsi->i_lastfrag - 1) * uspi->s_fsize;
-	}
-almost_end:
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+	ufsi->i_lastfrag = DIRECT_FRAGMENT;
 	unlock_kernel();
 	mark_inode_dirty(inode);
 out: