16 files changed, 269 insertions, 223 deletions
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
new file mode 100644
index 00000000000..701b7a3a872
--- /dev/null
+++ b/fs/ocfs2/Kconfig
@@ -0,0 +1,85 @@
+config OCFS2_FS
+	tristate "OCFS2 file system support"
+	depends on NET && SYSFS
+	select CONFIGFS_FS
+	select JBD2
+	select CRC32
+	select QUOTA
+	select QUOTA_TREE
+	help
+	  OCFS2 is a general purpose extent based shared disk cluster file
+	  system with many similarities to ext3. It supports 64 bit inode
+	  numbers, and has automatically extending metadata groups which may
+	  also make it attractive for non-clustered use.
+
+	  You'll want to install the ocfs2-tools package in order to at least
+	  get "mount.ocfs2".
+
+	  Project web page:    http://oss.oracle.com/projects/ocfs2
+	  Tools web page:      http://oss.oracle.com/projects/ocfs2-tools
+	  OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
+
+	  For more information on OCFS2, see the file
+	  <file:Documentation/filesystems/ocfs2.txt>.
+
+config OCFS2_FS_O2CB
+	tristate "O2CB Kernelspace Clustering"
+	depends on OCFS2_FS
+	default y
+	help
+	  OCFS2 includes a simple kernelspace clustering package, the OCFS2
+	  Cluster Base.  It only requires a very small userspace component
+	  to configure it. This comes with the standard ocfs2-tools package.
+	  O2CB is limited to maintaining a cluster for OCFS2 file systems.
+	  It cannot manage any other cluster applications.
+
+	  It is always safe to say Y here, as the clustering method is
+	  run-time selectable.
+
+config OCFS2_FS_USERSPACE_CLUSTER
+	tristate "OCFS2 Userspace Clustering"
+	depends on OCFS2_FS && DLM
+	default y
+	help
+	  This option will allow OCFS2 to use userspace clustering services
+	  in conjunction with the DLM in fs/dlm.  If you are using a
+	  userspace cluster manager, say Y here.
+
+	  It is safe to say Y, as the clustering method is run-time
+	  selectable.
+
+config OCFS2_FS_STATS
+	bool "OCFS2 statistics"
+	depends on OCFS2_FS
+	default y
+	help
+	  This option allows some fs statistics to be captured. Enabling
+	  this option may increase the memory consumption.
+
+config OCFS2_DEBUG_MASKLOG
+	bool "OCFS2 logging support"
+	depends on OCFS2_FS
+	default y
+	help
+	  The ocfs2 filesystem has an extensive logging system.  The system
+	  allows selection of events to log via files in /sys/o2cb/logmask/.
+	  This option will enlarge your kernel, but it allows debugging of
+	  ocfs2 filesystem issues.
+
+config OCFS2_DEBUG_FS
+	bool "OCFS2 expensive checks"
+	depends on OCFS2_FS
+	default n
+	help
+	  This option will enable expensive consistency checks. Enable
+	  this option for debugging only as it is likely to decrease
+	  performance of the filesystem.
+
+config OCFS2_FS_POSIX_ACL
+	bool "OCFS2 POSIX Access Control Lists"
+	depends on OCFS2_FS
+	select FS_POSIX_ACL
+	default n
+	help
+	  Posix Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index d861096c9d8..19e3a96aa02 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -176,7 +176,8 @@ static int ocfs2_dinode_insert_check(struct inode *inode,
 
 	BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
 	mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
-			(OCFS2_I(inode)->ip_clusters != rec->e_cpos),
+			(OCFS2_I(inode)->ip_clusters !=
+			 le32_to_cpu(rec->e_cpos)),
 			"Device %s, asking for sparse allocation: inode %llu, "
 			"cpos %u, clusters %u\n",
 			osb->dev_str,
@@ -4796,6 +4797,29 @@ out:
 	return ret;
 }
 
+static int ocfs2_replace_extent_rec(struct inode *inode,
+				    handle_t *handle,
+				    struct ocfs2_path *path,
+				    struct ocfs2_extent_list *el,
+				    int split_index,
+				    struct ocfs2_extent_rec *split_rec)
+{
+	int ret;
+
+	ret = ocfs2_path_bh_journal_access(handle, inode, path,
+					   path_num_items(path) - 1);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	el->l_recs[split_index] = *split_rec;
+
+	ocfs2_journal_dirty(handle, path_leaf_bh(path));
+out:
+	return ret;
+}
+
 /*
  * Mark part or all of the extent record at split_index in the leaf
  * pointed to by path as written. This removes the unwritten
@@ -4885,7 +4909,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 
 	if (ctxt.c_contig_type == CONTIG_NONE) {
 		if (ctxt.c_split_covers_rec)
-			el->l_recs[split_index] = *split_rec;
+			ret = ocfs2_replace_extent_rec(inode, handle,
+						       path, el,
+						       split_index, split_rec);
 		else
 			ret = ocfs2_split_and_insert(inode, handle, path, et,
 						     &last_eb_bh, split_index,
@@ -5390,6 +5416,9 @@ int ocfs2_remove_btree_range(struct inode *inode,
 		goto out;
 	}
 
+	vfs_dq_free_space_nodirty(inode,
+				  ocfs2_clusters_to_bytes(inode->i_sb, len));
+
 	ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac,
 				  dealloc);
 	if (ret) {
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index a067a6cffb0..8e1709a679b 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -227,7 +227,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
 	size = i_size_read(inode);
 
 	if (size > PAGE_CACHE_SIZE ||
-	    size > ocfs2_max_inline_data(inode->i_sb)) {
+	    size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) {
 		ocfs2_error(inode->i_sb,
 			    "Inode %llu has with inline data has bad size: %Lu",
 			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -1555,6 +1555,7 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
 	int ret, written = 0;
 	loff_t end = pos + len;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_dinode *di = NULL;
 
 	mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n",
 	     (unsigned long long)oi->ip_blkno, len, (unsigned long long)pos,
@@ -1587,7 +1588,9 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
 	/*
 	 * Check whether the write can fit.
 	 */
-	if (mmap_page || end > ocfs2_max_inline_data(inode->i_sb))
+	di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
+	if (mmap_page ||
+	    end > ocfs2_max_inline_data_with_xattr(inode->i_sb, di))
 		return 0;
 
 do_inline_write:
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b1cc7c381e8..e9d7c2038c0 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -38,6 +38,7 @@
 #include "dlmglue.h"
 #include "file.h"
 #include "inode.h"
+#include "super.h"
 
 
 static int ocfs2_dentry_revalidate(struct dentry *dentry,
@@ -294,6 +295,34 @@ out_attach:
 	return ret;
 }
 
+static DEFINE_SPINLOCK(dentry_list_lock);
+
+/* We limit the number of dentry locks to drop in one go. We have
+ * this limit so that we don't starve other users of ocfs2_wq. */
+#define DL_INODE_DROP_COUNT 64
+
+/* Drop inode references from dentry locks */
+void ocfs2_drop_dl_inodes(struct work_struct *work)
+{
+	struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
+					       dentry_lock_work);
+	struct ocfs2_dentry_lock *dl;
+	int drop_count = DL_INODE_DROP_COUNT;
+
+	spin_lock(&dentry_list_lock);
+	while (osb->dentry_lock_list && drop_count--) {
+		dl = osb->dentry_lock_list;
+		osb->dentry_lock_list = dl->dl_next;
+		spin_unlock(&dentry_list_lock);
+		iput(dl->dl_inode);
+		kfree(dl);
+		spin_lock(&dentry_list_lock);
+	}
+	if (osb->dentry_lock_list)
+		queue_work(ocfs2_wq, &osb->dentry_lock_work);
+	spin_unlock(&dentry_list_lock);
+}
+
 /*
  * ocfs2_dentry_iput() and friends.
  *
@@ -318,16 +347,23 @@ out_attach:
 static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
 				   struct ocfs2_dentry_lock *dl)
 {
-	iput(dl->dl_inode);
 	ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
 	ocfs2_lock_res_free(&dl->dl_lockres);
-	kfree(dl);
+
+	/* We leave dropping of inode reference to ocfs2_wq as that can
+	 * possibly lead to inode deletion which gets tricky */
+	spin_lock(&dentry_list_lock);
+	if (!osb->dentry_lock_list)
+		queue_work(ocfs2_wq, &osb->dentry_lock_work);
+	dl->dl_next = osb->dentry_lock_list;
+	osb->dentry_lock_list = dl;
+	spin_unlock(&dentry_list_lock);
 }
 
 void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
 			   struct ocfs2_dentry_lock *dl)
 {
-	int unlock = 0;
+	int unlock;
 
 	BUG_ON(dl->dl_count == 0);
 
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index c091c34d988..d06e16c0664 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -29,8 +29,13 @@
 extern struct dentry_operations ocfs2_dentry_ops;
 
 struct ocfs2_dentry_lock {
+	/* Use count of dentry lock */
 	unsigned int		dl_count;
-	u64			dl_parent_blkno;
+	union {
+		/* Linked list of dentry locks to release */
+		struct ocfs2_dentry_lock *dl_next;
+		u64			dl_parent_blkno;
+	};
 
 	/*
 	 * The ocfs2_dentry_lock keeps an inode reference until
@@ -47,6 +52,8 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
 void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
 			   struct ocfs2_dentry_lock *dl);
 
+void ocfs2_drop_dl_inodes(struct work_struct *work);
+
 struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
 				      int skip_unhashed);
 
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 54e182a27ca..0a281394785 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1849,12 +1849,12 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
 		if (!mle) {
 			if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN &&
 			    res->owner != assert->node_idx) {
-				mlog(ML_ERROR, "assert_master from "
-					  "%u, but current owner is "
-					  "%u! (%.*s)\n",
-				       assert->node_idx, res->owner,
-				       namelen, name);
-				goto kill;
+				mlog(ML_ERROR, "DIE! Mastery assert from %u, "
+				     "but current owner is %u! (%.*s)\n",
+				     assert->node_idx, res->owner, namelen,
+				     name);
+				__dlm_print_one_lock_resource(res);
+				BUG();
 			}
 		} else if (mle->type != DLM_MLE_MIGRATION) {
 			if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index d1295203029..4060bb328bc 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -181,8 +181,7 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 
 		spin_lock(&res->spinlock);
 		/* This ensures that clear refmap is sent after the set */
-		__dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_SETREF_INPROG |
-						  DLM_LOCK_RES_MIGRATING));
+		__dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
 		spin_unlock(&res->spinlock);
 
 		/* clear our bit from the master's refmap, ignore errors */
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 86ca085ef32..fcf879ed693 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -117,11 +117,11 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
 	else
 		BUG_ON(res->owner == dlm->node_num);
 
-	spin_lock(&dlm->spinlock);
+	spin_lock(&dlm->ast_lock);
 	/* We want to be sure that we're not freeing a lock
 	 * that still has AST's pending... */
 	in_use = !list_empty(&lock->ast_list);
-	spin_unlock(&dlm->spinlock);
+	spin_unlock(&dlm->ast_lock);
 	if (in_use) {
 	       mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock "
 		    "while waiting for an ast!", res->lockname.len,
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index b0c4cadd4c4..7219a86d34c 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -320,9 +320,14 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
 					struct ocfs2_lock_res *lockres);
 static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
 						int convert);
-#define ocfs2_log_dlm_error(_func, _err, _lockres) do {			\
-	mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \
-	     _err, _func, _lockres->l_name);				\
+#define ocfs2_log_dlm_error(_func, _err, _lockres) do {					\
+	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY)				\
+		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n",	\
+		     _err, _func, _lockres->l_name);					\
+	else										\
+		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n",	\
+		     _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name,	\
+		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres));		\
 } while (0)
 static int ocfs2_downconvert_thread(void *arg);
 static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
@@ -2860,6 +2865,10 @@ static void ocfs2_unlock_ast(void *opaque, int error)
 	case OCFS2_UNLOCK_CANCEL_CONVERT:
 		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
 		lockres->l_action = OCFS2_AST_INVALID;
+		/* Downconvert thread may have requeued this lock, we
+		 * need to wake it. */
+		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
+			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
 		break;
 	case OCFS2_UNLOCK_DROP_LOCK:
 		lockres->l_level = DLM_LOCK_IV;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3c3532e1307..172850a9a12 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -513,8 +513,10 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
 static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
 					       loff_t new_size)
 {
-	return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode,
-						   new_size);
+	return jbd2_journal_begin_ordered_truncate(
+				OCFS2_SB(inode->i_sb)->journal->j_journal,
+				&OCFS2_I(inode)->ip_jinode,
+				new_size);
 }
 
 #endif /* OCFS2_JOURNAL_H */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 084aba86c3b..4b11762f249 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -532,7 +532,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 
 		fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
 
-		fe->id2.i_data.id_count = cpu_to_le16(ocfs2_max_inline_data(osb->sb));
+		fe->id2.i_data.id_count = cpu_to_le16(
+				ocfs2_max_inline_data_with_xattr(osb->sb, fe));
 	} else {
 		fel = &fe->id2.i_list;
 		fel->l_tree_depth = 0;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index ad5c24a29ed..946d3c34b90 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -210,6 +210,7 @@ struct ocfs2_journal;
 struct ocfs2_slot_info;
 struct ocfs2_recovery_map;
 struct ocfs2_quota_recovery;
+struct ocfs2_dentry_lock;
 struct ocfs2_super
 {
 	struct task_struct *commit_task;
@@ -325,6 +326,11 @@ struct ocfs2_super
 	struct list_head blocked_lock_list;
 	unsigned long blocked_lock_count;
 
+	/* List of dentry locks to release. Anyone can add locks to
+	 * the list, ocfs2_wq processes the list  */
+	struct ocfs2_dentry_lock *dentry_lock_list;
+	struct work_struct dentry_lock_work;
+
 	wait_queue_head_t		osb_mount_event;
 
 	/* Truncate log info */
@@ -335,6 +341,9 @@ struct ocfs2_super
 	struct ocfs2_node_map		osb_recovering_orphan_dirs;
 	unsigned int			*osb_orphan_wipes;
 	wait_queue_head_t		osb_wipe_event;
+
+	/* used to protect metaecc calculation check of xattr. */
+	spinlock_t osb_xattr_lock;
 };
 
 #define OCFS2_SB(sb)	    ((struct ocfs2_super *)(sb)->s_fs_info)
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c7ae45aaa36..2332ef740f4 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1070,12 +1070,6 @@ static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
 		 offsetof(struct ocfs2_dinode, id2.i_symlink);
 }
 
-static inline int ocfs2_max_inline_data(struct super_block *sb)
-{
-	return sb->s_blocksize -
-		offsetof(struct ocfs2_dinode, id2.i_data.id_data);
-}
-
 static inline int ocfs2_max_inline_data_with_xattr(struct super_block *sb,
 						   struct ocfs2_dinode *di)
 {
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 6aff8f2d3e4..1ed0f7c8686 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -754,7 +754,9 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
 	if (dquot->dq_flags & mask)
 		sync = 1;
 	spin_unlock(&dq_data_lock);
-	if (!sync) {
+	/* This is a slight hack but we can't afford getting global quota
+	 * lock if we already have a transaction started. */
+	if (!sync || journal_current_handle()) {
 		status = ocfs2_write_dquot(dquot);
 		goto out;
 	}
@@ -810,171 +812,6 @@ out:
 	return status;
 }
 
-/* This is difficult. We have to lock quota inode and start transaction
- * in this function but we don't want to take the penalty of exlusive
- * quota file lock when we are just going to use cached structures. So
- * we just take read lock check whether we have dquot cached and if so,
- * we don't have to take the write lock... */
-static int ocfs2_dquot_initialize(struct inode *inode, int type)
-{
-	handle_t *handle = NULL;
-	int status = 0;
-	struct super_block *sb = inode->i_sb;
-	struct ocfs2_mem_dqinfo *oinfo;
-	int exclusive = 0;
-	int cnt;
-	qid_t id;
-
-	mlog_entry_void();
-
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (type != -1 && cnt != type)
-			continue;
-		if (!sb_has_quota_active(sb, cnt))
-			continue;
-		oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
-		status = ocfs2_lock_global_qf(oinfo, 0);
-		if (status < 0)
-			goto out;
-		/* This is just a performance optimization not a reliable test.
-		 * Since we hold an inode lock, noone can actually release
-		 * the structure until we are finished with initialization. */
-		if (inode->i_dquot[cnt] != NODQUOT) {
-			ocfs2_unlock_global_qf(oinfo, 0);
-			continue;
-		}
-		/* When we have inode lock, we know that no dquot_release() can
-		 * run and thus we can safely check whether we need to
-		 * read+modify global file to get quota information or whether
-		 * our node already has it. */
-		if (cnt == USRQUOTA)
-			id = inode->i_uid;
-		else if (cnt == GRPQUOTA)
-			id = inode->i_gid;
-		else
-			BUG();
-		/* Obtain exclusion from quota off... */
-		down_write(&sb_dqopt(sb)->dqptr_sem);
-		exclusive = !dquot_is_cached(sb, id, cnt);
-		up_write(&sb_dqopt(sb)->dqptr_sem);
-		if (exclusive) {
-			status = ocfs2_lock_global_qf(oinfo, 1);
-			if (status < 0) {
-				exclusive = 0;
-				mlog_errno(status);
-				goto out_ilock;
-			}
-			handle = ocfs2_start_trans(OCFS2_SB(sb),
-					ocfs2_calc_qinit_credits(sb, cnt));
-			if (IS_ERR(handle)) {
-				status = PTR_ERR(handle);
-				mlog_errno(status);
-				goto out_ilock;
-			}
-		}
-		dquot_initialize(inode, cnt);
-		if (exclusive) {
-			ocfs2_commit_trans(OCFS2_SB(sb), handle);
-			ocfs2_unlock_global_qf(oinfo, 1);
-		}
-		ocfs2_unlock_global_qf(oinfo, 0);
-	}
-	mlog_exit(0);
-	return 0;
-out_ilock:
-	if (exclusive)
-		ocfs2_unlock_global_qf(oinfo, 1);
-	ocfs2_unlock_global_qf(oinfo, 0);
-out:
-	mlog_exit(status);
-	return status;
-}
-
-static int ocfs2_dquot_drop_slow(struct inode *inode)
-{
-	int status = 0;
-	int cnt;
-	int got_lock[MAXQUOTAS] = {0, 0};
-	handle_t *handle;
-	struct super_block *sb = inode->i_sb;
-	struct ocfs2_mem_dqinfo *oinfo;
-
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!sb_has_quota_active(sb, cnt))
-			continue;
-		oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
-		status = ocfs2_lock_global_qf(oinfo, 1);
-		if (status < 0)
-			goto out;
-		got_lock[cnt] = 1;
-	}
-	handle = ocfs2_start_trans(OCFS2_SB(sb),
-			ocfs2_calc_qinit_credits(sb, USRQUOTA) +
-			ocfs2_calc_qinit_credits(sb, GRPQUOTA));
-	if (IS_ERR(handle)) {
-		status = PTR_ERR(handle);
-		mlog_errno(status);
-		goto out;
-	}
-	dquot_drop(inode);
-	ocfs2_commit_trans(OCFS2_SB(sb), handle);
-out:
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if (got_lock[cnt]) {
-			oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
-			ocfs2_unlock_global_qf(oinfo, 1);
-		}
-	return status;
-}
-
-/* See the comment before ocfs2_dquot_initialize. */
-static int ocfs2_dquot_drop(struct inode *inode)
-{
-	int status = 0;
-	struct super_block *sb = inode->i_sb;
-	struct ocfs2_mem_dqinfo *oinfo;
-	int exclusive = 0;
-	int cnt;
-	int got_lock[MAXQUOTAS] = {0, 0};
-
-	mlog_entry_void();
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!sb_has_quota_active(sb, cnt))
-			continue;
-		oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
-		status = ocfs2_lock_global_qf(oinfo, 0);
-		if (status < 0)
-			goto out;
-		got_lock[cnt] = 1;
-	}
-	/* Lock against anyone releasing references so that when when we check
-	 * we know we are not going to be last ones to release dquot */
-	down_write(&sb_dqopt(sb)->dqptr_sem);
-	/* Urgh, this is a terrible hack :( */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt] != NODQUOT &&
-		    atomic_read(&inode->i_dquot[cnt]->dq_count) > 1) {
-			exclusive = 1;
-			break;
-		}
-	}
-	if (!exclusive)
-		dquot_drop_locked(inode);
-	up_write(&sb_dqopt(sb)->dqptr_sem);
-out:
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if (got_lock[cnt]) {
-			oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
-			ocfs2_unlock_global_qf(oinfo, 0);
-		}
-	/* In case we bailed out because we had to do expensive locking
-	 * do it now... */
-	if (exclusive)
-		status = ocfs2_dquot_drop_slow(inode);
-	mlog_exit(status);
-	return status;
-}
-
 static struct dquot *ocfs2_alloc_dquot(struct super_block *sb, int type)
 {
 	struct ocfs2_dquot *dquot =
@@ -991,8 +828,8 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
 }
 
 struct dquot_operations ocfs2_quota_operations = {
-	.initialize	= ocfs2_dquot_initialize,
-	.drop		= ocfs2_dquot_drop,
+	.initialize	= dquot_initialize,
+	.drop		= dquot_drop,
 	.alloc_space	= dquot_alloc_space,
 	.alloc_inode	= dquot_alloc_inode,
 	.free_space	= dquot_free_space,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 43ed11345b5..7ac83a81ee5 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1537,6 +1537,13 @@ static int ocfs2_get_sector(struct super_block *sb,
 	unlock_buffer(*bh);
 	ll_rw_block(READ, 1, bh);
 	wait_on_buffer(*bh);
+	if (!buffer_uptodate(*bh)) {
+		mlog_errno(-EIO);
+		brelse(*bh);
+		*bh = NULL;
+		return -EIO;
+	}
+
 	return 0;
 }
 
@@ -1747,6 +1754,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	INIT_LIST_HEAD(&osb->blocked_lock_list);
 	osb->blocked_lock_count = 0;
 	spin_lock_init(&osb->osb_lock);
+	spin_lock_init(&osb->osb_xattr_lock);
 	ocfs2_init_inode_steal_slot(osb);
 
 	atomic_set(&osb->alloc_stats.moves, 0);
@@ -1887,6 +1895,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
 	journal->j_state = OCFS2_JOURNAL_FREE;
 
+	INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes);
+	osb->dentry_lock_list = NULL;
+
 	/* get some pseudo constants for clustersize bits */
 	osb->s_clustersize_bits =
 		le32_to_cpu(di->id2.i_super.s_clustersize_bits);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e1d638af6ac..2563df89fc2 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -82,13 +82,14 @@ struct ocfs2_xattr_set_ctxt {
 
 #define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
 #define OCFS2_XATTR_INLINE_SIZE	80
+#define OCFS2_XATTR_HEADER_GAP	4
 #define OCFS2_XATTR_FREE_IN_IBODY	(OCFS2_MIN_XATTR_INLINE_SIZE \
 					 - sizeof(struct ocfs2_xattr_header) \
-					 - sizeof(__u32))
+					 - OCFS2_XATTR_HEADER_GAP)
 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr)	((ptr)->i_sb->s_blocksize \
 					 - sizeof(struct ocfs2_xattr_block) \
 					 - sizeof(struct ocfs2_xattr_header) \
-					 - sizeof(__u32))
+					 - OCFS2_XATTR_HEADER_GAP)
 
 static struct ocfs2_xattr_def_value_root def_xv = {
 	.xv.xr_list.l_count = cpu_to_le16(1),
@@ -274,10 +275,12 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 			       bucket->bu_blocks, bucket->bu_bhs, 0,
 			       NULL);
 	if (!rc) {
+		spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 		rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
 						 bucket->bu_bhs,
 						 bucket->bu_blocks,
 						 &bucket_xh(bucket)->xh_check);
+		spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 		if (rc)
 			mlog_errno(rc);
 	}
@@ -310,9 +313,11 @@ static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
 {
 	int i;
 
+	spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 	ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
 				   bucket->bu_bhs, bucket->bu_blocks,
 				   &bucket_xh(bucket)->xh_check);
+	spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 
 	for (i = 0; i < bucket->bu_blocks; i++)
 		ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
@@ -542,8 +547,12 @@ int ocfs2_calc_xattr_init(struct inode *dir,
 	 * when blocksize = 512, may reserve one more cluser for
 	 * xattr bucket, otherwise reserve one metadata block
 	 * for them is ok.
+	 * If this is a new directory with inline data,
+	 * we choose to reserve the entire inline area for
+	 * directory contents and force an external xattr block.
 	 */
 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
+	    (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
 	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
 		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
 		if (ret) {
@@ -1507,7 +1516,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 		last += 1;
 	}
 
-	free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
+	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
 	if (free < 0)
 		return -EIO;
 
@@ -2190,7 +2199,7 @@ static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
 		last += 1;
 	}
 
-	free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
+	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
 	if (free < 0)
 		return 0;
 
@@ -2592,8 +2601,9 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
 
 	if (!ret) {
 		/* Update inode ctime. */
-		ret = ocfs2_journal_access(ctxt->handle, inode, xis->inode_bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
+		ret = ocfs2_journal_access_di(ctxt->handle, inode,
+					      xis->inode_bh,
+					      OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -4729,13 +4739,6 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
 	vb.vb_xv = (struct ocfs2_xattr_value_root *)
 		(vb.vb_bh->b_data + offset % blocksize);
 
-	ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
-						OCFS2_JOURNAL_ACCESS_WRITE);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-
 	/*
 	 * From here on out we have to dirty the bucket.  The generic
 	 * value calls only modify one of the bucket's bhs, but we need
@@ -4748,12 +4751,18 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
 	ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_dirty;
+		goto out;
+	}
+
+	ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
+						OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
 	}
 
 	xe->xe_value_size = cpu_to_le64(len);
 
-out_dirty:
 	ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
 
 out:
@@ -4786,19 +4795,33 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
 						char *val,
 						int value_len)
 {
-	int offset;
+	int ret, offset, block_off;
 	struct ocfs2_xattr_value_root *xv;
 	struct ocfs2_xattr_entry *xe = xs->here;
+	struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
+	void *base;
 
 	BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
 
-	offset = le16_to_cpu(xe->xe_name_offset) +
-		 OCFS2_XATTR_SIZE(xe->xe_name_len);
+	ret = ocfs2_xattr_bucket_get_name_value(inode, xh,
+						xe - xh->xh_entries,
+						&block_off,
+						&offset);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
 
-	xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
+	base = bucket_block(xs->bucket, block_off);
+	xv = (struct ocfs2_xattr_value_root *)(base + offset +
+		 OCFS2_XATTR_SIZE(xe->xe_name_len));
 
-	return __ocfs2_xattr_set_value_outside(inode, handle,
-					       xv, val, value_len);
+	ret = __ocfs2_xattr_set_value_outside(inode, handle,
+					      xv, val, value_len);
+	if (ret)
+		mlog_errno(ret);
+out:
+	return ret;
 }
 
 static int ocfs2_rm_xattr_cluster(struct inode *inode,
@@ -5061,8 +5084,8 @@ try_again:
 	xh_free_start = le16_to_cpu(xh->xh_free_start);
 	header_size = sizeof(struct ocfs2_xattr_header) +
 			count * sizeof(struct ocfs2_xattr_entry);
-	max_free = OCFS2_XATTR_BUCKET_SIZE -
-		le16_to_cpu(xh->xh_name_value_len) - header_size;
+	max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
+		le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
 
 	mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
 			"of %u which exceed block size\n",
@@ -5095,7 +5118,7 @@ try_again:
 			need = 0;
 	}
 
-	free = xh_free_start - header_size;
+	free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
 	/*
 	 * We need to make sure the new name/value pair
 	 * can exist in the same block.
@@ -5128,7 +5151,8 @@ try_again:
 			}
 
 			xh_free_start = le16_to_cpu(xh->xh_free_start);
-			free = xh_free_start - header_size;
+			free = xh_free_start - header_size
+				- OCFS2_XATTR_HEADER_GAP;
 			if (xh_free_start % blocksize < need)
 				free -= xh_free_start % blocksize;