From 1c0eeaf5698597146ed9b873e2f9e0961edcf0f9 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Tue, 16 Oct 2007 23:30:44 -0700 Subject: introduce I_SYNC I_LOCK was used for several unrelated purposes, which caused deadlock situations in certain filesystems as a side effect. One of the purposes now uses the new I_SYNC bit. Also document the various bits and change their order from historical to logical. [bunk@stusta.de: make fs/inode.c:wake_up_inode() static] Signed-off-by: Joern Engel Cc: Dave Kleikamp Cc: David Chinner Cc: Anton Altaparmakov Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 39 ++++++++++++++++++++++++--------------- fs/hugetlbfs/inode.c | 2 +- fs/inode.c | 24 ++++++++++++------------ fs/jfs/jfs_txnmgr.c | 9 ++++++++- fs/xfs/linux-2.6/xfs_iops.c | 4 ++-- 5 files changed, 47 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 71c158ac60a..686734ff973 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -100,11 +100,11 @@ void __mark_inode_dirty(struct inode *inode, int flags) inode->i_state |= flags; /* - * If the inode is locked, just update its dirty state. + * If the inode is being synced, just update its dirty state. * The unlocker will place the inode on the appropriate * superblock list, based upon its state. */ - if (inode->i_state & I_LOCK) + if (inode->i_state & I_SYNC) goto out; /* @@ -172,6 +172,15 @@ static void requeue_io(struct inode *inode) list_move(&inode->i_list, &inode->i_sb->s_more_io); } +static void inode_sync_complete(struct inode *inode) +{ + /* + * Prevent speculative execution through spin_unlock(&inode_lock); + */ + smp_mb(); + wake_up_bit(&inode->i_state, __I_SYNC); +} + /* * Move expired dirty inodes from @delaying_queue to @dispatch_queue. */ @@ -225,11 +234,11 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) int wait = wbc->sync_mode == WB_SYNC_ALL; int ret; - BUG_ON(inode->i_state & I_LOCK); + BUG_ON(inode->i_state & I_SYNC); - /* Set I_LOCK, reset I_DIRTY */ + /* Set I_SYNC, reset I_DIRTY */ dirty = inode->i_state & I_DIRTY; - inode->i_state |= I_LOCK; + inode->i_state |= I_SYNC; inode->i_state &= ~I_DIRTY; spin_unlock(&inode_lock); @@ -250,7 +259,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) } spin_lock(&inode_lock); - inode->i_state &= ~I_LOCK; + inode->i_state &= ~I_SYNC; if (!(inode->i_state & I_FREEING)) { if (!(inode->i_state & I_DIRTY) && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { @@ -305,7 +314,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) list_move(&inode->i_list, &inode_unused); } } - wake_up_inode(inode); + inode_sync_complete(inode); return ret; } @@ -324,7 +333,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) else WARN_ON(inode->i_state & I_WILL_FREE); - if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) { + if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) { struct address_space *mapping = inode->i_mapping; int ret; @@ -350,16 +359,16 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) /* * It's a data-integrity sync. We must wait. */ - if (inode->i_state & I_LOCK) { - DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LOCK); + if (inode->i_state & I_SYNC) { + DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); - wqh = bit_waitqueue(&inode->i_state, __I_LOCK); + wqh = bit_waitqueue(&inode->i_state, __I_SYNC); do { spin_unlock(&inode_lock); __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); spin_lock(&inode_lock); - } while (inode->i_state & I_LOCK); + } while (inode->i_state & I_SYNC); } return __sync_single_inode(inode, wbc); } @@ -392,7 +401,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * The inodes to be written are parked on sb->s_io. They are moved back onto * sb->s_dirty as they are selected for writing. This way, none can be missed * on the writer throttling path, and we get decent balancing between many - * throttled threads: we don't want them all piling up on __wait_on_inode. + * throttled threads: we don't want them all piling up on inode_sync_wait. */ static void sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) @@ -661,7 +670,7 @@ int write_inode_now(struct inode *inode, int sync) ret = __writeback_single_inode(inode, &wbc); spin_unlock(&inode_lock); if (sync) - wait_on_inode(inode); + inode_sync_wait(inode); return ret; } EXPORT_SYMBOL(write_inode_now); @@ -736,7 +745,7 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int err = err2; } else - wait_on_inode(inode); + inode_sync_wait(inode); return err; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 6bf6890f053..0f5df73dbb7 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -384,7 +384,7 @@ static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) struct super_block *sb = inode->i_sb; if (!hlist_unhashed(&inode->i_hash)) { - if (!(inode->i_state & (I_DIRTY|I_LOCK))) + if (!(inode->i_state & (I_DIRTY|I_SYNC))) list_move(&inode->i_list, &inode_unused); inodes_stat.nr_unused++; if (!sb || (sb->s_flags & MS_ACTIVE)) { diff --git a/fs/inode.c b/fs/inode.c index c6165771e00..ed35383d0b6 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -99,6 +99,15 @@ struct inodes_stat_t inodes_stat; static struct kmem_cache * inode_cachep __read_mostly; +static void wake_up_inode(struct inode *inode) +{ + /* + * Prevent speculative execution through spin_unlock(&inode_lock); + */ + smp_mb(); + wake_up_bit(&inode->i_state, __I_LOCK); +} + static struct inode *alloc_inode(struct super_block *sb) { static const struct address_space_operations empty_aops; @@ -232,7 +241,7 @@ void __iget(struct inode * inode) return; } atomic_inc(&inode->i_count); - if (!(inode->i_state & (I_DIRTY|I_LOCK))) + if (!(inode->i_state & (I_DIRTY|I_SYNC))) list_move(&inode->i_list, &inode_in_use); inodes_stat.nr_unused--; } @@ -253,7 +262,7 @@ void clear_inode(struct inode *inode) BUG_ON(inode->i_data.nrpages); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); - wait_on_inode(inode); + inode_sync_wait(inode); DQUOT_DROP(inode); if (inode->i_sb->s_op->clear_inode) inode->i_sb->s_op->clear_inode(inode); @@ -1071,7 +1080,7 @@ static void generic_forget_inode(struct inode *inode) struct super_block *sb = inode->i_sb; if (!hlist_unhashed(&inode->i_hash)) { - if (!(inode->i_state & (I_DIRTY|I_LOCK))) + if (!(inode->i_state & (I_DIRTY|I_SYNC))) list_move(&inode->i_list, &inode_unused); inodes_stat.nr_unused++; if (sb->s_flags & MS_ACTIVE) { @@ -1314,15 +1323,6 @@ static void __wait_on_freeing_inode(struct inode *inode) spin_lock(&inode_lock); } -void wake_up_inode(struct inode *inode) -{ - /* - * Prevent speculative execution through spin_unlock(&inode_lock); - */ - smp_mb(); - wake_up_bit(&inode->i_state, __I_LOCK); -} - /* * We rarely want to lock two inodes that do not have a parent/child * relationship (such as directory, child inode) simultaneously. The diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 7aa1f7004ea..e7c60ae6b5b 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -1289,7 +1289,14 @@ int txCommit(tid_t tid, /* transaction identifier */ * commit the transaction synchronously, so the last iput * will be done by the calling thread (or later) */ - if (tblk->u.ip->i_state & I_LOCK) + /* + * I believe this code is no longer needed. Splitting I_LOCK + * into two bits, I_LOCK and I_SYNC should prevent this + * deadlock as well. But since I don't have a JFS testload + * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. + * Joern + */ + if (tblk->u.ip->i_state & I_SYNC) tblk->xflag &= ~COMMIT_LAZY; } diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 0b5fa124bef..e0e06dd4bef 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -133,7 +133,7 @@ xfs_ichgtime( */ SYNCHRONIZE(); ip->i_update_core = 1; - if (!(inode->i_state & I_LOCK)) + if (!(inode->i_state & I_SYNC)) mark_inode_dirty_sync(inode); } @@ -185,7 +185,7 @@ xfs_ichgtime_fast( */ SYNCHRONIZE(); ip->i_update_core = 1; - if (!(inode->i_state & I_LOCK)) + if (!(inode->i_state & I_SYNC)) mark_inode_dirty_sync(inode); } -- cgit v1.2.3