From 7c52b166c588c98cf3d2b2e7e6a0468a98e84d0d Mon Sep 17 00:00:00 2001 From: Robert Peterson Date: Fri, 16 Mar 2007 10:26:37 +0000 Subject: [GFS2] Add gfs2_tool lockdump support to gfs2 (bz 228540) The attached patch resolves bz 228540. This adds the capability for gfs2 to dump gfs2 locks through the debugfs file system. This used to exist in gfs1 as "gfs_tool lockdump" but it's missing from gfs2 because all the ioctls were stripped out. Please see the bugzilla for more history about the fix. This patch is also attached to the bugzilla record. The patch is against Steve Whitehouse's latest nmw git tree kernel (2.6.21-rc1) and has been tested on system trin-10. Signed-off-by: Robert Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 314 ++++++++++++++++++++++++++++++++++++++++----------- fs/gfs2/glock.h | 4 + fs/gfs2/incore.h | 1 + fs/gfs2/main.c | 3 + fs/gfs2/ops_fstype.c | 3 + 5 files changed, 262 insertions(+), 63 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 12accb08fe0..9f203ef4da6 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include "gfs2.h" #include "incore.h" @@ -40,14 +42,22 @@ struct gfs2_gl_hash_bucket { struct hlist_head hb_list; }; +struct glock_iter { + int hash; /* hash bucket index */ + struct gfs2_sbd *sdp; /* incore superblock */ + struct gfs2_glock *gl; /* current glock struct */ + struct hlist_head *hb_list; /* current hash bucket ptr */ + struct seq_file *seq; /* sequence file for debugfs */ + char string[512]; /* scratch space */ +}; + typedef void (*glock_examiner) (struct gfs2_glock * gl); static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); -static int dump_glock(struct gfs2_glock *gl); -static int dump_inode(struct gfs2_inode *ip); static void gfs2_glock_xmote_th(struct gfs2_holder *gh); static void gfs2_glock_drop_th(struct gfs2_glock *gl); static DECLARE_RWSEM(gfs2_umount_flush_sem); +static struct dentry 
*gfs2_root; #define GFS2_GL_HASH_SHIFT 15 #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) @@ -1109,6 +1119,20 @@ find_holder_by_owner(struct list_head *head, struct task_struct *owner) return NULL; } +static void print_dbg(struct glock_iter *gi, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + if (gi) { + vsprintf(gi->string, fmt, args); + seq_printf(gi->seq, gi->string); + } + else + vprintk(fmt, args); + va_end(args); +} + /** * add_to_queue - Add a holder to the wait queue (but look for recursion) * @gh: the holder structure to add @@ -1849,31 +1873,32 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_holder(char *str, struct gfs2_holder *gh) +static int dump_holder(struct glock_iter *gi, char *str, + struct gfs2_holder *gh) { unsigned int x; - int error = -ENOBUFS; - printk(KERN_INFO " %s\n", str); - printk(KERN_INFO " owner = %ld\n", + print_dbg(gi, " %s\n", str); + print_dbg(gi, " owner = %ld\n", (gh->gh_owner) ? 
(long)gh->gh_owner->pid : -1); - printk(KERN_INFO " gh_state = %u\n", gh->gh_state); - printk(KERN_INFO " gh_flags ="); + print_dbg(gi, " gh_state = %u\n", gh->gh_state); + print_dbg(gi, " gh_flags ="); for (x = 0; x < 32; x++) if (gh->gh_flags & (1 << x)) - printk(" %u", x); - printk(" \n"); - printk(KERN_INFO " error = %d\n", gh->gh_error); - printk(KERN_INFO " gh_iflags ="); + print_dbg(gi, " %u", x); + print_dbg(gi, " \n"); + print_dbg(gi, " error = %d\n", gh->gh_error); + print_dbg(gi, " gh_iflags ="); for (x = 0; x < 32; x++) if (test_bit(x, &gh->gh_iflags)) - printk(" %u", x); - printk(" \n"); - print_symbol(KERN_INFO " initialized at: %s\n", gh->gh_ip); - - error = 0; + print_dbg(gi, " %u", x); + print_dbg(gi, " \n"); + if (gi) + print_dbg(gi, " initialized at: 0x%x\n", gh->gh_ip); + else + print_symbol(KERN_INFO " initialized at: %s\n", gh->gh_ip); - return error; + return 0; } /** @@ -1883,25 +1908,20 @@ static int dump_holder(char *str, struct gfs2_holder *gh) * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_inode(struct gfs2_inode *ip) +static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip) { unsigned int x; - int error = -ENOBUFS; - printk(KERN_INFO " Inode:\n"); - printk(KERN_INFO " num = %llu %llu\n", - (unsigned long long)ip->i_num.no_formal_ino, - (unsigned long long)ip->i_num.no_addr); - printk(KERN_INFO " type = %u\n", IF2DT(ip->i_inode.i_mode)); - printk(KERN_INFO " i_flags ="); + print_dbg(gi, " Inode:\n"); + print_dbg(gi, " num = %llu/%llu\n", + ip->i_num.no_formal_ino, ip->i_num.no_addr); + print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode)); + print_dbg(gi, " i_flags ="); for (x = 0; x < 32; x++) if (test_bit(x, &ip->i_flags)) - printk(" %u", x); - printk(" \n"); - - error = 0; - - return error; + print_dbg(gi, " %u", x); + print_dbg(gi, " \n"); + return 0; } /** @@ -1912,7 +1932,7 @@ static int dump_inode(struct gfs2_inode *ip) * Returns: 0 on success, -ENOBUFS when we run out of space */ 
-static int dump_glock(struct gfs2_glock *gl) +static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) { struct gfs2_holder *gh; unsigned int x; @@ -1920,66 +1940,66 @@ static int dump_glock(struct gfs2_glock *gl) spin_lock(&gl->gl_spin); - printk(KERN_INFO "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type, - (unsigned long long)gl->gl_name.ln_number); - printk(KERN_INFO " gl_flags ="); + print_dbg(gi, "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number); + print_dbg(gi, " gl_flags ="); for (x = 0; x < 32; x++) { if (test_bit(x, &gl->gl_flags)) - printk(" %u", x); - } - printk(" \n"); - printk(KERN_INFO " gl_ref = %d\n", atomic_read(&gl->gl_ref)); - printk(KERN_INFO " gl_state = %u\n", gl->gl_state); - printk(KERN_INFO " gl_owner = %s\n", gl->gl_owner->comm); - print_symbol(KERN_INFO " gl_ip = %s\n", gl->gl_ip); - printk(KERN_INFO " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); - printk(KERN_INFO " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); - printk(KERN_INFO " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); - printk(KERN_INFO " object = %s\n", (gl->gl_object) ? "yes" : "no"); - printk(KERN_INFO " le = %s\n", + print_dbg(gi, " %u", x); + } + print_dbg(gi, " \n"); + print_dbg(gi, " gl_ref = %d\n", atomic_read(&gl->gl_ref)); + print_dbg(gi, " gl_state = %u\n", gl->gl_state); + print_dbg(gi, " gl_owner = %s\n", gl->gl_owner->comm); + print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); + print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); + print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); + print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); + print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); + print_dbg(gi, " le = %s\n", (list_empty(&gl->gl_le.le_list)) ? "no" : "yes"); - printk(KERN_INFO " reclaim = %s\n", - (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); + print_dbg(gi, " reclaim = %s\n", + (list_empty(&gl->gl_reclaim)) ? 
"no" : "yes"); if (gl->gl_aspace) - printk(KERN_INFO " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, - gl->gl_aspace->i_mapping->nrpages); + print_dbg(gi, " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, + gl->gl_aspace->i_mapping->nrpages); else - printk(KERN_INFO " aspace = no\n"); - printk(KERN_INFO " ail = %d\n", atomic_read(&gl->gl_ail_count)); + print_dbg(gi, " aspace = no\n"); + print_dbg(gi, " ail = %d\n", atomic_read(&gl->gl_ail_count)); if (gl->gl_req_gh) { - error = dump_holder("Request", gl->gl_req_gh); + error = dump_holder(gi, "Request", gl->gl_req_gh); if (error) goto out; } list_for_each_entry(gh, &gl->gl_holders, gh_list) { - error = dump_holder("Holder", gh); + error = dump_holder(gi, "Holder", gh); if (error) goto out; } list_for_each_entry(gh, &gl->gl_waiters1, gh_list) { - error = dump_holder("Waiter1", gh); + error = dump_holder(gi, "Waiter1", gh); if (error) goto out; } list_for_each_entry(gh, &gl->gl_waiters2, gh_list) { - error = dump_holder("Waiter2", gh); + error = dump_holder(gi, "Waiter2", gh); if (error) goto out; } list_for_each_entry(gh, &gl->gl_waiters3, gh_list) { - error = dump_holder("Waiter3", gh); + error = dump_holder(gi, "Waiter3", gh); if (error) goto out; } if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { if (!test_bit(GLF_LOCK, &gl->gl_flags) && - list_empty(&gl->gl_holders)) { - error = dump_inode(gl->gl_object); + list_empty(&gl->gl_holders)) { + error = dump_inode(gi, gl->gl_object); if (error) goto out; } else { error = -ENOBUFS; - printk(KERN_INFO " Inode: busy\n"); + print_dbg(gi, " Inode: busy\n"); } } @@ -2014,7 +2034,7 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp) if (gl->gl_sbd != sdp) continue; - error = dump_glock(gl); + error = dump_glock(NULL, gl); if (error) break; } @@ -2043,3 +2063,171 @@ int __init gfs2_glock_init(void) return 0; } +static int gfs2_glock_iter_next(struct glock_iter *gi) +{ + while (1) { + if (!gi->hb_list) { /* If we don't have a hash bucket yet */ + gi->hb_list = 
&gl_hash_table[gi->hash].hb_list; + if (hlist_empty(gi->hb_list)) { + gi->hash++; + gi->hb_list = NULL; + if (gi->hash >= GFS2_GL_HASH_SIZE) + return 1; + else + continue; + } + if (!hlist_empty(gi->hb_list)) { + gi->gl = list_entry(gi->hb_list->first, + struct gfs2_glock, + gl_list); + } + } else { + if (gi->gl->gl_list.next == NULL) { + gi->hash++; + gi->hb_list = NULL; + continue; + } + gi->gl = list_entry(gi->gl->gl_list.next, + struct gfs2_glock, gl_list); + } + if (gi->gl) + break; + } + return 0; +} + +static void gfs2_glock_iter_free(struct glock_iter *gi) +{ + kfree(gi); +} + +static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp) +{ + struct glock_iter *gi; + + gi = kmalloc(sizeof (*gi), GFP_KERNEL); + if (!gi) + return NULL; + + gi->sdp = sdp; + gi->hash = 0; + gi->gl = NULL; + gi->hb_list = NULL; + gi->seq = NULL; + memset(gi->string, 0, sizeof(gi->string)); + + if (gfs2_glock_iter_next(gi)) { + gfs2_glock_iter_free(gi); + return NULL; + } + + return gi; +} + +static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos) +{ + struct glock_iter *gi; + loff_t n = *pos; + + gi = gfs2_glock_iter_init(file->private); + if (!gi) + return NULL; + + while (n--) { + if (gfs2_glock_iter_next(gi)) { + gfs2_glock_iter_free(gi); + return NULL; + } + } + + return gi; +} + +static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr, + loff_t *pos) +{ + struct glock_iter *gi = iter_ptr; + + (*pos)++; + + if (gfs2_glock_iter_next(gi)) { + gfs2_glock_iter_free(gi); + return NULL; + } + + return gi; +} + +static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr) +{ + /* nothing for now */ +} + +static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr) +{ + struct glock_iter *gi = iter_ptr; + + gi->seq = file; + dump_glock(gi, gi->gl); + + return 0; +} + +static struct seq_operations gfs2_glock_seq_ops = { + .start = gfs2_glock_seq_start, + .next = gfs2_glock_seq_next, + .stop = gfs2_glock_seq_stop, + .show = 
gfs2_glock_seq_show, +}; + +static int gfs2_debugfs_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + + ret = seq_open(file, &gfs2_glock_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + seq->private = inode->i_private; + + return 0; +} + +static const struct file_operations gfs2_debug_fops = { + .owner = THIS_MODULE, + .open = gfs2_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) +{ + sdp->debugfs_dentry = debugfs_create_file(sdp->sd_table_name, + S_IFREG | S_IRUGO, + gfs2_root, sdp, + &gfs2_debug_fops); + if (!sdp->debugfs_dentry) + return -ENOMEM; + + return 0; +} + +void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) +{ + if (sdp->debugfs_dentry) + debugfs_remove(sdp->debugfs_dentry); +} + +int gfs2_register_debugfs(void) +{ + gfs2_root = debugfs_create_dir("gfs2", NULL); + return gfs2_root ? 0 : -ENOMEM; +} + +void gfs2_unregister_debugfs(void) +{ + debugfs_remove(gfs2_root); +} diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index f50e40ceca4..d7cef740872 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -135,5 +135,9 @@ void gfs2_scand_internal(struct gfs2_sbd *sdp); void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait); int __init gfs2_glock_init(void); +int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); +void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); +int gfs2_register_debugfs(void); +void gfs2_unregister_debugfs(void); #endif /* __GLOCK_DOT_H__ */ diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 49f0dbf40d8..7555261d911 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -611,6 +611,7 @@ struct gfs2_sbd { unsigned long sd_last_warning; struct vfsmount *sd_gfs2mnt; + struct dentry *debugfs_dentry; /* for debugfs */ }; #endif /* __INCORE_DOT_H__ */ diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 6e8a59809ab..218395371db 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -103,6 +103,8 @@ 
static int __init init_gfs2_fs(void) if (error) goto fail_unregister; + gfs2_register_debugfs(); + printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__); return 0; @@ -130,6 +132,7 @@ fail: static void __exit exit_gfs2_fs(void) { + gfs2_unregister_debugfs(); unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index ee54cb66708..ecb8b18de0e 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -690,6 +690,8 @@ static int fill_super(struct super_block *sb, void *data, int silent) if (error) goto fail; + gfs2_create_debugfs_file(sdp); + error = gfs2_sys_fs_add(sdp); if (error) goto fail; @@ -896,6 +898,7 @@ error: static void gfs2_kill_sb(struct super_block *sb) { + gfs2_delete_debugfs_file(sb->s_fs_info); kill_block_super(sb); } -- cgit v1.2.3 From 5c7342d894973636f03270673e1fb7b908a421a8 Mon Sep 17 00:00:00 2001 From: Josef Whiter Date: Wed, 7 Mar 2007 17:09:10 -0500 Subject: [GFS2] fix bz 231369, gfs2 will oops if you specify an invalid mount option If you specify an invalid mount option when trying to mount a gfs2 filesystem, gfs2 will oops. The attached patch resolves this problem. 
Signed-off-by: Josef Whiter Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9f203ef4da6..a3a24f2e99d 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2217,7 +2217,7 @@ int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) { - if (sdp->debugfs_dentry) + if (sdp && sdp->debugfs_dentry) debugfs_remove(sdp->debugfs_dentry); } -- cgit v1.2.3 From 1de913909263ba7f7054debeda1b79771a7233db Mon Sep 17 00:00:00 2001 From: Josef Whiter Date: Mon, 12 Mar 2007 16:55:07 -0500 Subject: [GFS2] Fix bz 231380, unlock page before dequeing glocks in gfs2_commit_write If we are writing a file, and in the middle of writing the file another node attempts to get a shared lock on that file (by doing a du for example) the process doing the writing will hang waiting on lock_page. The reason for this is because when we have waiters on an exclusive glock, we will go through and flush out all dirty pages associated with that inode and release the lock. The problem is that when we flush the dirty pages, we could hit a page that we have locked during the generic_file_buffered_write part of this operation. This patch unlocks the page before we go to dequeue the lock and locks it immediately afterwards, since generic_file_buffered_write needs the page locked when the commit_write is completed. This patch resolves the problem, however if somebody sees a better way to do this please don't hesitate to yell. 
Signed-off-by: Josef Whiter Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_address.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/gfs2') diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index b3b7e847535..90c287932d5 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -507,7 +507,9 @@ static int gfs2_commit_write(struct file *file, struct page *page, gfs2_quota_unlock(ip); gfs2_alloc_put(ip); } + unlock_page(page); gfs2_glock_dq_m(1, &ip->i_gh); + lock_page(page); gfs2_holder_uninit(&ip->i_gh); return 0; @@ -520,7 +522,9 @@ fail_endtrans: gfs2_quota_unlock(ip); gfs2_alloc_put(ip); } + unlock_page(page); gfs2_glock_dq_m(1, &ip->i_gh); + lock_page(page); gfs2_holder_uninit(&ip->i_gh); fail_nounlock: ClearPageUptodate(page); -- cgit v1.2.3 From 3b8249f6178cb2b68b9d683587797270125cc06a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 16 Mar 2007 09:40:31 +0000 Subject: [GFS2] Fix bz 224480 and cleanup glock demotion code This patch prevents the printing of a warning message in cases where the fs is functioning normally by handing off responsibility for unlinked, but still open inodes, to another node for eventual deallocation. Also, there is now an improved system for ensuring that such requests to other nodes do not get lost. The callback on the iopen lock is only ever called when i_nlink == 0 and when a node is unable to deallocate it due to it still being in use on another node. When a node receives the callback therefore, it knows that i_nlink must be zero, so we mark it as such (in gfs2_drop_inode) in order that it will then attempt deallocation of the inode itself. As an additional benefit, queuing a demote request no longer requires a memory allocation. This simplifies the code for dealing with gfs2_holders as it removes one special case. There are two new fields in struct gfs2_glock. gl_demote_state is the state which the remote node has requested and gl_demote_time is the time when the request came in. 
Both fields are only valid when the GLF_DEMOTE flag is set in gl_flags. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 233 ++++++++++++++-------------------------------------- fs/gfs2/glock.h | 2 +- fs/gfs2/incore.h | 8 +- fs/gfs2/main.c | 1 - fs/gfs2/ops_super.c | 28 ++++++- 5 files changed, 93 insertions(+), 179 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a3a24f2e99d..e7075945b05 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -54,7 +54,7 @@ struct glock_iter { typedef void (*glock_examiner) (struct gfs2_glock * gl); static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); -static void gfs2_glock_xmote_th(struct gfs2_holder *gh); +static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); static void gfs2_glock_drop_th(struct gfs2_glock *gl); static DECLARE_RWSEM(gfs2_umount_flush_sem); static struct dentry *gfs2_root; @@ -212,7 +212,6 @@ int gfs2_glock_put(struct gfs2_glock *gl) gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); gfs2_assert(sdp, list_empty(&gl->gl_holders)); gfs2_assert(sdp, list_empty(&gl->gl_waiters1)); - gfs2_assert(sdp, list_empty(&gl->gl_waiters2)); gfs2_assert(sdp, list_empty(&gl->gl_waiters3)); glock_free(gl); rv = 1; @@ -399,7 +398,7 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder * { gh->gh_state = state; gh->gh_flags = flags; - gh->gh_iflags &= 1 << HIF_ALLOCED; + gh->gh_iflags = 0; gh->gh_ip = (unsigned long)__builtin_return_address(0); } @@ -416,54 +415,8 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) gh->gh_ip = 0; } -/** - * gfs2_holder_get - get a struct gfs2_holder structure - * @gl: the glock - * @state: the state we're requesting - * @flags: the modifier flags - * @gfp_flags: - * - * Figure out how big an impact this function has. 
Either: - * 1) Replace it with a cache of structures hanging off the struct gfs2_sbd - * 2) Leave it like it is - * - * Returns: the holder structure, NULL on ENOMEM - */ - -static struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl, - unsigned int state, - int flags, gfp_t gfp_flags) -{ - struct gfs2_holder *gh; - - gh = kmalloc(sizeof(struct gfs2_holder), gfp_flags); - if (!gh) - return NULL; - - gfs2_holder_init(gl, state, flags, gh); - set_bit(HIF_ALLOCED, &gh->gh_iflags); - gh->gh_ip = (unsigned long)__builtin_return_address(0); - return gh; -} - -/** - * gfs2_holder_put - get rid of a struct gfs2_holder structure - * @gh: the holder structure - * - */ - -static void gfs2_holder_put(struct gfs2_holder *gh) -{ - gfs2_holder_uninit(gh); - kfree(gh); -} - -static void gfs2_holder_dispose_or_wake(struct gfs2_holder *gh) +static void gfs2_holder_wake(struct gfs2_holder *gh) { - if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) { - gfs2_holder_put(gh); - return; - } clear_bit(HIF_WAIT, &gh->gh_iflags); smp_mb(); wake_up_bit(&gh->gh_iflags, HIF_WAIT); @@ -529,7 +482,7 @@ static int rq_promote(struct gfs2_holder *gh) gfs2_reclaim_glock(sdp); } - gfs2_glock_xmote_th(gh); + gfs2_glock_xmote_th(gh->gh_gl, gh); spin_lock(&gl->gl_spin); } return 1; @@ -552,7 +505,7 @@ static int rq_promote(struct gfs2_holder *gh) gh->gh_error = 0; set_bit(HIF_HOLDER, &gh->gh_iflags); - gfs2_holder_dispose_or_wake(gh); + gfs2_holder_wake(gh); return 0; } @@ -564,32 +517,24 @@ static int rq_promote(struct gfs2_holder *gh) * Returns: 1 if the queue is blocked */ -static int rq_demote(struct gfs2_holder *gh) +static int rq_demote(struct gfs2_glock *gl) { - struct gfs2_glock *gl = gh->gh_gl; - if (!list_empty(&gl->gl_holders)) return 1; - if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) { - list_del_init(&gh->gh_list); - gh->gh_error = 0; - spin_unlock(&gl->gl_spin); - gfs2_holder_dispose_or_wake(gh); - spin_lock(&gl->gl_spin); - } else { - gl->gl_req_gh = gh; - 
set_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - - if (gh->gh_state == LM_ST_UNLOCKED || - gl->gl_state != LM_ST_EXCLUSIVE) - gfs2_glock_drop_th(gl); - else - gfs2_glock_xmote_th(gh); - - spin_lock(&gl->gl_spin); + if (gl->gl_state == gl->gl_demote_state || + gl->gl_state == LM_ST_UNLOCKED) { + clear_bit(GLF_DEMOTE, &gl->gl_flags); + return 0; } + set_bit(GLF_LOCK, &gl->gl_flags); + spin_unlock(&gl->gl_spin); + if (gl->gl_demote_state == LM_ST_UNLOCKED || + gl->gl_state != LM_ST_EXCLUSIVE) + gfs2_glock_drop_th(gl); + else + gfs2_glock_xmote_th(gl, NULL); + spin_lock(&gl->gl_spin); return 0; } @@ -617,16 +562,8 @@ static void run_queue(struct gfs2_glock *gl) else gfs2_assert_warn(gl->gl_sbd, 0); - } else if (!list_empty(&gl->gl_waiters2) && - !test_bit(GLF_SKIP_WAITERS2, &gl->gl_flags)) { - gh = list_entry(gl->gl_waiters2.next, - struct gfs2_holder, gh_list); - - if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) - blocked = rq_demote(gh); - else - gfs2_assert_warn(gl->gl_sbd, 0); - + } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { + blocked = rq_demote(gl); } else if (!list_empty(&gl->gl_waiters3)) { gh = list_entry(gl->gl_waiters3.next, struct gfs2_holder, gh_list); @@ -717,50 +654,24 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl) } /** - * handle_callback - add a demote request to a lock's queue + * handle_callback - process a demote request * @gl: the glock * @state: the state the caller wants us to change to * - * Note: This may fail sliently if we are out of memory. 
+ * There are only two requests that we are going to see in actual + * practise: LM_ST_SHARED and LM_ST_UNLOCKED */ static void handle_callback(struct gfs2_glock *gl, unsigned int state) { - struct gfs2_holder *gh, *new_gh = NULL; - -restart: spin_lock(&gl->gl_spin); - - list_for_each_entry(gh, &gl->gl_waiters2, gh_list) { - if (test_bit(HIF_DEMOTE, &gh->gh_iflags) && - gl->gl_req_gh != gh) { - if (gh->gh_state != state) - gh->gh_state = LM_ST_UNLOCKED; - goto out; - } - } - - if (new_gh) { - list_add_tail(&new_gh->gh_list, &gl->gl_waiters2); - new_gh = NULL; - } else { - spin_unlock(&gl->gl_spin); - - new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_NOFS); - if (!new_gh) - return; - set_bit(HIF_DEMOTE, &new_gh->gh_iflags); - set_bit(HIF_DEALLOC, &new_gh->gh_iflags); - set_bit(HIF_WAIT, &new_gh->gh_iflags); - - goto restart; + if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) { + gl->gl_demote_state = state; + gl->gl_demote_time = jiffies; + } else if (gl->gl_demote_state != LM_ST_UNLOCKED) { + gl->gl_demote_state = state; } - -out: spin_unlock(&gl->gl_spin); - - if (new_gh) - gfs2_holder_put(new_gh); } /** @@ -820,56 +731,37 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) /* Deal with each possible exit condition */ - if (!gh) + if (!gh) { gl->gl_stamp = jiffies; - else if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { + if (ret & LM_OUT_CANCELED) + op_done = 0; + else + clear_bit(GLF_DEMOTE, &gl->gl_flags); + } else { spin_lock(&gl->gl_spin); list_del_init(&gh->gh_list); gh->gh_error = -EIO; - spin_unlock(&gl->gl_spin); - } else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) { - spin_lock(&gl->gl_spin); - list_del_init(&gh->gh_list); - if (gl->gl_state == gh->gh_state || - gl->gl_state == LM_ST_UNLOCKED) { + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + goto out; + gh->gh_error = GLR_CANCELED; + if (ret & LM_OUT_CANCELED) + goto out; + if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { + 
list_add_tail(&gh->gh_list, &gl->gl_holders); gh->gh_error = 0; - } else { - if (gfs2_assert_warn(sdp, gh->gh_flags & - (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) == -1) - fs_warn(sdp, "ret = 0x%.8X\n", ret); - gh->gh_error = GLR_TRYFAILED; + set_bit(HIF_HOLDER, &gh->gh_iflags); + set_bit(HIF_FIRST, &gh->gh_iflags); + op_done = 0; + goto out; } - spin_unlock(&gl->gl_spin); - - if (ret & LM_OUT_CANCELED) - handle_callback(gl, LM_ST_UNLOCKED); - - } else if (ret & LM_OUT_CANCELED) { - spin_lock(&gl->gl_spin); - list_del_init(&gh->gh_list); - gh->gh_error = GLR_CANCELED; - spin_unlock(&gl->gl_spin); - - } else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { - spin_lock(&gl->gl_spin); - list_move_tail(&gh->gh_list, &gl->gl_holders); - gh->gh_error = 0; - set_bit(HIF_HOLDER, &gh->gh_iflags); - spin_unlock(&gl->gl_spin); - - set_bit(HIF_FIRST, &gh->gh_iflags); - - op_done = 0; - - } else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { - spin_lock(&gl->gl_spin); - list_del_init(&gh->gh_list); gh->gh_error = GLR_TRYFAILED; - spin_unlock(&gl->gl_spin); - - } else { + if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) + goto out; + gh->gh_error = -EINVAL; if (gfs2_assert_withdraw(sdp, 0) == -1) fs_err(sdp, "ret = 0x%.8X\n", ret); +out: + spin_unlock(&gl->gl_spin); } if (glops->go_xmote_bh) @@ -887,7 +779,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) gfs2_glock_put(gl); if (gh) - gfs2_holder_dispose_or_wake(gh); + gfs2_holder_wake(gh); } /** @@ -898,12 +790,11 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) * */ -void gfs2_glock_xmote_th(struct gfs2_holder *gh) +void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) { - struct gfs2_glock *gl = gh->gh_gl; struct gfs2_sbd *sdp = gl->gl_sbd; - int flags = gh->gh_flags; - unsigned state = gh->gh_state; + int flags = gh ? gh->gh_flags : 0; + unsigned state = gh ? 
gh->gh_state : gl->gl_demote_state; const struct gfs2_glock_operations *glops = gl->gl_ops; int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | LM_FLAG_ANY | @@ -953,6 +844,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) gfs2_assert_warn(sdp, !ret); state_change(gl, LM_ST_UNLOCKED); + clear_bit(GLF_DEMOTE, &gl->gl_flags); if (glops->go_inval) glops->go_inval(gl, DIO_METADATA); @@ -974,7 +866,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) gfs2_glock_put(gl); if (gh) - gfs2_holder_dispose_or_wake(gh); + gfs2_holder_wake(gh); } /** @@ -1291,9 +1183,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh) if (glops->go_unlock) glops->go_unlock(gh); - gl->gl_stamp = jiffies; - spin_lock(&gl->gl_spin); + gl->gl_stamp = jiffies; } clear_bit(GLF_LOCK, &gl->gl_flags); @@ -1981,16 +1872,16 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) if (error) goto out; } - list_for_each_entry(gh, &gl->gl_waiters2, gh_list) { - error = dump_holder(gi, "Waiter2", gh); - if (error) - goto out; - } list_for_each_entry(gh, &gl->gl_waiters3, gh_list) { error = dump_holder(gi, "Waiter3", gh); if (error) goto out; } + if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { + print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n", + gl->gl_demote_state, + (u64)(jiffies - gl->gl_demote_time)*1000000/HZ); + } if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { if (!test_bit(GLF_LOCK, &gl->gl_flags) && list_empty(&gl->gl_holders)) { diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index d7cef740872..5e662eadc6f 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -67,7 +67,7 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl) { int ret; spin_lock(&gl->gl_spin); - ret = !list_empty(&gl->gl_waiters2) || !list_empty(&gl->gl_waiters3); + ret = test_bit(GLF_DEMOTE, &gl->gl_flags) || !list_empty(&gl->gl_waiters3); spin_unlock(&gl->gl_spin); return ret; } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 
7555261d911..9c125823d76 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -115,11 +115,8 @@ enum { /* Actions */ HIF_MUTEX = 0, HIF_PROMOTE = 1, - HIF_DEMOTE = 2, /* States */ - HIF_ALLOCED = 4, - HIF_DEALLOC = 5, HIF_HOLDER = 6, HIF_FIRST = 7, HIF_ABORTED = 9, @@ -142,8 +139,8 @@ struct gfs2_holder { enum { GLF_LOCK = 1, GLF_STICKY = 2, + GLF_DEMOTE = 3, GLF_DIRTY = 5, - GLF_SKIP_WAITERS2 = 6, }; struct gfs2_glock { @@ -156,11 +153,12 @@ struct gfs2_glock { unsigned int gl_state; unsigned int gl_hash; + unsigned int gl_demote_state; /* state requested by remote node */ + unsigned long gl_demote_time; /* time of first demote request */ struct task_struct *gl_owner; unsigned long gl_ip; struct list_head gl_holders; struct list_head gl_waiters1; /* HIF_MUTEX */ - struct list_head gl_waiters2; /* HIF_DEMOTE */ struct list_head gl_waiters3; /* HIF_PROMOTE */ const struct gfs2_glock_operations *gl_ops; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 218395371db..c4bb374eaf9 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -45,7 +45,6 @@ static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned spin_lock_init(&gl->gl_spin); INIT_LIST_HEAD(&gl->gl_holders); INIT_LIST_HEAD(&gl->gl_waiters1); - INIT_LIST_HEAD(&gl->gl_waiters2); INIT_LIST_HEAD(&gl->gl_waiters3); gl->gl_lvb = NULL; atomic_set(&gl->gl_lvb_count, 0); diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index b89999d3a76..485ce3d4992 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -283,6 +283,31 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) return error; } +/** + * gfs2_drop_inode - Drop an inode (test for remote unlink) + * @inode: The inode to drop + * + * If we've received a callback on an iopen lock then its because a + * remote node tried to deallocate the inode but failed due to this node + * still having the inode open. 
Here we mark the link count zero + * since we know that it must have reached zero if the GLF_DEMOTE flag + * is set on the iopen glock. If we didn't do a disk read since the + * remote node removed the final link then we might otherwise miss + * this event. This check ensures that this node will deallocate the + * inode's blocks, or alternatively pass the baton on to another + * node for later deallocation. + */ +static void gfs2_drop_inode(struct inode *inode) +{ + if (inode->i_private && inode->i_nlink) { + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; + if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) + clear_nlink(inode); + } + generic_drop_inode(inode); +} + /** * gfs2_clear_inode - Deallocate an inode when VFS is done with it * @inode: The VFS inode @@ -441,7 +466,7 @@ out_unlock: out_uninit: gfs2_holder_uninit(&ip->i_iopen_gh); gfs2_glock_dq_uninit(&gh); - if (error) + if (error && error != GLR_TRYFAILED) fs_warn(sdp, "gfs2_delete_inode: %d\n", error); out: truncate_inode_pages(&inode->i_data, 0); @@ -481,6 +506,7 @@ const struct super_operations gfs2_super_ops = { .statfs = gfs2_statfs, .remount_fs = gfs2_remount_fs, .clear_inode = gfs2_clear_inode, + .drop_inode = gfs2_drop_inode, .show_options = gfs2_show_options, }; -- cgit v1.2.3 From 420d2a1028b906f24e836e37089a6ad55ab1848f Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Sun, 18 Mar 2007 16:05:27 +0000 Subject: [GFS2] Fix a bug on i386 due to evaluation order Since gcc didn't evaluate the last two terms of the expression in glock.c:1881 as a constant expression, it resulted in an error on i386 due to the lack of a 64bit divide instruction. This adds some brackets to fix the problem. This was reported by Andrew Morton. 
Signed-off-by: Steven Whitehouse Cc: Andrew Morton --- fs/gfs2/glock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index e7075945b05..b8aa816bb6e 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1880,7 +1880,7 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n", gl->gl_demote_state, - (u64)(jiffies - gl->gl_demote_time)*1000000/HZ); + (u64)(jiffies - gl->gl_demote_time)*(1000000/HZ)); } if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { if (!test_bit(GLF_LOCK, &gl->gl_flags) && -- cgit v1.2.3 From f35ac346bc48b2086aa94f031baf1f6237a89de6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Sun, 18 Mar 2007 17:04:15 +0000 Subject: [GFS2] Speed up lock_dlm's locking (move sprintf) The following patch speeds up lock_dlm's locking by moving the sprintf out from the lock acquisition path and into the lock creation path. This reduces the amount of CPU time used in acquiring locks by a fair amount. 
Signed-off-by: Steven Whitehouse Acked-by: David Teigland --- fs/gfs2/locking/dlm/lock.c | 10 ++++------ fs/gfs2/locking/dlm/lock_dlm.h | 1 + 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c index b167addf9fd..f9c8bda289a 100644 --- a/fs/gfs2/locking/dlm/lock.c +++ b/fs/gfs2/locking/dlm/lock.c @@ -151,7 +151,7 @@ static inline unsigned int make_flags(struct gdlm_lock *lp, /* make_strname - convert GFS lock numbers to a string */ -static inline void make_strname(struct lm_lockname *lockname, +static inline void make_strname(const struct lm_lockname *lockname, struct gdlm_strname *str) { sprintf(str->name, "%8x%16llx", lockname->ln_type, @@ -169,6 +169,7 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name, return -ENOMEM; lp->lockname = *name; + make_strname(name, &lp->strname); lp->ls = ls; lp->cur = DLM_LOCK_IV; lp->lvb = NULL; @@ -227,7 +228,6 @@ void gdlm_put_lock(void *lock) unsigned int gdlm_do_lock(struct gdlm_lock *lp) { struct gdlm_ls *ls = lp->ls; - struct gdlm_strname str; int error, bast = 1; /* @@ -249,8 +249,6 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp) if (test_bit(LFL_NOBAST, &lp->flags)) bast = 0; - make_strname(&lp->lockname, &str); - set_bit(LFL_ACTIVE, &lp->flags); log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type, @@ -258,8 +256,8 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp) lp->cur, lp->req, lp->lkf); error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf, - str.name, str.namelen, 0, gdlm_ast, lp, - bast ? gdlm_bast : NULL); + lp->strname.name, lp->strname.namelen, 0, gdlm_ast, + lp, bast ? 
gdlm_bast : NULL); if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) { lp->lksb.sb_status = -EAGAIN; diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index a87c7bf3c56..6888bd49c31 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h @@ -106,6 +106,7 @@ enum { struct gdlm_lock { struct gdlm_ls *ls; struct lm_lockname lockname; + struct gdlm_strname strname; char *lvb; struct dlm_lksb lksb; -- cgit v1.2.3 From 6883562588bc6c70776ecc396ee7eda36c2c8da9 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Fri, 23 Mar 2007 09:05:12 +0000 Subject: [GFS2] Fix log entry list corruption When glock_lo_add and rg_lo_add attempt to add an element to the log, they check to see if it has already been added before locking the log. If another process adds that element to the log in this window between the check and locking the log, the element will be added to the list twice. This causes the log element list to become corrupted in such a way that the log element can never be successfully removed from the list. This patch pulls the list_empty() check inside the log lock, to remove this window. Signed-off-by: Benjamin E.
Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/lops.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 16bb4b4561a..f82d84d05d2 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -33,16 +33,17 @@ static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) tr->tr_touched = 1; - if (!list_empty(&le->le_list)) - return; - gl = container_of(le, struct gfs2_glock, gl_le); if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) return; - gfs2_glock_hold(gl); - set_bit(GLF_DIRTY, &gl->gl_flags); gfs2_log_lock(sdp); + if (!list_empty(&le->le_list)){ + gfs2_log_unlock(sdp); + return; + } + gfs2_glock_hold(gl); + set_bit(GLF_DIRTY, &gl->gl_flags); sdp->sd_log_num_gl++; list_add(&le->le_list, &sdp->sd_log_le_gl); gfs2_log_unlock(sdp); @@ -415,13 +416,14 @@ static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) tr->tr_touched = 1; - if (!list_empty(&le->le_list)) - return; - rgd = container_of(le, struct gfs2_rgrpd, rd_le); - gfs2_rgrp_bh_hold(rgd); gfs2_log_lock(sdp); + if (!list_empty(&le->le_list)){ + gfs2_log_unlock(sdp); + return; + } + gfs2_rgrp_bh_hold(rgd); sdp->sd_log_num_rg++; list_add(&le->le_list, &sdp->sd_log_le_rg); gfs2_log_unlock(sdp); -- cgit v1.2.3 From 172e045a7fcc3ee647fa70dbd585a3c247b49cb2 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Fri, 23 Mar 2007 14:51:56 -0600 Subject: [GFS2] flush the log if a transaction can't allocate space This is a fix for bz #208514. When GFS2 frees up space, the freed blocks aren't available for reuse until the resource group is successfully written to the ondisk journal. So in rare cases, GFS2 operations will fail, saying that the filesystem is out of space, when in reality, you are just waiting for a log flush. 
For instance, on a 1Gig filesystem, if I continually write 10 Mb to a file, and then truncate it, after a hundred iterations, the write will fail with -ENOSPC, even though the filesystem is just 1% full. The attached patch calls a log flush in these cases. I tested this patch fairly heavily to check if there were any locking issues that I missed, and it seems to work just fine. Also, this patch only does the log flush if get_local_rgrp makes a complete loop of resource groups without skipping any due to locking issues. The code would be slightly simpler if it just always did the log flush after the first failed pass, and you could only ever have to go through the loop twice, instead of up to three times. However, I guessed that failing to find a rg simply due to locking issues would be common enough to skip the log flush in that case, but I'm not certain that this is the right way to go. Either way, I don't suppose this code will be hit all that often. Signed-off-by: Benjamin E. Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 8d9c08b5c4b..2ce48d4f246 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -27,6 +27,7 @@ #include "trans.h" #include "ops_file.h" #include "util.h" +#include "log.h" #define BFITNOENT ((u32)~0) @@ -941,9 +942,13 @@ static int get_local_rgrp(struct gfs2_inode *ip) rgd = gfs2_rgrpd_get_first(sdp); if (rgd == begin) { - if (++loops >= 2 || !skipped) + if (++loops >= 3) return -ENOSPC; + if (!skipped) + loops++; flags = 0; + if (loops == 2) + gfs2_log_flush(sdp, NULL); } } -- cgit v1.2.3 From 04b933f27bc8e7f3f6423020cec58a4eab3bb7a7 Mon Sep 17 00:00:00 2001 From: Robert Peterson Date: Fri, 23 Mar 2007 17:05:15 -0500 Subject: [GFS2] Red Hat bz 228540: owner references In testing the previously posted and accepted patch for https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=228540 I uncovered some
gfs2 badness. It turns out that the current gfs2 code saves off a process pointer when glocks is taken in both the glock and glock holder structures. Those structures will persist in memory long after the process has ended; pointers to poisoned memory. This problem isn't caused by the 228540 fix; the new capability introduced by the fix just uncovered the problem. I wrote this patch that avoids saving process pointers and instead saves off the process pid. Rather than referencing the bad pointers, it now does process lookups. There is special code that makes the output nicer for printing holder information for processes that have ended. This patch also adds a stub for the new "sprint_symbol" function that exists in Andrew Morton's -mm patch set, but won't go into the base kernel until 2.6.22, since it adds functionality but doesn't fix a bug. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 75 ++++++++++++++++++++++++++++++++++++++++++-------------- fs/gfs2/glock.h | 2 +- fs/gfs2/incore.h | 4 +-- 3 files changed, 59 insertions(+), 22 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index b8aa816bb6e..d2e3094c40f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include "gfs2.h" #include "incore.h" @@ -54,6 +56,7 @@ struct glock_iter { typedef void (*glock_examiner) (struct gfs2_glock * gl); static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); +static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); static void gfs2_glock_drop_th(struct gfs2_glock *gl); static DECLARE_RWSEM(gfs2_umount_flush_sem); @@ -64,6 +67,7 @@ static struct dentry *gfs2_root; #define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE]; +static struct dentry *gfs2_root; /* * Despite what you might think, the 
numbers below are not arbitrary :-) @@ -312,7 +316,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, atomic_set(&gl->gl_ref, 1); gl->gl_state = LM_ST_UNLOCKED; gl->gl_hash = hash; - gl->gl_owner = NULL; + gl->gl_owner_pid = 0; gl->gl_ip = 0; gl->gl_ops = glops; gl->gl_req_gh = NULL; @@ -376,7 +380,7 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, INIT_LIST_HEAD(&gh->gh_list); gh->gh_gl = gl; gh->gh_ip = (unsigned long)__builtin_return_address(0); - gh->gh_owner = current; + gh->gh_owner_pid = current->pid; gh->gh_state = state; gh->gh_flags = flags; gh->gh_error = 0; @@ -601,7 +605,7 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl) if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { list_add_tail(&gh.gh_list, &gl->gl_waiters1); } else { - gl->gl_owner = current; + gl->gl_owner_pid = current->pid; gl->gl_ip = (unsigned long)__builtin_return_address(0); clear_bit(HIF_WAIT, &gh.gh_iflags); smp_mb(); @@ -628,7 +632,7 @@ static int gfs2_glmutex_trylock(struct gfs2_glock *gl) if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { acquired = 0; } else { - gl->gl_owner = current; + gl->gl_owner_pid = current->pid; gl->gl_ip = (unsigned long)__builtin_return_address(0); } spin_unlock(&gl->gl_spin); @@ -646,7 +650,7 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl) { spin_lock(&gl->gl_spin); clear_bit(GLF_LOCK, &gl->gl_flags); - gl->gl_owner = NULL; + gl->gl_owner_pid = 0; gl->gl_ip = 0; run_queue(gl); BUG_ON(!spin_is_locked(&gl->gl_spin)); @@ -999,12 +1003,12 @@ static int glock_wait_internal(struct gfs2_holder *gh) } static inline struct gfs2_holder * -find_holder_by_owner(struct list_head *head, struct task_struct *owner) +find_holder_by_owner(struct list_head *head, pid_t pid) { struct gfs2_holder *gh; list_for_each_entry(gh, head, gh_list) { - if (gh->gh_owner == owner) + if (gh->gh_owner_pid == pid) return gh; } @@ -1036,24 +1040,24 @@ static void add_to_queue(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; 
struct gfs2_holder *existing; - BUG_ON(!gh->gh_owner); + BUG_ON(!gh->gh_owner_pid); if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) BUG(); - existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner); + existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner_pid); if (existing) { print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); - printk(KERN_INFO "pid : %d\n", existing->gh_owner->pid); + printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid); printk(KERN_INFO "lock type : %d lock state : %d\n", existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state); print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); - printk(KERN_INFO "pid : %d\n", gh->gh_owner->pid); + printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid); printk(KERN_INFO "lock type : %d lock state : %d\n", gl->gl_name.ln_type, gl->gl_state); BUG(); } - existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner); + existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner_pid); if (existing) { print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); @@ -1756,6 +1760,22 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) * Diagnostic routines to help debug distributed deadlock */ +static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt, + unsigned long address) +{ +/* when sprint_symbol becomes available in the new kernel, replace this */ +/* function with: + char buffer[KSYM_SYMBOL_LEN]; + + sprint_symbol(buffer, address); + print_dbg(gi, fmt, buffer); +*/ + if (gi) + print_dbg(gi, fmt, address); + else + print_symbol(fmt, address); +} + /** * dump_holder - print information about a glock holder * @str: a string naming the type of holder @@ -1768,10 +1788,18 @@ static int dump_holder(struct glock_iter *gi, char *str, struct gfs2_holder *gh) { unsigned int x; + struct task_struct *gh_owner; print_dbg(gi, " %s\n", str); - print_dbg(gi, " owner = %ld\n", - (gh->gh_owner) ? 
(long)gh->gh_owner->pid : -1); + if (gh->gh_owner_pid) { + print_dbg(gi, " owner = %ld ", (long)gh->gh_owner_pid); + gh_owner = find_task_by_pid(gh->gh_owner_pid); + if (gh_owner) + print_dbg(gi, "(%s)\n", gh_owner->comm); + else + print_dbg(gi, "(ended)\n"); + } else + print_dbg(gi, " owner = -1\n"); print_dbg(gi, " gh_state = %u\n", gh->gh_state); print_dbg(gi, " gh_flags ="); for (x = 0; x < 32; x++) @@ -1784,10 +1812,7 @@ static int dump_holder(struct glock_iter *gi, char *str, if (test_bit(x, &gh->gh_iflags)) print_dbg(gi, " %u", x); print_dbg(gi, " \n"); - if (gi) - print_dbg(gi, " initialized at: 0x%x\n", gh->gh_ip); - else - print_symbol(KERN_INFO " initialized at: %s\n", gh->gh_ip); + gfs2_print_symbol(gi, " initialized at: %s\n", gh->gh_ip); return 0; } @@ -1828,6 +1853,7 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) struct gfs2_holder *gh; unsigned int x; int error = -ENOBUFS; + struct task_struct *gl_owner; spin_lock(&gl->gl_spin); @@ -1838,10 +1864,21 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) if (test_bit(x, &gl->gl_flags)) print_dbg(gi, " %u", x); } + if (!test_bit(GLF_LOCK, &gl->gl_flags)) + print_dbg(gi, " (unlocked)"); print_dbg(gi, " \n"); print_dbg(gi, " gl_ref = %d\n", atomic_read(&gl->gl_ref)); print_dbg(gi, " gl_state = %u\n", gl->gl_state); - print_dbg(gi, " gl_owner = %s\n", gl->gl_owner->comm); + if (gl->gl_owner_pid) { + gl_owner = find_task_by_pid(gl->gl_owner_pid); + if (gl_owner) + print_dbg(gi, " gl_owner = pid %d (%s)\n", + gl->gl_owner_pid, gl_owner->comm); + else + print_dbg(gi, " gl_owner = %d (ended)\n", + gl->gl_owner_pid); + } else + print_dbg(gi, " gl_owner = -1\n"); print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? 
"yes" : "no"); diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 5e662eadc6f..11477ca3a3c 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -38,7 +38,7 @@ static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) /* Look in glock's list of holders for one with current task as owner */ spin_lock(&gl->gl_spin); list_for_each_entry(gh, &gl->gl_holders, gh_list) { - if (gh->gh_owner == current) { + if (gh->gh_owner_pid == current->pid) { locked = 1; break; } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 9c125823d76..fdf04705906 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -127,7 +127,7 @@ struct gfs2_holder { struct list_head gh_list; struct gfs2_glock *gh_gl; - struct task_struct *gh_owner; + pid_t gh_owner_pid; unsigned int gh_state; unsigned gh_flags; @@ -155,7 +155,7 @@ struct gfs2_glock { unsigned int gl_hash; unsigned int gl_demote_state; /* state requested by remote node */ unsigned long gl_demote_time; /* time of first demote request */ - struct task_struct *gl_owner; + pid_t gl_owner_pid; unsigned long gl_ip; struct list_head gl_holders; struct list_head gl_waiters1; /* HIF_MUTEX */ -- cgit v1.2.3 From b9af8a788ade3435b53667873774b5366cf73f58 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 28 Mar 2007 11:08:04 -0500 Subject: [GFS2] use log_error before LM_OUT_ERROR We always want to see the details of the error returned to gfs, but log_debug is often turned off, so use log_error (printk). 
Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/gfs2/locking/dlm/lock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c index f9c8bda289a..c305255bfe8 100644 --- a/fs/gfs2/locking/dlm/lock.c +++ b/fs/gfs2/locking/dlm/lock.c @@ -266,7 +266,7 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp) } if (error) { - log_debug("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x " + log_error("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x " "flags=%lx", ls->fsname, lp->lockname.ln_type, (unsigned long long)lp->lockname.ln_number, error, lp->cur, lp->req, lp->lkf, lp->flags); @@ -294,7 +294,7 @@ static unsigned int gdlm_do_unlock(struct gdlm_lock *lp) error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp); if (error) { - log_debug("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x " + log_error("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x " "flags=%lx", ls->fsname, lp->lockname.ln_type, (unsigned long long)lp->lockname.ln_number, error, lp->cur, lp->req, lp->lkf, lp->flags); -- cgit v1.2.3 From f01963f2648cfd708ee8d521b3737cfa55ea8795 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 2 Apr 2007 10:03:24 +0100 Subject: [GFS2] Set drop_count to 0 (off) by default This sets the drop_count to 0 by default which is a better default for most people. 
Signed-off-by: Steven Whitehouse --- fs/gfs2/locking/dlm/lock_dlm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index 6888bd49c31..d074c6e6f9b 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h @@ -36,7 +36,7 @@ #define GDLM_STRNAME_BYTES 24 #define GDLM_LVB_SIZE 32 -#define GDLM_DROP_COUNT 200000 +#define GDLM_DROP_COUNT 0 #define GDLM_DROP_PERIOD 60 #define GDLM_NAME_LEN 128 -- cgit v1.2.3 From a43a49066d36612f3bb46653cdb265a89c235eff Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 2 Apr 2007 10:48:17 +0100 Subject: [GFS2] Fix bz 234168 (ignoring rgrp flags) The following patch makes GFS2 use the rgrp flags properly. Although there are also separate flags for both data and metadata as well, I've not implemented these as there seems little use for them. On the other hand, the "noalloc" flag is generally useful for future changes we might wish to make, so this ensures that we interpret it correctly. In addition I fixed the comment above the function which was incorrect. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 2ce48d4f246..1727f5012ef 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -698,8 +698,6 @@ struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) * @al: the struct gfs2_alloc structure describing the reservation * * If there's room for the requested blocks to be allocated from the RG: - * Sets the $al_reserved_data field in @al. - * Sets the $al_reserved_meta field in @al. * Sets the $al_rgd field in @al.
* * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) @@ -710,6 +708,9 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) struct gfs2_sbd *sdp = rgd->rd_sbd; int ret = 0; + if (rgd->rd_rg.rg_flags & GFS2_RGF_NOALLOC) + return 0; + spin_lock(&sdp->sd_rindex_spin); if (rgd->rd_free_clone >= al->al_requested) { al->al_rgd = rgd; -- cgit v1.2.3 From 7a0079d9e3fe8826475a08785f3d348c4b509774 Mon Sep 17 00:00:00 2001 From: Robert Peterson Date: Tue, 17 Apr 2007 11:37:11 -0500 Subject: [GFS2] bz 236008: Kernel gpf doing cat /debugfs/gfs2/xxx (lock dump) This is for Bugzilla Bug 236008: Kernel gpf doing cat /debugfs/gfs2/xxx (lock dump) seen at the "gfs2 summit". This also fixes the bug that caused garbage to be printed by the "initialized at" field. I apologize for the kludge, but that code will all be ripped out anyway when the official sprint_symbol function becomes available in the Linux kernel. I also changed some formatting so that spaces are replaced by proper tabs. 
Signed-off-by: Robert Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d2e3094c40f..b075f9359c6 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1765,15 +1765,20 @@ static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt, { /* when sprint_symbol becomes available in the new kernel, replace this */ /* function with: - char buffer[KSYM_SYMBOL_LEN]; + char buffer[KSYM_SYMBOL_LEN]; - sprint_symbol(buffer, address); - print_dbg(gi, fmt, buffer); + sprint_symbol(buffer, address); + print_dbg(gi, fmt, buffer); */ - if (gi) - print_dbg(gi, fmt, address); - else - print_symbol(fmt, address); + char buffer[256]; + + if (gi) { + memset(buffer, 0, sizeof(buffer)); + sprintf(buffer, "%p", address); + print_dbg(gi, fmt, buffer); + } + else + print_symbol(fmt, address); } /** @@ -1993,14 +1998,19 @@ int __init gfs2_glock_init(void) static int gfs2_glock_iter_next(struct glock_iter *gi) { + read_lock(gl_lock_addr(gi->hash)); while (1) { if (!gi->hb_list) { /* If we don't have a hash bucket yet */ gi->hb_list = &gl_hash_table[gi->hash].hb_list; if (hlist_empty(gi->hb_list)) { + read_unlock(gl_lock_addr(gi->hash)); gi->hash++; + read_lock(gl_lock_addr(gi->hash)); gi->hb_list = NULL; - if (gi->hash >= GFS2_GL_HASH_SIZE) + if (gi->hash >= GFS2_GL_HASH_SIZE) { + read_unlock(gl_lock_addr(gi->hash)); return 1; + } else continue; } @@ -2011,7 +2021,9 @@ static int gfs2_glock_iter_next(struct glock_iter *gi) } } else { if (gi->gl->gl_list.next == NULL) { + read_unlock(gl_lock_addr(gi->hash)); gi->hash++; + read_lock(gl_lock_addr(gi->hash)); gi->hb_list = NULL; continue; } @@ -2021,6 +2033,7 @@ static int gfs2_glock_iter_next(struct glock_iter *gi) if (gi->gl) break; } + read_unlock(gl_lock_addr(gi->hash)); return 0; } -- cgit v1.2.3 From bdd19a22f85a7039e01accd8717eaec4addd9dfd Mon Sep 17 00:00:00 
2001 From: Steven Whitehouse Date: Wed, 18 Apr 2007 09:38:42 +0100 Subject: [GFS2] Patch to detect corrupt number of dir entries in leaf and/or inode blocks This patch detects when the number of entries in a leaf block or inode block (in the case of stuffed directories) is corrupt and informs the user. It prevents us from running off the end of the array that's been allocated for the sorting in this case. Signed-off-by: Steven Whitehouse --- fs/gfs2/dir.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 82a1ac7895a..6c3ed7674a9 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1262,9 +1262,10 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, u64 leaf_no) { struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *bh; struct gfs2_leaf *lf; - unsigned entries = 0; + unsigned entries = 0, entries2 = 0; unsigned leaves = 0; const struct gfs2_dirent **darr, *dent; struct dirent_gather g; @@ -1290,7 +1291,13 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, return 0; error = -ENOMEM; - larr = vmalloc((leaves + entries) * sizeof(void *)); + /* + * The extra 99 entries are not normally used, but are a buffer + * zone in case the number of entries in the leaf is corrupt. + * 99 is the maximum number of entries that can fit in a single + * leaf block.
+ */ + larr = vmalloc((leaves + entries + 99) * sizeof(void *)); if (!larr) goto out; darr = (const struct gfs2_dirent **)(larr + leaves); @@ -1305,10 +1312,18 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, lf = (struct gfs2_leaf *)bh->b_data; lfn = be64_to_cpu(lf->lf_next); if (lf->lf_entries) { + entries2 += be16_to_cpu(lf->lf_entries); dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, gfs2_dirent_gather, NULL, &g); error = PTR_ERR(dent); - if (IS_ERR(dent)) { + if (IS_ERR(dent)) + goto out_kfree; + if (entries2 != g.offset) { + fs_warn(sdp, "Number of entries corrupt in dir leaf %llu, " + "entries2 (%u) != g.offset (%u)\n", + (u64)bh->b_blocknr, entries2, g.offset); + + error = -EIO; goto out_kfree; } error = 0; @@ -1318,6 +1333,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, } } while(lfn); + BUG_ON(entries2 != entries); error = do_filldir_main(ip, offset, opaque, filldir, darr, entries, copied); out_kfree: @@ -1401,6 +1417,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, filldir_t filldir) { struct gfs2_inode *dip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); struct dirent_gather g; const struct gfs2_dirent **darr, *dent; struct buffer_head *dibh; @@ -1423,8 +1440,8 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, return error; error = -ENOMEM; - darr = kmalloc(dip->i_di.di_entries * sizeof(struct gfs2_dirent *), - GFP_KERNEL); + /* 96 is max number of dirents which can be stuffed into an inode */ + darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_KERNEL); if (darr) { g.pdent = darr; g.offset = 0; @@ -1434,6 +1451,14 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, error = PTR_ERR(dent); goto out; } + if (dip->i_di.di_entries != g.offset) { + fs_warn(sdp, "Number of entries corrupt in dir %llu, " + "ip->i_di.di_entries (%u) != g.offset (%u)\n", + dip->i_num.no_addr, dip->i_di.di_entries, + g.offset); + error = 
-EIO; + goto out; + } error = do_filldir_main(dip, offset, opaque, filldir, darr, dip->i_di.di_entries, &copied); out: -- cgit v1.2.3 From 5f8820960cf4fb621483d4a37c24939ad831bfe7 Mon Sep 17 00:00:00 2001 From: Robert Peterson Date: Wed, 18 Apr 2007 11:41:11 -0500 Subject: [GFS2] lockdump improvements The patch below consists of the following changes (in code order): 1. I fixed a minor compiler warning regarding the printing of a kernel symbol address. 2. I implemented a suggestion from Dave Teigland that moves the debugfs information for gfs2 into a subdirectory so we can easily expand our use of debugfs in the future. The current code keeps the glock information in: /debug/gfs2/ With the patch, the new code keeps the glock information in: /debug/gfs2//glock That will allow us to create more debugfs files in the future. 3. This fixes a bug whereby a failed mount attempt causes the debugfs file to not be deleted. Failed mount attempts should always clean up after themselves, including deleting the debugfs file and/or directory. 
Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 26 ++++++++++++++++++-------- fs/gfs2/incore.h | 3 ++- fs/gfs2/ops_fstype.c | 1 + 3 files changed, 21 insertions(+), 9 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index b075f9359c6..7988715b7a0 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1774,7 +1774,7 @@ static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt, if (gi) { memset(buffer, 0, sizeof(buffer)); - sprintf(buffer, "%p", address); + sprintf(buffer, "0x%08lx", address); print_dbg(gi, fmt, buffer); } else @@ -2146,11 +2146,14 @@ static const struct file_operations gfs2_debug_fops = { int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) { - sdp->debugfs_dentry = debugfs_create_file(sdp->sd_table_name, - S_IFREG | S_IRUGO, - gfs2_root, sdp, - &gfs2_debug_fops); - if (!sdp->debugfs_dentry) + sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); + if (!sdp->debugfs_dir) + return -ENOMEM; + sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_debug_fops); + if (!sdp->debugfs_dentry_glocks) return -ENOMEM; return 0; @@ -2158,8 +2161,14 @@ int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) { - if (sdp && sdp->debugfs_dentry) - debugfs_remove(sdp->debugfs_dentry); + if (sdp && sdp->debugfs_dir) { + if (sdp->debugfs_dentry_glocks) { + debugfs_remove(sdp->debugfs_dentry_glocks); + sdp->debugfs_dentry_glocks = NULL; + } + debugfs_remove(sdp->debugfs_dir); + sdp->debugfs_dir = NULL; + } } int gfs2_register_debugfs(void) @@ -2171,4 +2180,5 @@ int gfs2_register_debugfs(void) void gfs2_unregister_debugfs(void) { debugfs_remove(gfs2_root); + gfs2_root = NULL; } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index fdf04705906..d995441373a 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -609,7 +609,8 @@ struct gfs2_sbd { unsigned long 
sd_last_warning; struct vfsmount *sd_gfs2mnt; - struct dentry *debugfs_dentry; /* for debugfs */ + struct dentry *debugfs_dir; /* debugfs directory */ + struct dentry *debugfs_dentry_glocks; /* for debugfs */ }; #endif /* __INCORE_DOT_H__ */ diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index ecb8b18de0e..2c5f8e7def0 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -756,6 +756,7 @@ fail_lm: fail_sys: gfs2_sys_fs_del(sdp); fail: + gfs2_delete_debugfs_file(sdp); kfree(sdp); sb->s_fs_info = NULL; return error; -- cgit v1.2.3 From 476c006be009d4121e401a9e9f49a3362a7a272f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 23 Apr 2007 11:55:39 -0400 Subject: [GFS2] use lib/parser for parsing mount options This patch converts the mount option parsing to use the kernel's lib/parser stuff like all of the other filesystems. I tested this and it works well. Thank you, Signed-off-by: Josef Bacik Signed-off-by: Steven Whitehouse --- fs/gfs2/mount.c | 239 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 143 insertions(+), 96 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index 32caecd2030..4864659555d 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -20,6 +21,52 @@ #include "sys.h" #include "util.h" +enum { + Opt_lockproto, + Opt_locktable, + Opt_hostdata, + Opt_spectator, + Opt_ignore_local_fs, + Opt_localflocks, + Opt_localcaching, + Opt_debug, + Opt_nodebug, + Opt_upgrade, + Opt_num_glockd, + Opt_acl, + Opt_noacl, + Opt_quota_off, + Opt_quota_account, + Opt_quota_on, + Opt_suiddir, + Opt_nosuiddir, + Opt_data_writeback, + Opt_data_ordered, +}; + +static match_table_t tokens = { + {Opt_lockproto, "lockproto=%s"}, + {Opt_locktable, "locktable=%s"}, + {Opt_hostdata, "hostdata=%s"}, + {Opt_spectator, "spectator"}, + {Opt_ignore_local_fs, "ignore_local_fs"}, + {Opt_localflocks, "localflocks"}, +
{Opt_localcaching, "localcaching"}, + {Opt_debug, "debug"}, + {Opt_nodebug, "nodebug"}, + {Opt_upgrade, "upgrade"}, + {Opt_num_glockd, "num_glockd=%d"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, + {Opt_quota_off, "quota=off"}, + {Opt_quota_account, "quota=account"}, + {Opt_quota_on, "quota=on"}, + {Opt_suiddir, "suiddir"}, + {Opt_nosuiddir, "nosuiddir"}, + {Opt_data_writeback, "data=writeback"}, + {Opt_data_ordered, "data=ordered"} +}; + /** * gfs2_mount_args - Parse mount options * @sdp: @@ -54,146 +101,150 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) process them */ for (options = data; (o = strsep(&options, ",")); ) { + int token, option; + substring_t tmp[MAX_OPT_ARGS]; + if (!*o) continue; - v = strchr(o, '='); - if (v) - *v++ = 0; + token = match_token(o, tokens, tmp); + switch (token) { + case Opt_lockproto: + v = match_strdup(&tmp[0]); + if (!v) { + fs_info(sdp, "no memory for lockproto\n"); + error = -ENOMEM; + goto out_error; + } - if (!strcmp(o, "lockproto")) { - if (!v) - goto need_value; - if (remount && strcmp(v, args->ar_lockproto)) + if (remount && strcmp(v, args->ar_lockproto)) { + kfree(v); goto cant_remount; + } + strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN); args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0; - } + kfree(v); + break; + case Opt_locktable: + v = match_strdup(&tmp[0]); + if (!v) { + fs_info(sdp, "no memory for locktable\n"); + error = -ENOMEM; + goto out_error; + } - else if (!strcmp(o, "locktable")) { - if (!v) - goto need_value; - if (remount && strcmp(v, args->ar_locktable)) + if (remount && strcmp(v, args->ar_locktable)) { + kfree(v); goto cant_remount; + } + strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN); - args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0; - } + args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0; + kfree(v); + break; + case Opt_hostdata: + v = match_strdup(&tmp[0]); + if (!v) { + fs_info(sdp, "no memory for hostdata\n"); + error = -ENOMEM; + goto out_error; + } - else if (!strcmp(o, 
"hostdata")) { - if (!v) - goto need_value; - if (remount && strcmp(v, args->ar_hostdata)) + if (remount && strcmp(v, args->ar_hostdata)) { + kfree(v); goto cant_remount; + } + strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN); args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0; - } - - else if (!strcmp(o, "spectator")) { + kfree(v); + break; + case Opt_spectator: if (remount && !args->ar_spectator) goto cant_remount; args->ar_spectator = 1; sdp->sd_vfs->s_flags |= MS_RDONLY; - } - - else if (!strcmp(o, "ignore_local_fs")) { + break; + case Opt_ignore_local_fs: if (remount && !args->ar_ignore_local_fs) goto cant_remount; args->ar_ignore_local_fs = 1; - } - - else if (!strcmp(o, "localflocks")) { + break; + case Opt_localflocks: if (remount && !args->ar_localflocks) goto cant_remount; args->ar_localflocks = 1; - } - - else if (!strcmp(o, "localcaching")) { + break; + case Opt_localcaching: if (remount && !args->ar_localcaching) goto cant_remount; args->ar_localcaching = 1; - } - - else if (!strcmp(o, "debug")) + break; + case Opt_debug: args->ar_debug = 1; - - else if (!strcmp(o, "nodebug")) + break; + case Opt_nodebug: args->ar_debug = 0; - - else if (!strcmp(o, "upgrade")) { + break; + case Opt_upgrade: if (remount && !args->ar_upgrade) goto cant_remount; args->ar_upgrade = 1; - } + break; + case Opt_num_glockd: + if ((error = match_int(&tmp[0], &option))) { + fs_info(sdp, "problem getting num_glockd\n"); + goto out_error; + } - else if (!strcmp(o, "num_glockd")) { - unsigned int x; - if (!v) - goto need_value; - sscanf(v, "%u", &x); - if (remount && x != args->ar_num_glockd) + if (remount && option != args->ar_num_glockd) goto cant_remount; - if (!x || x > GFS2_GLOCKD_MAX) { + if (!option || option > GFS2_GLOCKD_MAX) { fs_info(sdp, "0 < num_glockd <= %u (not %u)\n", - GFS2_GLOCKD_MAX, x); + GFS2_GLOCKD_MAX, option); error = -EINVAL; - break; + goto out_error; } - args->ar_num_glockd = x; - } - - else if (!strcmp(o, "acl")) { + args->ar_num_glockd = option; + break; + 
case Opt_acl: args->ar_posix_acl = 1; sdp->sd_vfs->s_flags |= MS_POSIXACL; - } - - else if (!strcmp(o, "noacl")) { + break; + case Opt_noacl: args->ar_posix_acl = 0; sdp->sd_vfs->s_flags &= ~MS_POSIXACL; - } - - else if (!strcmp(o, "quota")) { - if (!v) - goto need_value; - if (!strcmp(v, "off")) - args->ar_quota = GFS2_QUOTA_OFF; - else if (!strcmp(v, "account")) - args->ar_quota = GFS2_QUOTA_ACCOUNT; - else if (!strcmp(v, "on")) - args->ar_quota = GFS2_QUOTA_ON; - else { - fs_info(sdp, "invalid value for quota\n"); - error = -EINVAL; - break; - } - } - - else if (!strcmp(o, "suiddir")) + break; + case Opt_quota_off: + args->ar_quota = GFS2_QUOTA_OFF; + break; + case Opt_quota_account: + args->ar_quota = GFS2_QUOTA_ACCOUNT; + break; + case Opt_quota_on: + args->ar_quota = GFS2_QUOTA_ON; + break; + case Opt_suiddir: args->ar_suiddir = 1; - - else if (!strcmp(o, "nosuiddir")) + break; + case Opt_nosuiddir: args->ar_suiddir = 0; - - else if (!strcmp(o, "data")) { - if (!v) - goto need_value; - if (!strcmp(v, "writeback")) - args->ar_data = GFS2_DATA_WRITEBACK; - else if (!strcmp(v, "ordered")) - args->ar_data = GFS2_DATA_ORDERED; - else { - fs_info(sdp, "invalid value for data\n"); - error = -EINVAL; - break; - } - } - - else { + break; + case Opt_data_writeback: + args->ar_data = GFS2_DATA_WRITEBACK; + break; + case Opt_data_ordered: + args->ar_data = GFS2_DATA_ORDERED; + break; + default: fs_info(sdp, "unknown option: %s\n", o); error = -EINVAL; - break; + goto out_error; } } +out_error: if (error) fs_info(sdp, "invalid mount option(s)\n"); @@ -202,10 +253,6 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) return error; -need_value: - fs_info(sdp, "need value for option %s\n", o); - return -EINVAL; - cant_remount: fs_info(sdp, "can't remount with option %s\n", o); return -EINVAL; -- cgit v1.2.3 From bf126aee6d54fe1e509846abf3b27aba84c6d7ce Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 20 Apr 2007 09:18:30 +0100 Subject: [GFS2] 
Patch to fix mmap of stuffed files If a stuffed file is mmaped and a page fault is generated at some offset above the initial page, we need to create a zero page to hang the buffer heads off before we can unstuff the file. This is a fix for bz #236087 Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_address.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 90c287932d5..30c15622174 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -197,7 +197,19 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) void *kaddr; int error; - BUG_ON(page->index); + /* + * Due to the order of unstuffing files and ->nopage(), we can be + * asked for a zero page in the case of a stuffed file being extended, + * so we need to supply one here. It doesn't happen often. + */ + if (unlikely(page->index)) { + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr, 0, PAGE_CACHE_SIZE); + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(page); + SetPageUptodate(page); + return 0; + } error = gfs2_meta_inode_buffer(ip, &dibh); if (error) @@ -208,9 +220,8 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) ip->i_di.di_size); memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size); kunmap_atomic(kaddr, KM_USER0); - + flush_dcache_page(page); brelse(dibh); - SetPageUptodate(page); return 0; -- cgit v1.2.3 From f391a4ead61e4510ff385815ddaf3c0777fbad1b Mon Sep 17 00:00:00 2001 From: "akpm@linux-foundation.org" Date: Wed, 25 Apr 2007 21:08:02 -0700 Subject: [GFS2] printk warning fixes alpha: fs/gfs2/dir.c: In function 'gfs2_dir_read_leaf': fs/gfs2/dir.c:1322: warning: format '%llu' expects type 'long long unsigned int', but argument 3 has type 'sector_t' fs/gfs2/dir.c: In function 'gfs2_dir_read': fs/gfs2/dir.c:1455: warning: format '%llu' expects type 'long long unsigned int', but argument 3 has type '__u64' Cc: Steven 
Whitehouse Signed-off-by: Andrew Morton Signed-off-by: Steven Whitehouse --- fs/gfs2/dir.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 6c3ed7674a9..a96fa07b3f3 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1319,9 +1319,11 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, if (IS_ERR(dent)) goto out_kfree; if (entries2 != g.offset) { - fs_warn(sdp, "Number of entries corrupt in dir leaf %llu, " - "entries2 (%u) != g.offset (%u)\n", - (u64)bh->b_blocknr, entries2, g.offset); + fs_warn(sdp, "Number of entries corrupt in dir " + "leaf %llu, entries2 (%u) != " + "g.offset (%u)\n", + (unsigned long long)bh->b_blocknr, + entries2, g.offset); error = -EIO; goto out_kfree; @@ -1454,7 +1456,8 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, if (dip->i_di.di_entries != g.offset) { fs_warn(sdp, "Number of entries corrupt in dir %llu, " "ip->i_di.di_entries (%u) != g.offset (%u)\n", - dip->i_num.no_addr, dip->i_di.di_entries, + (unsigned long long)dip->i_num.no_addr, + dip->i_di.di_entries, g.offset); error = -EIO; goto out; -- cgit v1.2.3 From 37fde8ca6c60ea61f5e9d7cb877c25ac60e74167 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 1 May 2007 09:51:39 +0100 Subject: [GFS2] Uncomment sprint_symbol calling code Now that the patch from -mm has gone upstream, we can uncomment the code in GFS2 which uses sprint_symbol.
Signed-off-by: Steven Whitehouse Cc: Robert Peterson --- fs/gfs2/glock.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 7988715b7a0..1815429a297 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1763,22 +1763,10 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt, unsigned long address) { -/* when sprint_symbol becomes available in the new kernel, replace this */ -/* function with: char buffer[KSYM_SYMBOL_LEN]; sprint_symbol(buffer, address); print_dbg(gi, fmt, buffer); -*/ - char buffer[256]; - - if (gi) { - memset(buffer, 0, sizeof(buffer)); - sprintf(buffer, "0x%08lx", address); - print_dbg(gi, fmt, buffer); - } - else - print_symbol(fmt, address); } /** -- cgit v1.2.3