diff options
author | Trond Myklebust <Trond.Myklebust@netapp.com> | 2007-02-12 22:43:25 -0800 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2007-02-12 22:43:25 -0800 |
commit | d9bc125caf592b7d081021f32ce5b717efdf70c8 (patch) | |
tree | 263b7066ba22ddce21db610c0300f6eaac6f2064 /fs/gfs2 | |
parent | 43d78ef2ba5bec26d0315859e8324bfc0be23766 (diff) | |
parent | ec2f9d1331f658433411c58077871e1eef4ee1b4 (diff) |
Merge branch 'master' of /home/trondmy/kernel/linux-2.6/
Conflicts:
net/sunrpc/auth_gss/gss_krb5_crypto.c
net/sunrpc/auth_gss/gss_spkm3_token.c
net/sunrpc/clnt.c
Merge with mainline and fix conflicts.
Diffstat (limited to 'fs/gfs2')
-rw-r--r-- | fs/gfs2/Kconfig | 47 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 10 | ||||
-rw-r--r-- | fs/gfs2/dir.c | 25 | ||||
-rw-r--r-- | fs/gfs2/dir.h | 21 | ||||
-rw-r--r-- | fs/gfs2/eattr.c | 8 | ||||
-rw-r--r-- | fs/gfs2/glock.c | 316 | ||||
-rw-r--r-- | fs/gfs2/glock.h | 11 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 136 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 18 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 61 | ||||
-rw-r--r-- | fs/gfs2/lm.c | 8 | ||||
-rw-r--r-- | fs/gfs2/locking/dlm/lock_dlm.h | 2 | ||||
-rw-r--r-- | fs/gfs2/locking/dlm/main.c | 6 | ||||
-rw-r--r-- | fs/gfs2/locking/dlm/mount.c | 6 | ||||
-rw-r--r-- | fs/gfs2/locking/dlm/plock.c | 2 | ||||
-rw-r--r-- | fs/gfs2/locking/dlm/sysfs.c | 13 | ||||
-rw-r--r-- | fs/gfs2/lops.c | 14 | ||||
-rw-r--r-- | fs/gfs2/meta_io.c | 3 | ||||
-rw-r--r-- | fs/gfs2/ops_address.c | 134 | ||||
-rw-r--r-- | fs/gfs2/ops_dentry.c | 16 | ||||
-rw-r--r-- | fs/gfs2/ops_export.c | 15 | ||||
-rw-r--r-- | fs/gfs2/ops_file.c | 52 | ||||
-rw-r--r-- | fs/gfs2/ops_inode.c | 63 | ||||
-rw-r--r-- | fs/gfs2/ops_inode.h | 8 | ||||
-rw-r--r-- | fs/gfs2/ops_super.c | 13 | ||||
-rw-r--r-- | fs/gfs2/ops_super.h | 2 | ||||
-rw-r--r-- | fs/gfs2/ops_vm.c | 24 | ||||
-rw-r--r-- | fs/gfs2/super.c | 16 | ||||
-rw-r--r-- | fs/gfs2/sys.c | 10 |
29 files changed, 391 insertions, 669 deletions
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 6a2ffa2db14..de8e64c03f7 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -4,44 +4,43 @@ config GFS2_FS select FS_POSIX_ACL select CRC32 help - A cluster filesystem. + A cluster filesystem. - Allows a cluster of computers to simultaneously use a block device - that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads - and writes to the block device like a local filesystem, but also uses - a lock module to allow the computers coordinate their I/O so - filesystem consistency is maintained. One of the nifty features of - GFS is perfect consistency -- changes made to the filesystem on one - machine show up immediately on all other machines in the cluster. + Allows a cluster of computers to simultaneously use a block device + that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads + and writes to the block device like a local filesystem, but also uses + a lock module to allow the computers coordinate their I/O so + filesystem consistency is maintained. One of the nifty features of + GFS is perfect consistency -- changes made to the filesystem on one + machine show up immediately on all other machines in the cluster. - To use the GFS2 filesystem, you will need to enable one or more of - the below locking modules. Documentation and utilities for GFS2 can - be found here: http://sources.redhat.com/cluster + To use the GFS2 filesystem, you will need to enable one or more of + the below locking modules. Documentation and utilities for GFS2 can + be found here: http://sources.redhat.com/cluster config GFS2_FS_LOCKING_NOLOCK tristate "GFS2 \"nolock\" locking module" depends on GFS2_FS help - Single node locking module for GFS2. + Single node locking module for GFS2. - Use this module if you want to use GFS2 on a single node without - its clustering features. You can still take advantage of the - large file support, and upgrade to running a full cluster later on - if required. + Use this module if you want to use GFS2 on a single node without + its clustering features. You can still take advantage of the + large file support, and upgrade to running a full cluster later on + if required. - If you will only be using GFS2 in cluster mode, you do not need this - module. + If you will only be using GFS2 in cluster mode, you do not need this + module. config GFS2_FS_LOCKING_DLM tristate "GFS2 DLM locking module" - depends on GFS2_FS && NET && INET && (IPV6 || IPV6=n) + depends on GFS2_FS && SYSFS && NET && INET && (IPV6 || IPV6=n) select IP_SCTP if DLM_SCTP select CONFIGFS_FS select DLM help - Multiple node locking module for GFS2 - - Most users of GFS2 will require this module. It provides the locking - interface between GFS2 and the DLM, which is required to use GFS2 - in a cluster environment. + Multiple node locking module for GFS2 + Most users of GFS2 will require this module. It provides the locking + interface between GFS2 and the DLM, which is required to use GFS2 + in a cluster environment. diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 8240c1ff94f..113f6c9110c 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -773,7 +773,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, gfs2_free_data(ip, bstart, blen); } - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_dinode_out(ip, dibh->b_data); @@ -848,7 +848,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size) } ip->i_di.di_size = size; - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; error = gfs2_meta_inode_buffer(ip, &dibh); if (error) @@ -963,7 +963,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) if (gfs2_is_stuffed(ip)) { ip->i_di.di_size = size; - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); @@ -975,7 +975,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) if (!error) { ip->i_di.di_size = size; - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); @@ -1048,7 +1048,7 @@ static int trunc_end(struct gfs2_inode *ip) ip->i_num.no_addr; gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); } - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_bh(ip->i_gl, dibh, 1); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 0fdcb7713cd..c93ca8f361b 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -131,7 +131,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); if (ip->i_di.di_size < offset + size) ip->i_di.di_size = offset + size; - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -229,7 +229,7 @@ out: if (ip->i_di.di_size < offset + copied) ip->i_di.di_size = offset + copied; - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); @@ -1198,12 +1198,11 @@ static int compare_dents(const void *a, const void *b) */ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, - void *opaque, gfs2_filldir_t filldir, + void *opaque, filldir_t filldir, const struct gfs2_dirent **darr, u32 entries, int *copied) { const struct gfs2_dirent *dent, *dent_next; - struct gfs2_inum_host inum; u64 off, off_next; unsigned int x, y; int run = 0; @@ -1240,11 +1239,9 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, *offset = off; } - gfs2_inum_in(&inum, (char *)&dent->de_inum); - error = filldir(opaque, (const char *)(dent + 1), be16_to_cpu(dent->de_name_len), - off, &inum, + off, be64_to_cpu(dent->de_inum.no_addr), be16_to_cpu(dent->de_type)); if (error) return 1; @@ -1262,8 +1259,8 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, } static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, - gfs2_filldir_t filldir, int *copied, - unsigned *depth, u64 leaf_no) + filldir_t filldir, int *copied, unsigned *depth, + u64 leaf_no) { struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *bh; @@ -1343,7 +1340,7 @@ out: */ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, - gfs2_filldir_t filldir) + filldir_t filldir) { struct gfs2_inode *dip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -1402,7 +1399,7 @@ out: } int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, - gfs2_filldir_t filldir) + filldir_t filldir) { struct gfs2_inode *dip = GFS2_I(inode); struct dirent_gather g; @@ -1568,7 +1565,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, break; gfs2_trans_add_bh(ip->i_gl, bh, 1); ip->i_di.di_entries++; - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_dinode_out(ip, bh->b_data); brelse(bh); error = 0; @@ -1654,7 +1651,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) gfs2_consist_inode(dip); gfs2_trans_add_bh(dip->i_gl, bh, 1); dip->i_di.di_entries--; - dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); + dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_dinode_out(dip, bh->b_data); brelse(bh); mark_inode_dirty(&dip->i_inode); @@ -1702,7 +1699,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, gfs2_trans_add_bh(dip->i_gl, bh, 1); } - dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); + dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_dinode_out(dip, bh->b_data); brelse(bh); return 0; diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index b21b33668a5..48fe89046bb 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -16,30 +16,13 @@ struct inode; struct gfs2_inode; struct gfs2_inum; -/** - * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read() - * @opaque: opaque data used by the function - * @name: the name of the directory entry - * @length: the length of the name - * @offset: the entry's offset in the directory - * @inum: the inode number the entry points to - * @type: the type of inode the entry points to - * - * Returns: 0 on success, 1 if buffer full - */ - -typedef int (*gfs2_filldir_t) (void *opaque, - const char *name, unsigned int length, - u64 offset, - struct gfs2_inum_host *inum, unsigned int type); - int gfs2_dir_search(struct inode *dir, const struct qstr *filename, struct gfs2_inum_host *inum, unsigned int *type); int gfs2_dir_add(struct inode *inode, const struct qstr *filename, const struct gfs2_inum_host *inum, unsigned int type); int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); -int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque, - gfs2_filldir_t filldir); +int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, + filldir_t filldir); int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, struct gfs2_inum_host *new_inum, unsigned int new_type); diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index ebebbdcd705..0c83c7f4dda 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c @@ -301,7 +301,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { - ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -718,7 +718,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, (er->er_mode & S_IFMT)); ip->i_inode.i_mode = er->er_mode; } - ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -853,7 +853,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); ip->i_inode.i_mode = er->er_mode; } - ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -1134,7 +1134,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { - ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 438146904b5..6618c119025 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -19,6 +19,8 @@ #include <linux/gfs2_ondisk.h> #include <linux/list.h> #include <linux/lm_interface.h> +#include <linux/wait.h> +#include <linux/rwsem.h> #include <asm/uaccess.h> #include "gfs2.h" @@ -33,11 +35,6 @@ #include "super.h" #include "util.h" -struct greedy { - struct gfs2_holder gr_gh; - struct delayed_work gr_work; -}; - struct gfs2_gl_hash_bucket { struct hlist_head hb_list; }; @@ -47,6 +44,9 @@ typedef void (*glock_examiner) (struct gfs2_glock * gl); static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); static int dump_glock(struct gfs2_glock *gl); static int dump_inode(struct gfs2_inode *ip); +static void gfs2_glock_xmote_th(struct gfs2_holder *gh); +static void gfs2_glock_drop_th(struct gfs2_glock *gl); +static DECLARE_RWSEM(gfs2_umount_flush_sem); #define GFS2_GL_HASH_SHIFT 15 #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) @@ -213,30 +213,6 @@ out: } /** - * queue_empty - check to see if a glock's queue is empty - * @gl: the glock - * @head: the head of the queue to check - * - * This function protects the list in the event that a process already - * has a holder on the list and is adding a second holder for itself. - * The glmutex lock is what generally prevents processes from working - * on the same glock at once, but the special case of adding a second - * holder for yourself ("recursive" locking) doesn't involve locking - * glmutex, making the spin lock necessary. - * - * Returns: 1 if the queue is empty - */ - -static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head) -{ - int empty; - spin_lock(&gl->gl_spin); - empty = list_empty(head); - spin_unlock(&gl->gl_spin); - return empty; -} - -/** * search_bucket() - Find struct gfs2_glock by lock number * @bucket: the bucket to search * @name: The lock name @@ -395,11 +371,6 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, gh->gh_flags = flags; gh->gh_error = 0; gh->gh_iflags = 0; - init_completion(&gh->gh_wait); - - if (gh->gh_state == LM_ST_EXCLUSIVE) - gh->gh_flags |= GL_LOCAL_EXCL; - gfs2_glock_hold(gl); } @@ -417,9 +388,6 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder * { gh->gh_state = state; gh->gh_flags = flags; - if (gh->gh_state == LM_ST_EXCLUSIVE) - gh->gh_flags |= GL_LOCAL_EXCL; - gh->gh_iflags &= 1 << HIF_ALLOCED; gh->gh_ip = (unsigned long)__builtin_return_address(0); } @@ -479,6 +447,29 @@ static void gfs2_holder_put(struct gfs2_holder *gh) kfree(gh); } +static void gfs2_holder_dispose_or_wake(struct gfs2_holder *gh) +{ + if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) { + gfs2_holder_put(gh); + return; + } + clear_bit(HIF_WAIT, &gh->gh_iflags); + smp_mb(); + wake_up_bit(&gh->gh_iflags, HIF_WAIT); +} + +static int holder_wait(void *word) +{ + schedule(); + return 0; +} + +static void wait_on_holder(struct gfs2_holder *gh) +{ + might_sleep(); + wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE); +} + /** * rq_mutex - process a mutex request in the queue * @gh: the glock holder @@ -493,7 +484,9 @@ static int rq_mutex(struct gfs2_holder *gh) list_del_init(&gh->gh_list); /* gh->gh_error never examined. */ set_bit(GLF_LOCK, &gl->gl_flags); - complete(&gh->gh_wait); + clear_bit(HIF_WAIT, &gh->gh_iflags); + smp_mb(); + wake_up_bit(&gh->gh_iflags, HIF_WAIT); return 1; } @@ -511,7 +504,6 @@ static int rq_promote(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; struct gfs2_sbd *sdp = gl->gl_sbd; - const struct gfs2_glock_operations *glops = gl->gl_ops; if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { if (list_empty(&gl->gl_holders)) { @@ -526,7 +518,7 @@ static int rq_promote(struct gfs2_holder *gh) gfs2_reclaim_glock(sdp); } - glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags); + gfs2_glock_xmote_th(gh); spin_lock(&gl->gl_spin); } return 1; @@ -537,11 +529,11 @@ static int rq_promote(struct gfs2_holder *gh) set_bit(GLF_LOCK, &gl->gl_flags); } else { struct gfs2_holder *next_gh; - if (gh->gh_flags & GL_LOCAL_EXCL) + if (gh->gh_state == LM_ST_EXCLUSIVE) return 1; next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); - if (next_gh->gh_flags & GL_LOCAL_EXCL) + if (next_gh->gh_state == LM_ST_EXCLUSIVE) return 1; } @@ -549,7 +541,7 @@ static int rq_promote(struct gfs2_holder *gh) gh->gh_error = 0; set_bit(HIF_HOLDER, &gh->gh_iflags); - complete(&gh->gh_wait); + gfs2_holder_dispose_or_wake(gh); return 0; } @@ -564,7 +556,6 @@ static int rq_promote(struct gfs2_holder *gh) static int rq_demote(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - const struct gfs2_glock_operations *glops = gl->gl_ops; if (!list_empty(&gl->gl_holders)) return 1; @@ -573,10 +564,7 @@ static int rq_demote(struct gfs2_holder *gh) list_del_init(&gh->gh_list); gh->gh_error = 0; spin_unlock(&gl->gl_spin); - if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) - gfs2_holder_put(gh); - else - complete(&gh->gh_wait); + gfs2_holder_dispose_or_wake(gh); spin_lock(&gl->gl_spin); } else { gl->gl_req_gh = gh; @@ -585,9 +573,9 @@ static int rq_demote(struct gfs2_holder *gh) if (gh->gh_state == LM_ST_UNLOCKED || gl->gl_state != LM_ST_EXCLUSIVE) - glops->go_drop_th(gl); + gfs2_glock_drop_th(gl); else - glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags); + gfs2_glock_xmote_th(gh); spin_lock(&gl->gl_spin); } @@ -596,30 +584,6 @@ static int rq_demote(struct gfs2_holder *gh) } /** - * rq_greedy - process a queued request to drop greedy status - * @gh: the glock holder - * - * Returns: 1 if the queue is blocked - */ - -static int rq_greedy(struct gfs2_holder *gh) -{ - struct gfs2_glock *gl = gh->gh_gl; - - list_del_init(&gh->gh_list); - /* gh->gh_error never examined. */ - clear_bit(GLF_GREEDY, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - - gfs2_holder_uninit(gh); - kfree(container_of(gh, struct greedy, gr_gh)); - - spin_lock(&gl->gl_spin); - - return 0; -} - -/** * run_queue - process holder structures on a glock * @gl: the glock * @@ -649,8 +613,6 @@ static void run_queue(struct gfs2_glock *gl) if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) blocked = rq_demote(gh); - else if (test_bit(HIF_GREEDY, &gh->gh_iflags)) - blocked = rq_greedy(gh); else gfs2_assert_warn(gl->gl_sbd, 0); @@ -684,6 +646,8 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl) gfs2_holder_init(gl, 0, 0, &gh); set_bit(HIF_MUTEX, &gh.gh_iflags); + if (test_and_set_bit(HIF_WAIT, &gh.gh_iflags)) + BUG(); spin_lock(&gl->gl_spin); if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { @@ -691,11 +655,13 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl) } else { gl->gl_owner = current; gl->gl_ip = (unsigned long)__builtin_return_address(0); - complete(&gh.gh_wait); + clear_bit(HIF_WAIT, &gh.gh_iflags); + smp_mb(); + wake_up_bit(&gh.gh_iflags, HIF_WAIT); } spin_unlock(&gl->gl_spin); - wait_for_completion(&gh.gh_wait); + wait_on_holder(&gh); gfs2_holder_uninit(&gh); } @@ -774,6 +740,7 @@ restart: return; set_bit(HIF_DEMOTE, &new_gh->gh_iflags); set_bit(HIF_DEALLOC, &new_gh->gh_iflags); + set_bit(HIF_WAIT, &new_gh->gh_iflags); goto restart; } @@ -825,7 +792,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) int op_done = 1; gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); + gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); state_change(gl, ret & LM_OUT_ST_MASK); @@ -908,12 +875,8 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) gfs2_glock_put(gl); - if (gh) { - if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) - gfs2_holder_put(gh); - else - complete(&gh->gh_wait); - } + if (gh) + gfs2_holder_dispose_or_wake(gh); } /** @@ -924,23 +887,26 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) * */ -void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags) +void gfs2_glock_xmote_th(struct gfs2_holder *gh) { + struct gfs2_glock *gl = gh->gh_gl; struct gfs2_sbd *sdp = gl->gl_sbd; + int flags = gh->gh_flags; + unsigned state = gh->gh_state; const struct gfs2_glock_operations *glops = gl->gl_ops; int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | LM_FLAG_ANY | LM_FLAG_PRIORITY); unsigned int lck_ret; + if (glops->go_xmote_th) + glops->go_xmote_th(gl); + gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); + gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED); gfs2_assert_warn(sdp, state != gl->gl_state); - if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync) - glops->go_sync(gl); - gfs2_glock_hold(gl); gl->gl_req_bh = xmote_bh; @@ -971,10 +937,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_holder *gh = gl->gl_req_gh; - clear_bit(GLF_PREFETCH, &gl->gl_flags); - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); + gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); gfs2_assert_warn(sdp, !ret); state_change(gl, LM_ST_UNLOCKED); @@ -1001,12 +965,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) gfs2_glock_put(gl); - if (gh) { - if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) - gfs2_holder_put(gh); - else - complete(&gh->gh_wait); - } + if (gh) + gfs2_holder_dispose_or_wake(gh); } /** @@ -1015,19 +975,19 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) * */ -void gfs2_glock_drop_th(struct gfs2_glock *gl) +static void gfs2_glock_drop_th(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; const struct gfs2_glock_operations *glops = gl->gl_ops; unsigned int ret; + if (glops->go_drop_th) + glops->go_drop_th(gl); + gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); + gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); - if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync) - glops->go_sync(gl); - gfs2_glock_hold(gl); gl->gl_req_bh = drop_bh; @@ -1107,8 +1067,7 @@ static int glock_wait_internal(struct gfs2_holder *gh) if (gh->gh_flags & LM_FLAG_PRIORITY) do_cancels(gh); - wait_for_completion(&gh->gh_wait); - + wait_on_holder(gh); if (gh->gh_error) return gh->gh_error; @@ -1164,6 +1123,8 @@ static void add_to_queue(struct gfs2_holder *gh) struct gfs2_holder *existing; BUG_ON(!gh->gh_owner); + if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) + BUG(); existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner); if (existing) { @@ -1227,8 +1188,6 @@ restart: } } - clear_bit(GLF_PREFETCH, &gl->gl_flags); - return error; } @@ -1321,98 +1280,6 @@ void gfs2_glock_dq(struct gfs2_holder *gh) } /** - * gfs2_glock_prefetch - Try to prefetch a glock - * @gl: the glock - * @state: the state to prefetch in - * @flags: flags passed to go_xmote_th() - * - */ - -static void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state, - int flags) -{ - const struct gfs2_glock_operations *glops = gl->gl_ops; - - spin_lock(&gl->gl_spin); - - if (test_bit(GLF_LOCK, &gl->gl_flags) || !list_empty(&gl->gl_holders) || - !list_empty(&gl->gl_waiters1) || !list_empty(&gl->gl_waiters2) || - !list_empty(&gl->gl_waiters3) || - relaxed_state_ok(gl->gl_state, state, flags)) { - spin_unlock(&gl->gl_spin); - return; - } - - set_bit(GLF_PREFETCH, &gl->gl_flags); - set_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - - glops->go_xmote_th(gl, state, flags); -} - -static void greedy_work(struct work_struct *work) -{ - struct greedy *gr = container_of(work, struct greedy, gr_work.work); - struct gfs2_holder *gh = &gr->gr_gh; - struct gfs2_glock *gl = gh->gh_gl; - const struct gfs2_glock_operations *glops = gl->gl_ops; - - clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags); - - if (glops->go_greedy) - glops->go_greedy(gl); - - spin_lock(&gl->gl_spin); - - if (list_empty(&gl->gl_waiters2)) { - clear_bit(GLF_GREEDY, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - gfs2_holder_uninit(gh); - kfree(gr); - } else { - gfs2_glock_hold(gl); - list_add_tail(&gh->gh_list, &gl->gl_waiters2); - run_queue(gl); - spin_unlock(&gl->gl_spin); - gfs2_glock_put(gl); - } -} - -/** - * gfs2_glock_be_greedy - - * @gl: - * @time: - * - * Returns: 0 if go_greedy will be called, 1 otherwise - */ - -int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time) -{ - struct greedy *gr; - struct gfs2_holder *gh; - - if (!time || gl->gl_sbd->sd_args.ar_localcaching || - test_and_set_bit(GLF_GREEDY, &gl->gl_flags)) - return 1; - - gr = kmalloc(sizeof(struct greedy), GFP_KERNEL); - if (!gr) { - clear_bit(GLF_GREEDY, &gl->gl_flags); - return 1; - } - gh = &gr->gr_gh; - - gfs2_holder_init(gl, 0, 0, gh); - set_bit(HIF_GREEDY, &gh->gh_iflags); - INIT_DELAYED_WORK(&gr->gr_work, greedy_work); - - set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags); - schedule_delayed_work(&gr->gr_work, time); - - return 0; -} - -/** * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it * @gh: the holder structure * @@ -1470,10 +1337,7 @@ static int glock_compare(const void *arg_a, const void *arg_b) return 1; if (a->ln_number < b->ln_number) return -1; - if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE) - return 1; - if (!(gh_a->gh_flags & GL_LOCAL_EXCL) && (gh_b->gh_flags & GL_LOCAL_EXCL)) - return 1; + BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type); return 0; } @@ -1618,34 +1482,6 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) } /** - * gfs2_glock_prefetch_num - prefetch a glock based on lock number - * @sdp: the filesystem - * @number: the lock number - * @glops: the glock operations for the type of glock - * @state: the state to acquire the glock in - * @flags: modifier flags for the aquisition - * - * Returns: errno - */ - -void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number, - const struct gfs2_glock_operations *glops, - unsigned int state, int flags) -{ - struct gfs2_glock *gl; - int error; - - if (atomic_read(&sdp->sd_reclaim_count) < - gfs2_tune_get(sdp, gt_reclaim_limit)) { - error = gfs2_glock_get(sdp, number, glops, CREATE, &gl); - if (!error) { - gfs2_glock_prefetch(gl, state, flags); - gfs2_glock_put(gl); - } - } -} - -/** * gfs2_lvb_hold - attach a LVB from a glock * @gl: The glock in question * @@ -1703,8 +1539,6 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, if (!gl) return; - if (gl->gl_ops->go_callback) - gl->gl_ops->go_callback(gl, state); handle_callback(gl, state); spin_lock(&gl->gl_spin); @@ -1746,12 +1580,14 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) struct lm_async_cb *async = data; struct gfs2_glock *gl; + down_read(&gfs2_umount_flush_sem); gl = gfs2_glock_find(sdp, &async->lc_name); if (gfs2_assert_warn(sdp, gl)) return; if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) gl->gl_req_bh(gl, async->lc_ret); gfs2_glock_put(gl); + up_read(&gfs2_umount_flush_sem); return; } @@ -1781,15 +1617,11 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) static int demote_ok(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; const struct gfs2_glock_operations *glops = gl->gl_ops; int demote = 1; if (test_bit(GLF_STICKY, &gl->gl_flags)) demote = 0; - else if (test_bit(GLF_PREFETCH, &gl->gl_flags)) - demote = time_after_eq(jiffies, gl->gl_stamp + - gfs2_tune_get(sdp, gt_prefetch_secs) * HZ); else if (glops->go_demote_ok) demote = glops->go_demote_ok(gl); @@ -1845,7 +1677,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp) atomic_inc(&sdp->sd_reclaimed); if (gfs2_glmutex_trylock(gl)) { - if (queue_empty(gl, &gl->gl_holders) && + if (list_empty(&gl->gl_holders) && gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) handle_callback(gl, LM_ST_UNLOCKED); gfs2_glmutex_unlock(gl); @@ -1909,7 +1741,7 @@ static void scan_glock(struct gfs2_glock *gl) return; if (gfs2_glmutex_trylock(gl)) { - if (queue_empty(gl, &gl->gl_holders) && + if (list_empty(&gl->gl_holders) && gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) goto out_schedule; gfs2_glmutex_unlock(gl); @@ -1958,7 +1790,7 @@ static void clear_glock(struct gfs2_glock *gl) } if (gfs2_glmutex_trylock(gl)) { - if (queue_empty(gl, &gl->gl_holders) && + if (list_empty(&gl->gl_holders) && gl->gl_state != LM_ST_UNLOCKED) handle_callback(gl, LM_ST_UNLOCKED); gfs2_glmutex_unlock(gl); @@ -2000,7 +1832,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) t = jiffies; } + down_write(&gfs2_umount_flush_sem); invalidate_inodes(sdp->sd_vfs); + up_write(&gfs2_umount_flush_sem); msleep(10); } } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index fb39108fc05..f50e40ceca4 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -20,7 +20,6 @@ #define LM_FLAG_ANY 0x00000008 #define LM_FLAG_PRIORITY 0x00000010 */ -#define GL_LOCAL_EXCL 0x00000020 #define GL_ASYNC 0x00000040 #define GL_EXACT 0x00000080 #define GL_SKIP 0x00000100 @@ -83,17 +82,11 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *gh); void gfs2_holder_uninit(struct gfs2_holder *gh); - -void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags); -void gfs2_glock_drop_th(struct gfs2_glock *gl); - int gfs2_glock_nq(struct gfs2_holder *gh); int gfs2_glock_poll(struct gfs2_holder *gh); int gfs2_glock_wait(struct gfs2_holder *gh); void gfs2_glock_dq(struct gfs2_holder *gh); -int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time); - void gfs2_glock_dq_uninit(struct gfs2_holder *gh); int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, @@ -103,10 +96,6 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); -void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number, - const struct gfs2_glock_operations *glops, - unsigned int state, int flags); - /** * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock * @gl: the glock diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index b068d10bcb6..c4b0391b7aa 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -117,12 +117,14 @@ static void gfs2_pte_inval(struct gfs2_glock *gl) static void meta_go_sync(struct gfs2_glock *gl) { + if (gl->gl_state != LM_ST_EXCLUSIVE) + return; + if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) { gfs2_log_flush(gl->gl_sbd, gl); gfs2_meta_sync(gl); gfs2_ail_empty_gl(gl); } - } /** @@ -142,6 +144,37 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags) } /** + * inode_go_sync - Sync the dirty data and/or metadata for an inode glock + * @gl: the glock protecting the inode + * + */ + +static void inode_go_sync(struct gfs2_glock *gl) +{ + struct gfs2_inode *ip = gl->gl_object; + + if (ip && !S_ISREG(ip->i_inode.i_mode)) + ip = NULL; + + if (test_bit(GLF_DIRTY, &gl->gl_flags)) { + gfs2_log_flush(gl->gl_sbd, gl); + if (ip) + filemap_fdatawrite(ip->i_inode.i_mapping); + gfs2_meta_sync(gl); + if (ip) { + struct address_space *mapping = ip->i_inode.i_mapping; + int error = filemap_fdatawait(mapping); + if (error == -ENOSPC) + set_bit(AS_ENOSPC, &mapping->flags); + else if (error) + set_bit(AS_EIO, &mapping->flags); + } + clear_bit(GLF_DIRTY, &gl->gl_flags); + gfs2_ail_empty_gl(gl); + } +} + +/** * inode_go_xmote_th - promote/demote a glock * @gl: the glock * @state: the requested state @@ -149,12 +182,12 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags) * */ -static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state, - int flags) +static void inode_go_xmote_th(struct gfs2_glock *gl) { if (gl->gl_state != LM_ST_UNLOCKED) gfs2_pte_inval(gl); - gfs2_glock_xmote_th(gl, state, flags); + if (gl->gl_state == LM_ST_EXCLUSIVE) + inode_go_sync(gl); } /** @@ -189,38 +222,8 @@ static void inode_go_xmote_bh(struct gfs2_glock *gl) static void inode_go_drop_th(struct gfs2_glock *gl) { gfs2_pte_inval(gl); - gfs2_glock_drop_th(gl); -} - -/** - * inode_go_sync - Sync the dirty data and/or metadata for an inode glock - * @gl: the glock protecting the inode - * - */ - -static void inode_go_sync(struct gfs2_glock *gl) -{ - struct gfs2_inode *ip = gl->gl_object; - - if (ip && !S_ISREG(ip->i_inode.i_mode)) - ip = NULL; - - if (test_bit(GLF_DIRTY, &gl->gl_flags)) { - gfs2_log_flush(gl->gl_sbd, gl); - if (ip) - filemap_fdatawrite(ip->i_inode.i_mapping); - gfs2_meta_sync(gl); - if (ip) { - struct address_space *mapping = ip->i_inode.i_mapping; - int error = filemap_fdatawait(mapping); - if (error == -ENOSPC) - set_bit(AS_ENOSPC, &mapping->flags); - else if (error) - set_bit(AS_EIO, &mapping->flags); - } - clear_bit(GLF_DIRTY, &gl->gl_flags); - gfs2_ail_empty_gl(gl); - } + if (gl->gl_state == LM_ST_EXCLUSIVE) + inode_go_sync(gl); } /** @@ -295,7 +298,7 @@ static int inode_go_lock(struct gfs2_holder *gh) if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) && (gl->gl_state == LM_ST_EXCLUSIVE) && - (gh->gh_flags & GL_LOCAL_EXCL)) + (gh->gh_state == LM_ST_EXCLUSIVE)) error = gfs2_truncatei_resume(ip); return error; @@ -319,39 +322,6 @@ static void inode_go_unlock(struct gfs2_holder *gh) } /** - * inode_greedy - - * @gl: the glock - * - */ - -static void inode_greedy(struct gfs2_glock *gl) -{ - struct gfs2_sbd *sdp = gl->gl_sbd; - struct gfs2_inode *ip = gl->gl_object; - unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum); - unsigned int max = gfs2_tune_get(sdp, gt_greedy_max); - unsigned int new_time; - - spin_lock(&ip->i_spin); - - if (time_after(ip->i_last_pfault + quantum, jiffies)) { - new_time = ip->i_greedy + quantum; - if (new_time > max) - new_time = max; - } else { - new_time = ip->i_greedy - quantum; - if (!new_time || new_time > max) - new_time = 1; - } - - ip->i_greedy = new_time; - - spin_unlock(&ip->i_spin); - - iput(&ip->i_inode); -} - -/** * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock * @gl: the glock * @@ -398,8 +368,7 @@ static void rgrp_go_unlock(struct gfs2_holder *gh) * */ -static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state, - int flags) +static void trans_go_xmote_th(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; @@ -408,8 +377,6 @@ static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state, gfs2_meta_syncfs(sdp); gfs2_log_shutdown(sdp); } - - gfs2_glock_xmote_th(gl, state, flags); } /** @@ -461,8 +428,6 @@ static void trans_go_drop_th(struct gfs2_glock *gl) gfs2_meta_syncfs(sdp); gfs2_log_shutdown(sdp); } - - gfs2_glock_drop_th(gl); } /** @@ -478,8 +443,8 @@ static int quota_go_demote_ok(struct gfs2_glock *gl) } const struct gfs2_glock_operations gfs2_meta_glops = { - .go_xmote_th = gfs2_glock_xmote_th, - .go_drop_th = gfs2_glock_drop_th, + .go_xmote_th = meta_go_sync, + .go_drop_th = meta_go_sync, .go_type = LM_TYPE_META, }; @@ -487,19 +452,14 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_xmote_th = inode_go_xmote_th, .go_xmote_bh = inode_go_xmote_bh, .go_drop_th = inode_go_drop_th, - .go_sync = inode_go_sync, .go_inval = inode_go_inval, .go_demote_ok = inode_go_demote_ok, .go_lock = inode_go_lock, .go_unlock = inode_go_unlock, - .go_greedy = inode_greedy, .go_type = LM_TYPE_INODE, }; const struct gfs2_glock_operations gfs2_rgrp_glops = { - .go_xmote_th = gfs2_glock_xmote_th, - .go_drop_th = gfs2_glock_drop_th, - .go_sync = meta_go_sync, .go_inval = meta_go_inval, .go_demote_ok = rgrp_go_demote_ok, .go_lock = rgrp_go_lock, @@ -515,33 +475,23 @@ const struct gfs2_glock_operations gfs2_trans_glops = { }; const struct gfs2_glock_operations gfs2_iopen_glops = { - .go_xmote_th = gfs2_glock_xmote_th, - .go_drop_th = gfs2_glock_drop_th, .go_type = LM_TYPE_IOPEN, }; const struct gfs2_glock_operations gfs2_flock_glops = { - .go_xmote_th = gfs2_glock_xmote_th, - .go_drop_th = gfs2_glock_drop_th, .go_type = LM_TYPE_FLOCK, }; const struct gfs2_glock_operations gfs2_nondisk_glops = { - .go_xmote_th = gfs2_glock_xmote_th, - .go_drop_th = gfs2_glock_drop_th, .go_type = LM_TYPE_NONDISK, }; const struct gfs2_glock_operations gfs2_quota_glops = { - .go_xmote_th = gfs2_glock_xmote_th, - .go_drop_th = gfs2_glock_drop_th, .go_demote_ok = quota_go_demote_ok, .go_type = LM_TYPE_QUOTA, }; const struct gfs2_glock_operations gfs2_journal_glops = { - .go_xmote_th = gfs2_glock_xmote_th, - .go_drop_th = gfs2_glock_drop_th, .go_type = LM_TYPE_JOURNAL, }; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 734421edae8..12c80fd28db 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -101,17 +101,14 @@ struct gfs2_bufdata { }; struct gfs2_glock_operations { - void (*go_xmote_th) (struct gfs2_glock *gl, unsigned int state, int flags); + void (*go_xmote_th) (struct gfs2_glock *gl); void (*go_xmote_bh) (struct gfs2_glock *gl); void (*go_drop_th) (struct gfs2_glock *gl); void (*go_drop_bh) (struct gfs2_glock *gl); - void (*go_sync) (struct gfs2_glock *gl); void (*go_inval) (struct gfs2_glock *gl, int flags); int (*go_demote_ok) (struct gfs2_glock *gl); int (*go_lock) (struct gfs2_holder *gh); void (*go_unlock) (struct gfs2_holder *gh); - void (*go_callback) (struct gfs2_glock *gl, unsigned int state); - void (*go_greedy) (struct gfs2_glock *gl); const int go_type; }; @@ -120,7 +117,6 @@ enum { HIF_MUTEX = 0, HIF_PROMOTE = 1, HIF_DEMOTE = 2, - HIF_GREEDY = 3, /* States */ HIF_ALLOCED = 4, @@ -128,6 +124,7 @@ enum { HIF_HOLDER = 6, HIF_FIRST = 7, HIF_ABORTED = 9, + HIF_WAIT = 10, }; struct gfs2_holder { @@ -140,17 +137,14 @@ struct gfs2_holder { int gh_error; unsigned long gh_iflags; - struct completion gh_wait; unsigned long gh_ip; }; enum { GLF_LOCK = 1, GLF_STICKY = 2, - GLF_PREFETCH = 3, GLF_DIRTY = 5, GLF_SKIP_WAITERS2 = 6, - GLF_GREEDY = 7, }; struct gfs2_glock { @@ -167,7 +161,7 @@ struct gfs2_glock { unsigned long gl_ip; struct list_head gl_holders; struct list_head gl_waiters1; /* HIF_MUTEX */ - struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */ + struct list_head gl_waiters2; /* HIF_DEMOTE */ struct list_head gl_waiters3; /* HIF_PROMOTE */ const struct gfs2_glock_operations *gl_ops; @@ -236,7 +230,6 @@ struct gfs2_inode { spinlock_t i_spin; struct rw_semaphore i_rw_mutex; - unsigned int i_greedy; unsigned long i_last_pfault; struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT]; @@ -418,17 +411,12 @@ struct gfs2_tune { unsigned int gt_atime_quantum; /* Min secs between atime updates */ unsigned int gt_new_files_jdata; unsigned int gt_new_files_directio; - unsigned int gt_max_atomic_write; /* Split big writes into this size */ unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ unsigned int gt_lockdump_size; unsigned int gt_stall_secs; /* Detects trouble! */ unsigned int gt_complain_secs; unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */ unsigned int gt_entries_per_readdir; - unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */ - unsigned int gt_greedy_default; - unsigned int gt_greedy_quantum; - unsigned int gt_greedy_max; unsigned int gt_statfs_quantum; unsigned int gt_statfs_slow; }; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index d122074c45e..0d6831a4056 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -287,10 +287,8 @@ out: * * Returns: errno */ - int gfs2_change_nlink(struct gfs2_inode *ip, int diff) { - struct gfs2_sbd *sdp = ip->i_inode.i_sb->s_fs_info; struct buffer_head *dibh; u32 nlink; int error; @@ -315,42 +313,34 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff) else drop_nlink(&ip->i_inode); - ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); mark_inode_dirty(&ip->i_inode); - if (ip->i_inode.i_nlink == 0) { - struct gfs2_rgrpd *rgd; - struct gfs2_holder ri_gh, rg_gh; - - error = gfs2_rindex_hold(sdp, &ri_gh); - if (error) - goto out; - error = -EIO; - rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); - if (!rgd) - goto out_norgrp; - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh); - if (error) - goto out_norgrp; - + if (ip->i_inode.i_nlink == 0) gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ - gfs2_glock_dq_uninit(&rg_gh); -out_norgrp: - gfs2_glock_dq_uninit(&ri_gh); - } -out: + return error; } struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) { struct qstr qstr; + struct inode *inode; gfs2_str2qstr(&qstr, name); - return gfs2_lookupi(dip, &qstr, 1, NULL); + inode = gfs2_lookupi(dip, &qstr, 1, NULL); + /* gfs2_lookupi has inconsistent callers: vfs + * related routines expect NULL for no entry found, + * gfs2_lookup_simple callers expect ENOENT + * and do not check for NULL. + */ + if (inode == NULL) + return ERR_PTR(-ENOENT); + else + return inode; } @@ -361,8 +351,10 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) * @is_root: If 1, ignore the caller's permissions * @i_gh: An uninitialized holder for the new inode glock * - * There will always be a vnode (Linux VFS inode) for the d_gh inode unless - * @is_root is true. + * This can be called via the VFS filldir function when NFS is doing + * a readdirplus and the inode which its intending to stat isn't + * already in cache. In this case we must not take the directory glock + * again, since the readdir call will have already taken that lock. * * Returns: errno */ @@ -375,8 +367,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, struct gfs2_holder d_gh; struct gfs2_inum_host inum; unsigned int type; - int error = 0; + int error; struct inode *inode = NULL; + int unlock = 0; if (!name->len || name->len > GFS2_FNAMESIZE) return ERR_PTR(-ENAMETOOLONG); @@ -388,9 +381,12 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, return dir; } - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); - if (error) - return ERR_PTR(error); + if (gfs2_glock_is_locked_by_me(dip->i_gl) == 0) { + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); + if (error) + return ERR_PTR(error); + unlock = 1; + } if (!is_root) { error = permission(dir, MAY_EXEC, NULL); @@ -405,10 +401,11 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, inode = gfs2_inode_lookup(sb, &inum, type); out: - gfs2_glock_dq_uninit(&d_gh); + if (unlock) + gfs2_glock_dq_uninit(&d_gh); if (error == -ENOENT) return NULL; - return inode; + return inode ? inode : ERR_PTR(error); } static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c index effe4a337c1..e30673dd37e 100644 --- a/fs/gfs2/lm.c +++ b/fs/gfs2/lm.c @@ -104,15 +104,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) vprintk(fmt, args); va_end(args); - fs_err(sdp, "about to withdraw from the cluster\n"); + fs_err(sdp, "about to withdraw this file system\n"); BUG_ON(sdp->sd_args.ar_debug); - - fs_err(sdp, "waiting for outstanding I/O\n"); - - /* FIXME: suspend dm device so oustanding bio's complete - and all further io requests fail */ - fs_err(sdp, "telling LM to withdraw\n"); gfs2_withdraw_lockproto(&sdp->sd_lockstruct); fs_err(sdp, "withdrawn\n"); diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index 33af707a4d3..a87c7bf3c56 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h @@ -36,7 +36,7 @@ #define GDLM_STRNAME_BYTES 24 #define GDLM_LVB_SIZE 32 -#define GDLM_DROP_COUNT 50000 +#define GDLM_DROP_COUNT 200000 #define GDLM_DROP_PERIOD 60 #define GDLM_NAME_LEN 128 diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c index 2194b1d5b5e..a0e7eda643e 100644 --- a/fs/gfs2/locking/dlm/main.c +++ b/fs/gfs2/locking/dlm/main.c @@ -11,9 +11,6 @@ #include "lock_dlm.h" -extern int gdlm_drop_count; -extern int gdlm_drop_period; - extern struct lm_lockops gdlm_ops; static int __init init_lock_dlm(void) @@ -40,9 +37,6 @@ static int __init init_lock_dlm(void) return error; } - gdlm_drop_count = GDLM_DROP_COUNT; - gdlm_drop_period = GDLM_DROP_PERIOD; - printk(KERN_INFO "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__); return 0; diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index cdd1694e889..1d8faa3da8a 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c @@ -9,8 +9,6 @@ #include "lock_dlm.h" -int gdlm_drop_count; -int gdlm_drop_period; const struct lm_lockops gdlm_ops; @@ -24,8 +22,8 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp, if (!ls) return NULL; - ls->drop_locks_count = gdlm_drop_count; - ls->drop_locks_period = gdlm_drop_period; + ls->drop_locks_count = GDLM_DROP_COUNT; + ls->drop_locks_period = GDLM_DROP_PERIOD; ls->fscb = cb; ls->sdp = sdp; ls->fsflags = flags; diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c index 3799f19b282..1dd4215b83d 100644 --- a/fs/gfs2/locking/dlm/plock.c +++ b/fs/gfs2/locking/dlm/plock.c @@ -264,7 +264,7 @@ static unsigned int dev_poll(struct file *file, poll_table *wait) return 0; } -static struct file_operations dev_fops = { +static const struct file_operations dev_fops = { .read = dev_read, .write = dev_write, .poll = dev_poll, diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c index 29ae06f9494..4746b884662 100644 --- a/fs/gfs2/locking/dlm/sysfs.c +++ b/fs/gfs2/locking/dlm/sysfs.c @@ -116,6 +116,17 @@ static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf) return sprintf(buf, "%d\n", ls->recover_jid_status); } +static ssize_t drop_count_show(struct gdlm_ls *ls, char *buf) +{ + return sprintf(buf, "%d\n", ls->drop_locks_count); +} + +static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf, size_t len) +{ + ls->drop_locks_count = simple_strtol(buf, NULL, 0); + return len; +} + struct gdlm_attr { struct attribute attr; ssize_t (*show)(struct gdlm_ls *, char *); @@ -135,6 +146,7 @@ GDLM_ATTR(first_done, 0444, first_done_show, NULL); GDLM_ATTR(recover, 0644, recover_show, recover_store); GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); +GDLM_ATTR(drop_count, 0644, drop_count_show, drop_count_store); static struct attribute *gdlm_attrs[] = { &gdlm_attr_proto_name.attr, @@ -147,6 +159,7 @@ static struct attribute *gdlm_attrs[] = { &gdlm_attr_recover.attr, &gdlm_attr_recover_done.attr, &gdlm_attr_recover_status.attr, + &gdlm_attr_drop_count.attr, NULL, }; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 4d7f94d8c7b..16bb4b4561a 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -69,13 +69,16 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); struct gfs2_trans *tr; - if (!list_empty(&bd->bd_list_tr)) + gfs2_log_lock(sdp); + if (!list_empty(&bd->bd_list_tr)) { + gfs2_log_unlock(sdp); return; - + } tr = current->journal_info; tr->tr_touched = 1; tr->tr_num_buf++; list_add(&bd->bd_list_tr, &tr->tr_list_buf); + gfs2_log_unlock(sdp); if (!list_empty(&le->le_list)) return; @@ -84,7 +87,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) gfs2_meta_check(sdp, bd->bd_bh); gfs2_pin(sdp, bd->bd_bh); - gfs2_log_lock(sdp); sdp->sd_log_num_buf++; list_add(&le->le_list, &sdp->sd_log_le_buf); @@ -98,11 +100,13 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) struct list_head *head = &tr->tr_list_buf; struct gfs2_bufdata *bd; + gfs2_log_lock(sdp); while (!list_empty(head)) { bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); list_del_init(&bd->bd_list_tr); tr->tr_num_buf--; } + gfs2_log_unlock(sdp); gfs2_assert_warn(sdp, !tr->tr_num_buf); } @@ -462,13 +466,17 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) struct address_space *mapping = bd->bd_bh->b_page->mapping; struct gfs2_inode *ip = GFS2_I(mapping->host); + gfs2_log_lock(sdp); tr->tr_touched = 1; if (list_empty(&bd->bd_list_tr) && (ip->i_di.di_flags & GFS2_DIF_JDATA)) { tr->tr_num_buf++; list_add(&bd->bd_list_tr, &tr->tr_list_buf); + gfs2_log_unlock(sdp); gfs2_pin(sdp, bd->bd_bh); tr->tr_num_buf_new++; + } else { + gfs2_log_unlock(sdp); } gfs2_trans_add_gl(bd->bd_gl); gfs2_log_lock(sdp); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 0e34d991897..e62d4f620c5 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -282,8 +282,7 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, return; } - bd = kmem_cache_alloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL), - memset(bd, 0, sizeof(struct gfs2_bufdata)); + bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL), bd->bd_bh = bh; bd->bd_gl = gl; diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index d8d69a72a10..56e33590b65 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -16,6 +16,7 @@ #include <linux/pagevec.h> #include <linux/mpage.h> #include <linux/fs.h> +#include <linux/writeback.h> #include <linux/gfs2_ondisk.h> #include <linux/lm_interface.h> @@ -157,6 +158,32 @@ out_ignore: } /** + * gfs2_writepages - Write a bunch of dirty pages back to disk + * @mapping: The mapping to write + * @wbc: Write-back control + * + * For journaled files and/or ordered writes this just falls back to the + * kernel's default writepages path for now. We will probably want to change + * that eventually (i.e. when we look at allocate on flush). + * + * For the data=writeback case though we can already ignore buffer heads + * and write whole extents at once. This is a big reduction in the + * number of I/O requests we send and the bmap calls we make in this case. + */ +static int gfs2_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct inode *inode = mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + + if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip)) + return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); + + return generic_writepages(mapping, wbc); +} + +/** * stuffed_readpage - Fill in a Linux page with stuffed file data * @ip: the inode * @page: the page @@ -256,7 +283,7 @@ out_unlock: * the page lock and the glock) and return having done no I/O. Its * obviously not something we'd want to do on too regular a basis. * Any I/O we ignore at this time will be done via readpage later. - * 2. We have to handle stuffed files here too. + * 2. We don't handle stuffed files here we let readpage do the honours. * 3. mpage_readpages() does most of the heavy lifting in the common case. * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as @@ -269,8 +296,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_holder gh; - unsigned page_idx; - int ret; + int ret = 0; int do_unlock = 0; if (likely(file != &gfs2_internal_file_sentinel)) { @@ -289,29 +315,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, goto out_unlock; } skip_lock: - if (gfs2_is_stuffed(ip)) { - struct pagevec lru_pvec; - pagevec_init(&lru_pvec, 0); - for (page_idx = 0; page_idx < nr_pages; page_idx++) { - struct page *page = list_entry(pages->prev, struct page, lru); - prefetchw(&page->flags); - list_del(&page->lru); - if (!add_to_page_cache(page, mapping, - page->index, GFP_KERNEL)) { - ret = stuffed_readpage(ip, page); - unlock_page(page); - if (!pagevec_add(&lru_pvec, page)) - __pagevec_lru_add(&lru_pvec); - } else { - page_cache_release(page); - } - } - pagevec_lru_add(&lru_pvec); - ret = 0; - } else { - /* What we really want to do .... */ + if (!gfs2_is_stuffed(ip)) ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); - } if (do_unlock) { gfs2_glock_dq_m(1, &gh); @@ -356,8 +361,10 @@ static int gfs2_prepare_write(struct file *file, struct page *page, gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh); error = gfs2_glock_nq_atime(&ip->i_gh); if (unlikely(error)) { - if (error == GLR_TRYFAILED) + if (error == GLR_TRYFAILED) { + unlock_page(page); error = AOP_TRUNCATED_PAGE; + } goto out_uninit; } @@ -594,6 +601,36 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset) return; } +/** + * gfs2_ok_for_dio - check that dio is valid on this file + * @ip: The inode + * @rw: READ or WRITE + * @offset: The offset at which we are reading or writing + * + * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) + * 1 (to accept the i/o request) + */ +static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) +{ + /* + * Should we return an error here? I can't see that O_DIRECT for + * a journaled file makes any sense. For now we'll silently fall + * back to buffered I/O, likewise we do the same for stuffed + * files since they are (a) small and (b) unaligned. + */ + if (gfs2_is_jdata(ip)) + return 0; + + if (gfs2_is_stuffed(ip)) + return 0; + + if (offset > i_size_read(&ip->i_inode)) + return 0; + return 1; +} + + + static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) @@ -604,42 +641,28 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, struct gfs2_holder gh; int rv; - if (rw == READ) - mutex_lock(&inode->i_mutex); /* - * Shared lock, even if its a write, since we do no allocation - * on this path. All we need change is atime. + * Deferred lock, even if its a write, since we do no allocation + * on this path. All we need change is atime, and this lock mode + * ensures that other nodes have flushed their buffered read caches + * (i.e. their page cache entries for this inode). We do not, + * unfortunately have the option of only flushing a range like + * the VFS does. */ - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); + gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh); rv = gfs2_glock_nq_atime(&gh); if (rv) - goto out; - - if (offset > i_size_read(inode)) - goto out; - - /* - * Should we return an error here? I can't see that O_DIRECT for - * a journaled file makes any sense. For now we'll silently fall - * back to buffered I/O, likewise we do the same for stuffed - * files since they are (a) small and (b) unaligned. - */ - if (gfs2_is_jdata(ip)) - goto out; - - if (gfs2_is_stuffed(ip)) - goto out; - - rv = blockdev_direct_IO_own_locking(rw, iocb, inode, - inode->i_sb->s_bdev, - iov, offset, nr_segs, - gfs2_get_block_direct, NULL); + return rv; + rv = gfs2_ok_for_dio(ip, rw, offset); + if (rv != 1) + goto out; /* dio not valid, fall back to buffered i/o */ + + rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev, + iov, offset, nr_segs, + gfs2_get_block_direct, NULL); out: gfs2_glock_dq_m(1, &gh); gfs2_holder_uninit(&gh); - if (rw == READ) - mutex_unlock(&inode->i_mutex); - return rv; } @@ -763,6 +786,7 @@ out: const struct address_space_operations gfs2_file_aops = { .writepage = gfs2_writepage, + .writepages = gfs2_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, .sync_page = block_sync_page, diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c index d355899585d..9187eb174b4 100644 --- a/fs/gfs2/ops_dentry.c +++ b/fs/gfs2/ops_dentry.c @@ -46,6 +46,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) struct gfs2_inum_host inum; unsigned int type; int error; + int had_lock=0; if (inode && is_bad_inode(inode)) goto invalid; @@ -53,9 +54,12 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) if (sdp->sd_args.ar_localcaching) goto valid; - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); - if (error) - goto fail; + had_lock = gfs2_glock_is_locked_by_me(dip->i_gl); + if (!had_lock) { + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); + if (error) + goto fail; + } error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type); switch (error) { @@ -82,13 +86,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) } valid_gunlock: - gfs2_glock_dq_uninit(&d_gh); + if (!had_lock) + gfs2_glock_dq_uninit(&d_gh); valid: dput(parent); return 1; invalid_gunlock: - gfs2_glock_dq_uninit(&d_gh); + if (!had_lock) + gfs2_glock_dq_uninit(&d_gh); invalid: if (inode && S_ISDIR(inode->i_mode)) { if (have_submounts(dentry)) diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index b4e7b877531..4855e8cca62 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c @@ -22,6 +22,7 @@ #include "glock.h" #include "glops.h" #include "inode.h" +#include "ops_dentry.h" #include "ops_export.h" #include "rgrp.h" #include "util.h" @@ -112,13 +113,12 @@ struct get_name_filldir { char *name; }; -static int get_name_filldir(void *opaque, const char *name, unsigned int length, - u64 offset, struct gfs2_inum_host *inum, - unsigned int type) +static int get_name_filldir(void *opaque, const char *name, int length, + loff_t offset, u64 inum, unsigned int type) { - struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque; + struct get_name_filldir *gnfd = opaque; - if (!gfs2_inum_equal(inum, &gnfd->inum)) + if (inum != gnfd->inum.no_addr) return 0; memcpy(gnfd->name, name, length); @@ -189,6 +189,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child) return ERR_PTR(-ENOMEM); } + dentry->d_op = &gfs2_dops; return dentry; } @@ -215,8 +216,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) } error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops, - LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL, - &i_gh); + LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return ERR_PTR(error); @@ -269,6 +269,7 @@ out_inode: return ERR_PTR(-ENOMEM); } + dentry->d_op = &gfs2_dops; return dentry; fail_rgd: diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index faa07e4b97d..c996aa739a0 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -43,15 +43,6 @@ #include "util.h" #include "eaops.h" -/* For regular, non-NFS */ -struct filldir_reg { - struct gfs2_sbd *fdr_sbd; - int fdr_prefetch; - - filldir_t fdr_filldir; - void *fdr_opaque; -}; - /* * Most fields left uninitialised to catch anybody who tries to * use them. f_flags set to prevent file_accessed() from touching @@ -128,41 +119,6 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) } /** - * filldir_func - Report a directory entry to the caller of gfs2_dir_read() - * @opaque: opaque data used by the function - * @name: the name of the directory entry - * @length: the length of the name - * @offset: the entry's offset in the directory - * @inum: the inode number the entry points to - * @type: the type of inode the entry points to - * - * Returns: 0 on success, 1 if buffer full - */ - -static int filldir_func(void *opaque, const char *name, unsigned int length, - u64 offset, struct gfs2_inum_host *inum, - unsigned int type) -{ - struct filldir_reg *fdr = (struct filldir_reg *)opaque; - struct gfs2_sbd *sdp = fdr->fdr_sbd; - int error; - - error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset, - inum->no_addr, type); - if (error) - return 1; - - if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) { - gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_inode_glops, - LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY); - gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_iopen_glops, - LM_ST_SHARED, LM_FLAG_TRY); - } - - return 0; -} - -/** * gfs2_readdir - Read directory entries from a directory * @file: The directory to read from * @dirent: Buffer for dirents @@ -175,16 +131,10 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) { struct inode *dir = file->f_mapping->host; struct gfs2_inode *dip = GFS2_I(dir); - struct filldir_reg fdr; struct gfs2_holder d_gh; u64 offset = file->f_pos; int error; - fdr.fdr_sbd = GFS2_SB(dir); - fdr.fdr_prefetch = 1; - fdr.fdr_filldir = filldir; - fdr.fdr_opaque = dirent; - gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); error = gfs2_glock_nq_atime(&d_gh); if (error) { @@ -192,7 +142,7 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) return error; } - error = gfs2_dir_read(dir, &offset, &fdr, filldir_func); + error = gfs2_dir_read(dir, &offset, dirent, filldir); gfs2_glock_dq_uninit(&d_gh); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 636dda4c7d3..60f47bf2e8e 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -264,13 +264,23 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_sbd *sdp = GFS2_SB(dir); struct gfs2_inode *ip = GFS2_I(dentry->d_inode); - struct gfs2_holder ghs[2]; + struct gfs2_holder ghs[3]; + struct gfs2_rgrpd *rgd; + struct gfs2_holder ri_gh; int error; + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + return error; + gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); - error = gfs2_glock_nq_m(2, ghs); + rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); + gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); + + + error = gfs2_glock_nq_m(3, ghs); if (error) goto out; @@ -291,10 +301,12 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) out_end_trans: gfs2_trans_end(sdp); out_gunlock: - gfs2_glock_dq_m(2, ghs); + gfs2_glock_dq_m(3, ghs); out: gfs2_holder_uninit(ghs); gfs2_holder_uninit(ghs + 1); + gfs2_holder_uninit(ghs + 2); + gfs2_glock_dq_uninit(&ri_gh); return error; } @@ -449,13 +461,22 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_sbd *sdp = GFS2_SB(dir); struct gfs2_inode *ip = GFS2_I(dentry->d_inode); - struct gfs2_holder ghs[2]; + struct gfs2_holder ghs[3]; + struct gfs2_rgrpd *rgd; + struct gfs2_holder ri_gh; int error; + + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + return error; gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); - error = gfs2_glock_nq_m(2, ghs); + rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); + gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); + + error = gfs2_glock_nq_m(3, ghs); if (error) goto out; @@ -483,10 +504,12 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) gfs2_trans_end(sdp); out_gunlock: - gfs2_glock_dq_m(2, ghs); + gfs2_glock_dq_m(3, ghs); out: gfs2_holder_uninit(ghs); gfs2_holder_uninit(ghs + 1); + gfs2_holder_uninit(ghs + 2); + gfs2_glock_dq_uninit(&ri_gh); return error; } @@ -547,7 +570,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, struct gfs2_inode *ip = GFS2_I(odentry->d_inode); struct gfs2_inode *nip = NULL; struct gfs2_sbd *sdp = GFS2_SB(odir); - struct gfs2_holder ghs[4], r_gh; + struct gfs2_holder ghs[5], r_gh; + struct gfs2_rgrpd *nrgd; unsigned int num_gh; int dir_rename = 0; int alloc_required; @@ -587,6 +611,13 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (nip) { gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); num_gh++; + /* grab the resource lock for unlink flag twiddling + * this is the case of the target file already existing + * so we unlink before doing the rename + */ + nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr); + if (nrgd) + gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); } error = gfs2_glock_nq_m(num_gh, ghs); @@ -684,12 +715,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + al->al_rgd->rd_ri.ri_length + 4 * RES_DINODE + 4 * RES_LEAF + - RES_STATFS + RES_QUOTA, 0); + RES_STATFS + RES_QUOTA + 4, 0); if (error) goto out_ipreserv; } else { error = gfs2_trans_begin(sdp, 4 * RES_DINODE + - 5 * RES_LEAF, 0); + 5 * RES_LEAF + 4, 0); if (error) goto out_gunlock; } @@ -728,7 +759,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto out_end_trans; - ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -1018,7 +1049,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, } generic_fillattr(inode, stat); - if (unlock); + if (unlock) gfs2_glock_dq_uninit(&gh); return 0; @@ -1084,7 +1115,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) return gfs2_ea_remove(GFS2_I(dentry->d_inode), &er); } -struct inode_operations gfs2_file_iops = { +const struct inode_operations gfs2_file_iops = { .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, @@ -1094,7 +1125,7 @@ struct inode_operations gfs2_file_iops = { .removexattr = gfs2_removexattr, }; -struct inode_operations gfs2_dev_iops = { +const struct inode_operations gfs2_dev_iops = { .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, @@ -1104,7 +1135,7 @@ struct inode_operations gfs2_dev_iops = { .removexattr = gfs2_removexattr, }; -struct inode_operations gfs2_dir_iops = { +const struct inode_operations gfs2_dir_iops = { .create = gfs2_create, .lookup = gfs2_lookup, .link = gfs2_link, @@ -1123,7 +1154,7 @@ struct inode_operations gfs2_dir_iops = { .removexattr = gfs2_removexattr, }; -struct inode_operations gfs2_symlink_iops = { +const struct inode_operations gfs2_symlink_iops = { .readlink = gfs2_readlink, .follow_link = gfs2_follow_link, .permission = gfs2_permission, diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h index b15acb4fd34..34f0caac1a0 100644 --- a/fs/gfs2/ops_inode.h +++ b/fs/gfs2/ops_inode.h @@ -12,9 +12,9 @@ #include <linux/fs.h> -extern struct inode_operations gfs2_file_iops; -extern struct inode_operations gfs2_dir_iops; -extern struct inode_operations gfs2_symlink_iops; -extern struct inode_operations gfs2_dev_iops; +extern const struct inode_operations gfs2_file_iops; +extern const struct inode_operations gfs2_dir_iops; +extern const struct inode_operations gfs2_symlink_iops; +extern const struct inode_operations gfs2_dev_iops; #endif /* __OPS_INODE_DOT_H__ */ diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 7685b46f934..b89999d3a76 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -173,6 +173,9 @@ static void gfs2_write_super_lockfs(struct super_block *sb) struct gfs2_sbd *sdp = sb->s_fs_info; int error; + if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + return; + for (;;) { error = gfs2_freeze_fs(sdp); if (!error) @@ -426,6 +429,12 @@ static void gfs2_delete_inode(struct inode *inode) } error = gfs2_dinode_dealloc(ip); + /* + * Must do this before unlock to avoid trying to write back + * potentially dirty data now that inode no longer exists + * on disk. + */ + truncate_inode_pages(&inode->i_data, 0); out_unlock: gfs2_glock_dq(&ip->i_iopen_gh); @@ -443,14 +452,12 @@ out: static struct inode *gfs2_alloc_inode(struct super_block *sb) { - struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip; ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); if (ip) { ip->i_flags = 0; ip->i_gl = NULL; - ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default); ip->i_last_pfault = jiffies; } return &ip->i_inode; @@ -461,7 +468,7 @@ static void gfs2_destroy_inode(struct inode *inode) kmem_cache_free(gfs2_inode_cachep, inode); } -struct super_operations gfs2_super_ops = { +const struct super_operations gfs2_super_ops = { .alloc_inode = gfs2_alloc_inode, .destroy_inode = gfs2_destroy_inode, .write_inode = gfs2_write_inode, diff --git a/fs/gfs2/ops_super.h b/fs/gfs2/ops_super.h index 9de73f042f7..442a274c627 100644 --- a/fs/gfs2/ops_super.h +++ b/fs/gfs2/ops_super.h @@ -12,6 +12,6 @@ #include <linux/fs.h> -extern struct super_operations gfs2_super_ops; +extern const struct super_operations gfs2_super_ops; #endif /* __OPS_SUPER_DOT_H__ */ diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c index 45a5f11fc39..14b380fb060 100644 --- a/fs/gfs2/ops_vm.c +++ b/fs/gfs2/ops_vm.c @@ -28,34 +28,13 @@ #include "trans.h" #include "util.h" -static void pfault_be_greedy(struct gfs2_inode *ip) -{ - unsigned int time; - - spin_lock(&ip->i_spin); - time = ip->i_greedy; - ip->i_last_pfault = jiffies; - spin_unlock(&ip->i_spin); - - igrab(&ip->i_inode); - if (gfs2_glock_be_greedy(ip->i_gl, time)) - iput(&ip->i_inode); -} - static struct page *gfs2_private_nopage(struct vm_area_struct *area, unsigned long address, int *type) { struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host); - struct page *result; set_bit(GIF_PAGED, &ip->i_flags); - - result = filemap_nopage(area, address, type); - - if (result && result != NOPAGE_OOM) - pfault_be_greedy(ip); - - return result; + return filemap_nopage(area, address, type); } static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) @@ -167,7 +146,6 @@ static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area, set_page_dirty(result); } - pfault_be_greedy(ip); out: gfs2_glock_dq_uninit(&i_gh); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 43a24f2e590..70f424fcf1c 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -71,17 +71,12 @@ void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_atime_quantum = 3600; gt->gt_new_files_jdata = 0; gt->gt_new_files_directio = 0; - gt->gt_max_atomic_write = 4 << 20; gt->gt_max_readahead = 1 << 18; gt->gt_lockdump_size = 131072; gt->gt_stall_secs = 600; gt->gt_complain_secs = 10; gt->gt_reclaim_limit = 5000; gt->gt_entries_per_readdir = 32; - gt->gt_prefetch_secs = 10; - gt->gt_greedy_default = HZ / 10; - gt->gt_greedy_quantum = HZ / 40; - gt->gt_greedy_max = HZ / 4; gt->gt_statfs_quantum = 30; gt->gt_statfs_slow = 0; } @@ -359,8 +354,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) mutex_lock(&sdp->sd_jindex_mutex); for (;;) { - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, - GL_LOCAL_EXCL, ji_gh); + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh); if (error) break; @@ -529,8 +523,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) struct gfs2_log_header_host head; int error; - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, - GL_LOCAL_EXCL, &t_gh); + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh); if (error) return error; @@ -583,9 +576,8 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp) gfs2_quota_sync(sdp); gfs2_statfs_sync(sdp); - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, - GL_LOCAL_EXCL | GL_NOCACHE, - &t_gh); + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, + &t_gh); if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) return error; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 983eaf1e06b..d01f9f0fda2 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -436,17 +436,12 @@ TUNE_ATTR(atime_quantum, 0); TUNE_ATTR(max_readahead, 0); TUNE_ATTR(complain_secs, 0); TUNE_ATTR(reclaim_limit, 0); -TUNE_ATTR(prefetch_secs, 0); TUNE_ATTR(statfs_slow, 0); TUNE_ATTR(new_files_jdata, 0); TUNE_ATTR(new_files_directio, 0); TUNE_ATTR(quota_simul_sync, 1); TUNE_ATTR(quota_cache_secs, 1); -TUNE_ATTR(max_atomic_write, 1); TUNE_ATTR(stall_secs, 1); -TUNE_ATTR(greedy_default, 1); -TUNE_ATTR(greedy_quantum, 1); -TUNE_ATTR(greedy_max, 1); TUNE_ATTR(statfs_quantum, 1); TUNE_ATTR_DAEMON(scand_secs, scand_process); TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); @@ -465,15 +460,10 @@ static struct attribute *tune_attrs[] = { &tune_attr_max_readahead.attr, &tune_attr_complain_secs.attr, &tune_attr_reclaim_limit.attr, - &tune_attr_prefetch_secs.attr, &tune_attr_statfs_slow.attr, &tune_attr_quota_simul_sync.attr, &tune_attr_quota_cache_secs.attr, - &tune_attr_max_atomic_write.attr, &tune_attr_stall_secs.attr, - &tune_attr_greedy_default.attr, - &tune_attr_greedy_quantum.attr, - &tune_attr_greedy_max.attr, &tune_attr_statfs_quantum.attr, &tune_attr_scand_secs.attr, &tune_attr_recoverd_secs.attr, |