From f5c1e2ea71603bc2962041fef9dd902cb8626a1d Mon Sep 17 00:00:00 2001 From: Alan Tyson Date: Sat, 10 Mar 2007 06:05:14 +0000 Subject: [CIFS] reset mode when client notices that ATTR_READONLY is no longer set Signed-off-by: Alan Tyso Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/CHANGES | 3 +++ fs/cifs/inode.c | 6 ++++++ fs/cifs/readdir.c | 4 ++++ 3 files changed, 13 insertions(+) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 6247628bdae..1cbe5615993 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -4,6 +4,9 @@ Fix mtime bouncing around from local idea of last write times to remote time. Fix hang (in i_size_read) when simultaneous size update of same remote file on smp system corrupts sequence number. Do not reread unnecessarily partial page (which we are about to overwrite anyway) when writing out file opened rw. +When DOS attribute of file on non-Unix server's file changes on the server side +from read-only back to read-write, reflect this change in default file mode +(we had been leaving a file's mode read-only until the inode were reloaded) Version 1.47 ------------ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 86b9dbbd844..e75a844accd 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -494,6 +494,12 @@ int cifs_get_inode_info(struct inode **pinode, mode e.g. 555 */ if (cifsInfo->cifsAttrs & ATTR_READONLY) inode->i_mode &= ~(S_IWUGO); + else if ((inode->i_mode & S_IWUGO) == 0) + /* the ATTR_READONLY flag may have been */ + /* changed on server -- set any w bits */ + /* allowed by mnt_file_mode */ + inode->i_mode |= (S_IWUGO & + cifs_sb->mnt_file_mode); /* BB add code here - validate if device or weird share or device type? */ } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 44cfb528797..2a374d5215a 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -219,6 +219,10 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, tmp_inode->i_mode |= S_IFREG; if (attr & ATTR_READONLY) tmp_inode->i_mode &= ~(S_IWUGO); + else if ((tmp_inode->i_mode & S_IWUGO) == 0) + /* the ATTR_READONLY flag may have been changed on */ + /* server -- set any w bits allowed by mnt_file_mode */ + tmp_inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode); } /* could add code here - to validate if device or weird share type? */ /* can not fill in nlink here as in qpathinfo version and Unx search */ -- cgit v1.2.3 From c24f72cc7ca829bbad0532ddf315ace3ae1c359e Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Sat, 3 Feb 2007 03:14:30 -0800 Subject: ocfs2: Proper cleanup in case of error in ocfs2_register_hb_callbacks() If ocfs2_register_hb_callbacks() succeeds on its first callback but fails its second, it doesn't release the first on the way out. Fix that. While we're at it, o2hb_unregister_callback() never returns anything but 0, so let's make it void. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/heartbeat.c | 6 ++---- fs/ocfs2/cluster/heartbeat.h | 2 +- fs/ocfs2/cluster/tcp.c | 13 ++----------- fs/ocfs2/heartbeat.c | 15 +++++---------- 4 files changed, 10 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 5a9779bb923..0f2cfecd42c 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1682,7 +1682,7 @@ out: } EXPORT_SYMBOL_GPL(o2hb_register_callback); -int o2hb_unregister_callback(struct o2hb_callback_func *hc) +void o2hb_unregister_callback(struct o2hb_callback_func *hc) { BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); @@ -1690,15 +1690,13 @@ int o2hb_unregister_callback(struct o2hb_callback_func *hc) __builtin_return_address(0), hc); if (list_empty(&hc->hc_item)) - return 0; + return; down_write(&o2hb_callback_sem); list_del_init(&hc->hc_item); up_write(&o2hb_callback_sem); - - return 0; } EXPORT_SYMBOL_GPL(o2hb_unregister_callback); diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index cac6223206a..cc6d40b3977 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h @@ -70,7 +70,7 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc, void *data, int priority); int o2hb_register_callback(struct o2hb_callback_func *hc); -int o2hb_unregister_callback(struct o2hb_callback_func *hc); +void o2hb_unregister_callback(struct o2hb_callback_func *hc); void o2hb_fill_node_map(unsigned long *map, unsigned bytes); void o2hb_init(void); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 1718215fc01..69caf3e12fe 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1638,17 +1638,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, void o2net_unregister_hb_callbacks(void) { - int ret; - - ret = o2hb_unregister_callback(&o2net_hb_up); - if (ret < 0) - mlog(ML_ERROR, "Status return %d unregistering heartbeat up " - "callback!\n", ret); - - ret = o2hb_unregister_callback(&o2net_hb_down); - if (ret < 0) - mlog(ML_ERROR, "Status return %d unregistering heartbeat down " - "callback!\n", ret); + o2hb_unregister_callback(&o2net_hb_up); + o2hb_unregister_callback(&o2net_hb_down); } int o2net_register_hb_callbacks(void) diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index 8fc52d6d0ce..b25ef63781b 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c @@ -164,8 +164,10 @@ int ocfs2_register_hb_callbacks(struct ocfs2_super *osb) } status = o2hb_register_callback(&osb->osb_hb_up); - if (status < 0) + if (status < 0) { mlog_errno(status); + o2hb_unregister_callback(&osb->osb_hb_down); + } bail: return status; @@ -173,18 +175,11 @@ bail: void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb) { - int status; - if (ocfs2_mount_local(osb)) return; - status = o2hb_unregister_callback(&osb->osb_hb_down); - if (status < 0) - mlog_errno(status); - - status = o2hb_unregister_callback(&osb->osb_hb_up); - if (status < 0) - mlog_errno(status); + o2hb_unregister_callback(&osb->osb_hb_down); + o2hb_unregister_callback(&osb->osb_hb_up); } void ocfs2_stop_heartbeat(struct ocfs2_super *osb) -- cgit v1.2.3 From e6c352dbc0f4dc7e3f82feafb9e6207c5814a189 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Sat, 3 Feb 2007 03:04:20 -0800 Subject: ocfs2: Concurrent access of o2hb_region->hr_task was not locked This means that a build-up and a teardown could race which would result in a double-kthread_stop(). Protect the setting and clearing of hr_task with o2hb_live_lock, as it's not a common thing and not performance critical. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/heartbeat.c | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 0f2cfecd42c..eba282da500 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1234,6 +1234,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, const char *page, size_t count) { + struct task_struct *hb_task; long fd; int sectsize; char *p = (char *)page; @@ -1319,20 +1320,28 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, */ atomic_set(®->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1); - reg->hr_task = kthread_run(o2hb_thread, reg, "o2hb-%s", - reg->hr_item.ci_name); - if (IS_ERR(reg->hr_task)) { - ret = PTR_ERR(reg->hr_task); + hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", + reg->hr_item.ci_name); + if (IS_ERR(hb_task)) { + ret = PTR_ERR(hb_task); mlog_errno(ret); - reg->hr_task = NULL; goto out; } + spin_lock(&o2hb_live_lock); + reg->hr_task = hb_task; + spin_unlock(&o2hb_live_lock); + ret = wait_event_interruptible(o2hb_steady_queue, atomic_read(®->hr_steady_iterations) == 0); if (ret) { - kthread_stop(reg->hr_task); + spin_lock(&o2hb_live_lock); + hb_task = reg->hr_task; reg->hr_task = NULL; + spin_unlock(&o2hb_live_lock); + + if (hb_task) + kthread_stop(hb_task); goto out; } @@ -1354,10 +1363,17 @@ out: static ssize_t o2hb_region_pid_read(struct o2hb_region *reg, char *page) { - if (!reg->hr_task) + pid_t pid = 0; + + spin_lock(&o2hb_live_lock); + if (reg->hr_task) + pid = reg->hr_task->pid; + spin_unlock(&o2hb_live_lock); + + if (!pid) return 0; - return sprintf(page, "%u\n", reg->hr_task->pid); + return sprintf(page, "%u\n", pid); } struct o2hb_region_attribute { @@ -1495,13 +1511,17 @@ out: static void o2hb_heartbeat_group_drop_item(struct config_group *group, struct config_item *item) { + struct task_struct *hb_task; struct o2hb_region *reg = to_o2hb_region(item); /* stop the thread when the user removes the region dir */ - if (reg->hr_task) { - kthread_stop(reg->hr_task); - reg->hr_task = NULL; - } + spin_lock(&o2hb_live_lock); + hb_task = reg->hr_task; + reg->hr_task = NULL; + spin_unlock(&o2hb_live_lock); + + if (hb_task) + kthread_stop(hb_task); config_item_put(item); } -- cgit v1.2.3 From 03f981cf2ec95dd8bc43d2ecccaec4e83c8375e2 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 4 Jan 2007 14:54:41 -0800 Subject: ocfs2: add some missing address space callbacks Under load, OCFS2 would crash in invalidate_inode_pages2_range() because invalidate_complete_page2() was unable to invalidate a page. It would appear that JBD is holding on to the page. ext3 has a specific ->releasepage() handler to cover this case. Steal ext3's ->releasepage(), ->invalidatepage(), and ->migratepage(), as they appear completely appropriate for OCFS2. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/aops.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 93628b02ef5..875c1144381 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -614,6 +614,27 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, ocfs2_rw_unlock(inode, 0); } +/* + * ocfs2_invalidatepage() and ocfs2_releasepage() are shamelessly stolen + * from ext3. PageChecked() bits have been removed as OCFS2 does not + * do journalled data. + */ +static void ocfs2_invalidatepage(struct page *page, unsigned long offset) +{ + journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; + + journal_invalidatepage(journal, page, offset); +} + +static int ocfs2_releasepage(struct page *page, gfp_t wait) +{ + journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; + + if (!page_has_buffers(page)) + return 0; + return journal_try_to_free_buffers(journal, page, wait); +} + static ssize_t ocfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, @@ -661,5 +682,8 @@ const struct address_space_operations ocfs2_aops = { .commit_write = ocfs2_commit_write, .bmap = ocfs2_bmap, .sync_page = block_sync_page, - .direct_IO = ocfs2_direct_IO + .direct_IO = ocfs2_direct_IO, + .invalidatepage = ocfs2_invalidatepage, + .releasepage = ocfs2_releasepage, + .migratepage = buffer_migrate_page, }; -- cgit v1.2.3 From afdf04ea098139e86147f63aad9c383cad3b6f37 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Mon, 5 Mar 2007 15:49:49 -0800 Subject: configfs: add missing mutex_unlock() d_alloc() failure in configfs_register_subsystem() would fail to unlock the mutex taken above. Reorganize the exit path to ensure the unlock happens. Reported-by: Akinobu Mita Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/configfs/dir.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 34750d5e4ff..5e6e37e58f3 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1141,25 +1141,22 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) err = -ENOMEM; dentry = d_alloc(configfs_sb->s_root, &name); - if (!dentry) - goto out_release; - - d_add(dentry, NULL); + if (dentry) { + d_add(dentry, NULL); - err = configfs_attach_group(sd->s_element, &group->cg_item, - dentry); - if (!err) - dentry = NULL; - else - d_delete(dentry); + err = configfs_attach_group(sd->s_element, &group->cg_item, + dentry); + if (err) { + d_delete(dentry); + dput(dentry); + } + } mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); - if (dentry) { - dput(dentry); -out_release: - unlink_group(group); - configfs_release_fs(); + if (err) { + unlink_group(group); + configfs_release_fs(); } return err; -- cgit v1.2.3 From 3fca0894a4b5e52c278421b04435b88e32b423ad Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 12 Mar 2007 13:24:34 -0700 Subject: ocfs2_dlm: Missing get/put lockres in dlm_run_purge_lockres In some circumstances, this was causing us to reference freed memory. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmthread.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 8ffa0916eb8..6421a8fae1d 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -265,8 +265,10 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, /* This may drop and reacquire the dlm spinlock if it * has to do migration. */ mlog(0, "calling dlm_purge_lockres!\n"); + dlm_lockres_get(lockres); if (dlm_purge_lockres(dlm, lockres)) BUG(); + dlm_lockres_put(lockres); mlog(0, "DONE calling dlm_purge_lockres!\n"); /* Avoid adding any scheduling latencies */ -- cgit v1.2.3 From b36c3f84988eebf38acaccc756e05f6b70e333ab Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 12 Mar 2007 13:25:44 -0700 Subject: ocfs2_dlm: Add missing locks in dlm_empty_lockres __dlm_lockres_unused() expects the caller to take the lockres spinlock. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmmaster.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 77e4e6169a0..9229e04362f 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2730,14 +2730,17 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) int ret; int lock_dropped = 0; + spin_lock(&res->spinlock); if (res->owner != dlm->node_num) { if (!__dlm_lockres_unused(res)) { mlog(ML_ERROR, "%s:%.*s: this node is not master, " "trying to free this but locks remain\n", dlm->name, res->lockname.len, res->lockname.name); } + spin_unlock(&res->spinlock); goto leave; } + spin_unlock(&res->spinlock); /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ spin_unlock(&dlm->spinlock); -- cgit v1.2.3 From 04ff97086b1a3237bbd1fe6390fa80fe75207e23 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 12 Mar 2007 16:17:58 +0000 Subject: [PATCH] sanitize security_getprocattr() API have it return the buffer it had allocated Acked-by: Stephen Smalley Acked-by: James Morris Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/proc/base.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 01f7769da8e..989af5e55d1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1558,29 +1558,20 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file->f_path.dentry->d_inode; - unsigned long page; + char *p = NULL; ssize_t length; struct task_struct *task = get_proc_task(inode); - length = -ESRCH; if (!task) - goto out_no_task; - - if (count > PAGE_SIZE) - count = PAGE_SIZE; - length = -ENOMEM; - if (!(page = __get_free_page(GFP_KERNEL))) - goto out; + return -ESRCH; length = security_getprocattr(task, (char*)file->f_path.dentry->d_name.name, - (void*)page, count); - if (length >= 0) - length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); - free_page(page); -out: + &p); put_task_struct(task); -out_no_task: + if (length > 0) + length = simple_read_from_buffer(buf, count, ppos, p, length); + kfree(p); return length; } -- cgit v1.2.3 From a033f35a2206e28af8109c62b403d940ba89d2b9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 14 Mar 2007 09:16:24 +0000 Subject: [PATCH] include of asm/pgtable.h in nfsfh is bogus not needed and actually breaks build on frv, while we are at it Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/nfsd/nfsfh.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index c2660cbfcd9..8d995bcef80 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 82c05a13c9e0d782941f975eabf6c5a7928cc4d9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 14 Mar 2007 09:19:40 +0000 Subject: [PATCH] cifs endianness annotations Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/cifs/cifspdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 0efdf35aab2..3af76249dc8 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -220,7 +220,7 @@ */ #define CIFS_NO_HANDLE 0xFFFF -#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL +#define NO_CHANGE_64 cpu_to_le64(0xFFFFFFFFFFFFFFFFULL) #define NO_CHANGE_32 0xFFFFFFFFUL /* IPC$ in ASCII */ -- cgit v1.2.3 From d9a9cdfb078d755e648d53ec25b7370f84ee5729 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 15 Mar 2007 15:50:34 -0400 Subject: [PATCH] sysfs and driver core: add callback helper, used by SCSI and S390 This patch (as868) adds a helper routine for device drivers that need to set up a callback to perform some action in a different process's context. This is intended for use by attribute methods that want to unregister themselves or their parent device. Attribute method calls are mutually exclusive with unregistration, so such actions cannot be taken directly. Two attribute methods are converted to use the new helper routine: one for SCSI device deletion and one for System/390 ccwgroup devices. Signed-off-by: Alan Stern Cc: Hugh Dickins Cc: Cornelia Huck Cc: Oliver Neukum Signed-off-by: Linus Torvalds --- fs/sysfs/file.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'fs') diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 8d4d839a9d8..1bafdf6e171 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -629,6 +629,60 @@ void sysfs_remove_file_from_group(struct kobject *kobj, } EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); +struct sysfs_schedule_callback_struct { + struct kobject *kobj; + void (*func)(void *); + void *data; + struct work_struct work; +}; + +static void sysfs_schedule_callback_work(struct work_struct *work) +{ + struct sysfs_schedule_callback_struct *ss = container_of(work, + struct sysfs_schedule_callback_struct, work); + + (ss->func)(ss->data); + kobject_put(ss->kobj); + kfree(ss); +} + +/** + * sysfs_schedule_callback - helper to schedule a callback for a kobject + * @kobj: object we're acting for. + * @func: callback function to invoke later. + * @data: argument to pass to @func. + * + * sysfs attribute methods must not unregister themselves or their parent + * kobject (which would amount to the same thing). Attempts to do so will + * deadlock, since unregistration is mutually exclusive with driver + * callbacks. + * + * Instead methods can call this routine, which will attempt to allocate + * and schedule a workqueue request to call back @func with @data as its + * argument in the workqueue's process context. @kobj will be pinned + * until @func returns. + * + * Returns 0 if the request was submitted, -ENOMEM if storage could not + * be allocated. + */ +int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), + void *data) +{ + struct sysfs_schedule_callback_struct *ss; + + ss = kmalloc(sizeof(*ss), GFP_KERNEL); + if (!ss) + return -ENOMEM; + kobject_get(kobj); + ss->kobj = kobj; + ss->func = func; + ss->data = data; + INIT_WORK(&ss->work, sysfs_schedule_callback_work); + schedule_work(&ss->work); + return 0; +} +EXPORT_SYMBOL_GPL(sysfs_schedule_callback); + EXPORT_SYMBOL_GPL(sysfs_create_file); EXPORT_SYMBOL_GPL(sysfs_remove_file); -- cgit v1.2.3 From e7b0d26a86943370c04d6833c6edba2a72a6e240 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 15 Mar 2007 15:51:28 -0400 Subject: [PATCH] sysfs: reinstate exclusion between method calls and attribute unregistration This patch (as869) reinstates the mutual exclusion between sysfs attribute method calls and attribute unregistration. The previously-reported deadlocks have been fixed, and this exclusion is by far the simplest way to avoid races during driver unbinding. The check for orphaned read-buffers has been moved down slightly, so that the remainder of a partially-read buffer will still be available to userspace even after the attribute has been unregistered. Signed-off-by: Alan Stern Cc: Hugh Dickins Cc: Cornelia Huck Cc: Oliver Neukum Signed-off-by: Linus Torvalds --- fs/sysfs/file.c | 10 +++++----- fs/sysfs/inode.c | 10 +++++++--- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 1bafdf6e171..fc4633378dc 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -168,12 +168,12 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) ssize_t retval = 0; down(&buffer->sem); - if (buffer->orphaned) { - retval = -ENODEV; - goto out; - } if (buffer->needs_read_fill) { - if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) + if (buffer->orphaned) + retval = -ENODEV; + else + retval = fill_read_buffer(file->f_path.dentry,buffer); + if (retval) goto out; } pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index ccb7d722c55..4de5c6b8991 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -222,13 +222,17 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd) static inline void orphan_all_buffers(struct inode *node) { - struct sysfs_buffer_collection *set = node->i_private; + struct sysfs_buffer_collection *set; struct sysfs_buffer *buf; mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD); - if (node->i_private) { - list_for_each_entry(buf, &set->associates, associates) + set = node->i_private; + if (set) { + list_for_each_entry(buf, &set->associates, associates) { + down(&buf->sem); buf->orphaned = 1; + up(&buf->sem); + } } mutex_unlock(&node->i_mutex); } -- cgit v1.2.3 From 38e2aff670b681b6cc267aca307633cbcb48864b Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 16 Mar 2007 05:12:53 +0000 Subject: [CIFS] Do not negotiate new POSIX_PATH_OPERATIONS_CAP yet Samba server now expects that clients which send the new POSIX_PATH_OPERATIONS_CAP send all opens with this new SMB - and expects that clients that could send the new posix open/create but don't as indicating that they really want Windows semantics on that handle (which allows Samba to support clients which want to support both types of behaviors on different handles on the same mount) We will put this capability back in the SetFSInfo negotiation with servers like Samba when the new POSIXCreate (create/open/mkdir) code is finished. Signed-off-by: Steve French --- fs/cifs/cifspdu.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 0efdf35aab2..e894545f99d 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1887,7 +1887,13 @@ typedef struct { calls including posix open and posix unlink */ #ifdef CONFIG_CIFS_POSIX -#define CIFS_UNIX_CAP_MASK 0x0000003b +/* Can not set pathnames cap yet until we send new posix create SMB since + otherwise server can treat such handles opened with older ntcreatex + (by a new client which knows how to send posix path ops) + as non-posix handles (can affect write behavior with byte range locks. + We can add back in POSIX_PATH_OPS cap when Posix Create/Mkdir finished */ +/* #define CIFS_UNIX_CAP_MASK 0x0000003b */ +#define CIFS_UNIX_CAP_MASK 0x0000001b #else #define CIFS_UNIX_CAP_MASK 0x00000013 #endif /* CONFIG_CIFS_POSIX */ -- cgit v1.2.3 From 2189850f42beff23af32d847bd043cd1d1811a80 Mon Sep 17 00:00:00 2001 From: Evgeniy Dushistov Date: Fri, 16 Mar 2007 13:38:07 -0800 Subject: [PATCH] ufs2: more correct work with time This patch corrects work with time in UFS2 case. 1) According to UFS2 disk layout modification/access and so on "time" should be hold in two variables one 64bit for seconds and another 32bit for nanoseconds, at now for some unknown reason we suppose that "inode time" holds in three variables 32bit for seconds, 32bit for milliseconds and 32bit for nanoseconds. 2) We set amount of nanoseconds in "VFS inode" to 0 during read, instead of getting values from "on disk inode"(this should close http://bugzilla.kernel.org/show_bug.cgi?id=7991). Signed-off-by: Evgeniy Dushistov Cc: Bjoern Jacke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/ialloc.c | 5 ++--- fs/ufs/inode.c | 24 ++++++++++++------------ 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index b868878009b..c28a8b6f2fe 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -343,9 +343,8 @@ cg_found: lock_buffer(bh); ufs2_inode = (struct ufs2_inode *)bh->b_data; ufs2_inode += ufs_inotofsbo(inode->i_ino); - ufs2_inode->ui_birthtime.tv_sec = - cpu_to_fs32(sb, CURRENT_TIME_SEC.tv_sec); - ufs2_inode->ui_birthtime.tv_usec = 0; + ufs2_inode->ui_birthtime = cpu_to_fs64(sb, CURRENT_TIME.tv_sec); + ufs2_inode->ui_birthnsec = cpu_to_fs32(sb, CURRENT_TIME.tv_nsec); mark_buffer_dirty(bh); unlock_buffer(bh); if (sb->s_flags & MS_SYNCHRONOUS) diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index fb34ad03e22..366618dd698 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -668,12 +668,12 @@ static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) inode->i_gid = fs32_to_cpu(sb, ufs2_inode->ui_gid); inode->i_size = fs64_to_cpu(sb, ufs2_inode->ui_size); - inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs2_inode->ui_atime.tv_sec); - inode->i_ctime.tv_sec = fs32_to_cpu(sb, ufs2_inode->ui_ctime.tv_sec); - inode->i_mtime.tv_sec = fs32_to_cpu(sb, ufs2_inode->ui_mtime.tv_sec); - inode->i_mtime.tv_nsec = 0; - inode->i_atime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; + inode->i_atime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_atime); + inode->i_ctime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_ctime); + inode->i_mtime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_mtime); + inode->i_atime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_atimensec); + inode->i_ctime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_ctimensec); + inode->i_mtime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_mtimensec); inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks); inode->i_generation = fs32_to_cpu(sb, ufs2_inode->ui_gen); ufsi->i_flags = fs32_to_cpu(sb, ufs2_inode->ui_flags); @@ -803,12 +803,12 @@ static void ufs2_update_inode(struct inode *inode, struct ufs2_inode *ufs_inode) ufs_inode->ui_gid = cpu_to_fs32(sb, inode->i_gid); ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size); - ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec); - ufs_inode->ui_atime.tv_usec = 0; - ufs_inode->ui_ctime.tv_sec = cpu_to_fs32(sb, inode->i_ctime.tv_sec); - ufs_inode->ui_ctime.tv_usec = 0; - ufs_inode->ui_mtime.tv_sec = cpu_to_fs32(sb, inode->i_mtime.tv_sec); - ufs_inode->ui_mtime.tv_usec = 0; + ufs_inode->ui_atime = cpu_to_fs64(sb, inode->i_atime.tv_sec); + ufs_inode->ui_atimensec = cpu_to_fs32(sb, inode->i_atime.tv_nsec); + ufs_inode->ui_ctime = cpu_to_fs64(sb, inode->i_ctime.tv_sec); + ufs_inode->ui_ctimensec = cpu_to_fs32(sb, inode->i_ctime.tv_nsec); + ufs_inode->ui_mtime = cpu_to_fs64(sb, inode->i_mtime.tv_sec); + ufs_inode->ui_mtimensec = cpu_to_fs32(sb, inode->i_mtime.tv_nsec); ufs_inode->ui_blocks = cpu_to_fs64(sb, inode->i_blocks); ufs_inode->ui_flags = cpu_to_fs32(sb, ufsi->i_flags); -- cgit v1.2.3 From 5431bf97ce69065ed07de1ff12543d0800817b83 Mon Sep 17 00:00:00 2001 From: Evgeniy Dushistov Date: Fri, 16 Mar 2007 13:38:08 -0800 Subject: [PATCH] ufs: prepare write + change blocks on the fly This fixes "change blocks numbers on the fly" in case when "prepare write page" is in the call chain, in this case some buffers may be not uptodate and not mapped, we should care to map them and load from disk. This patch was tested with: - ufs regressions simple tests - fsx-linux - ltp(20060306) - untar and build kernel Signed-off-by: Evgeniy Dushistov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/balloc.c | 81 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index bcc44084e00..b8fa34af87c 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -244,62 +244,87 @@ failed: * We can come here from ufs_writepage or ufs_prepare_write, * locked_page is argument of these functions, so we already lock it. */ -static void ufs_change_blocknr(struct inode *inode, unsigned int beg, - unsigned int count, unsigned int oldb, - unsigned int newb, struct page *locked_page) +static void ufs_change_blocknr(struct inode *inode, sector_t beg, + unsigned int count, sector_t oldb, + sector_t newb, struct page *locked_page) { - const unsigned mask = (1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1; + const unsigned blks_per_page = + 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits); + const unsigned mask = blks_per_page - 1; struct address_space * const mapping = inode->i_mapping; - pgoff_t index, cur_index; - unsigned end, pos, j; + pgoff_t index, cur_index, last_index; + unsigned pos, j, lblock; + sector_t end, i; struct page *page; struct buffer_head *head, *bh; - UFSD("ENTER, ino %lu, count %u, oldb %u, newb %u\n", - inode->i_ino, count, oldb, newb); + UFSD("ENTER, ino %lu, count %u, oldb %llu, newb %llu\n", + inode->i_ino, count, + (unsigned long long)oldb, (unsigned long long)newb); BUG_ON(!locked_page); BUG_ON(!PageLocked(locked_page)); cur_index = locked_page->index; - - for (end = count + beg; beg < end; beg = (beg | mask) + 1) { - index = beg >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + end = count + beg; + last_index = end >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + for (i = beg; i < end; i = (i | mask) + 1) { + index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits); if (likely(cur_index != index)) { page = ufs_get_locked_page(mapping, index); - if (!page || IS_ERR(page)) /* it was truncated or EIO */ + if (!page)/* it was truncated */ + continue; + if (IS_ERR(page)) {/* or EIO */ + ufs_error(inode->i_sb, __FUNCTION__, + "read of page %llu failed\n", + (unsigned long long)index); continue; + } } else page = locked_page; head = page_buffers(page); bh = head; - pos = beg & mask; + pos = i & mask; for (j = 0; j < pos; ++j) bh = bh->b_this_page; - j = 0; + + + if (unlikely(index == last_index)) + lblock = end & mask; + else + lblock = blks_per_page; + do { - if (buffer_mapped(bh)) { - pos = bh->b_blocknr - oldb; - if (pos < count) { - UFSD(" change from %llu to %llu\n", - (unsigned long long)pos + oldb, - (unsigned long long)pos + newb); - bh->b_blocknr = newb + pos; - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); - mark_buffer_dirty(bh); - ++j; + if (j >= lblock) + break; + pos = (i - beg) + j; + + if (!buffer_mapped(bh)) + map_bh(bh, inode->i_sb, oldb + pos); + if (!buffer_uptodate(bh)) { + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { + ufs_error(inode->i_sb, __FUNCTION__, + "read of block failed\n"); + break; } } + UFSD(" change from %llu to %llu, pos %u\n", + (unsigned long long)pos + oldb, + (unsigned long long)pos + newb, pos); + + bh->b_blocknr = newb + pos; + unmap_underlying_metadata(bh->b_bdev, + bh->b_blocknr); + mark_buffer_dirty(bh); + ++j; bh = bh->b_this_page; } while (bh != head); - if (j) - set_page_dirty(page); - if (likely(cur_index != index)) ufs_put_locked_page(page); } -- cgit v1.2.3 From 4b25a37e2093146c1f9aa436b832b7d4ef880ca4 Mon Sep 17 00:00:00 2001 From: Evgeniy Dushistov Date: Fri, 16 Mar 2007 13:38:09 -0800 Subject: [PATCH] ufs: zeroize the rest of block in truncate This patch fix behaviour in such test scenario: lseek(fd, BIG_OFFSET) write(fd, buf, sizeof(buf)) truncate(BIG_OFFSET) truncate(BIG_OFFSET + sizeof(buf)) read(fd, buf...) Because of if file big enough(BIG_OFFSET) we start allocate space by block, ordinary block size > page size, so we should zeroize the rest of block in truncate(except last framgnet, about which VFS should care), to not get garbage, when we extend file. Also patch corrects conversion from pointer to block to physical block number, this helps in case of not common used UFS types. And add to debug output inode number. Signed-off-by: Evgeniy Dushistov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/balloc.c | 5 +++-- fs/ufs/inode.c | 12 ++++++------ fs/ufs/truncate.c | 36 ++++++++++++++++++++++++++---------- 3 files changed, 35 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index b8fa34af87c..841ac25fd95 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -482,8 +482,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, if (result) { ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); - ufs_change_blocknr(inode, fragment - oldcount, oldcount, tmp, - result, locked_page); + ufs_change_blocknr(inode, fragment - oldcount, oldcount, + uspi->s_sbbase + tmp, + uspi->s_sbbase + result, locked_page); ufs_cpu_to_data_ptr(sb, p, result); *err = 0; UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 366618dd698..013d7afe7cd 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -212,7 +212,7 @@ repeat: brelse (result); goto repeat; } else { - *phys = tmp + blockoff; + *phys = uspi->s_sbbase + tmp + blockoff; return NULL; } } @@ -282,9 +282,9 @@ repeat: } if (!phys) { - result = sb_getblk(sb, tmp + blockoff); + result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); } else { - *phys = tmp + blockoff; + *phys = uspi->s_sbbase + tmp + blockoff; result = NULL; *err = 0; *new = 1; @@ -368,7 +368,7 @@ repeat: brelse (result); goto repeat; } else { - *phys = tmp + blockoff; + *phys = uspi->s_sbbase + tmp + blockoff; goto out; } } @@ -389,9 +389,9 @@ repeat: if (!phys) { - result = sb_getblk(sb, tmp + blockoff); + result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); } else { - *phys = tmp + blockoff; + *phys = uspi->s_sbbase + tmp + blockoff; *new = 1; } diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 749581fa772..52fb2b59f06 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -74,7 +74,7 @@ static int ufs_trunc_direct(struct inode *inode) unsigned i, tmp; int retry; - UFSD("ENTER\n"); + UFSD("ENTER: ino %lu\n", inode->i_ino); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; @@ -96,8 +96,8 @@ static int ufs_trunc_direct(struct inode *inode) block2 = ufs_fragstoblks (frag3); } - UFSD("frag1 %llu, frag2 %llu, block1 %llu, block2 %llu, frag3 %llu," - " frag4 %llu\n", + UFSD("ino %lu, frag1 %llu, frag2 %llu, block1 %llu, block2 %llu," + " frag3 %llu, frag4 %llu\n", inode->i_ino, (unsigned long long)frag1, (unsigned long long)frag2, (unsigned long long)block1, (unsigned long long)block2, (unsigned long long)frag3, (unsigned long long)frag4); @@ -163,7 +163,7 @@ next1: mark_inode_dirty(inode); next3: - UFSD("EXIT\n"); + UFSD("EXIT: ino %lu\n", inode->i_ino); return retry; } @@ -248,7 +248,7 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) } ubh_brelse (ind_ubh); - UFSD("EXIT\n"); + UFSD("EXIT: ino %lu\n", inode->i_ino); return retry; } @@ -262,7 +262,7 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) void *dind; int retry = 0; - UFSD("ENTER\n"); + UFSD("ENTER: ino %lu\n", inode->i_ino); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; @@ -312,7 +312,7 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) } ubh_brelse (dind_bh); - UFSD("EXIT\n"); + UFSD("EXIT: ino %lu\n", inode->i_ino); return retry; } @@ -327,7 +327,7 @@ static int ufs_trunc_tindirect(struct inode *inode) void *tind, *p; int retry; - UFSD("ENTER\n"); + UFSD("ENTER: ino %lu\n", inode->i_ino); retry = 0; @@ -372,19 +372,21 @@ static int ufs_trunc_tindirect(struct inode *inode) } ubh_brelse (tind_bh); - UFSD("EXIT\n"); + UFSD("EXIT: ino %lu\n", inode->i_ino); return retry; } static int ufs_alloc_lastblock(struct inode *inode) { int err = 0; + struct super_block *sb = inode->i_sb; struct address_space *mapping = inode->i_mapping; - struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; unsigned i, end; sector_t lastfrag; struct page *lastpage; struct buffer_head *bh; + u64 phys64; lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift; @@ -424,6 +426,20 @@ static int ufs_alloc_lastblock(struct inode *inode) set_page_dirty(lastpage); } + if (lastfrag >= UFS_IND_FRAGMENT) { + end = uspi->s_fpb - ufs_fragnum(lastfrag) - 1; + phys64 = bh->b_blocknr + 1; + for (i = 0; i < end; ++i) { + bh = sb_getblk(sb, i + phys64); + lock_buffer(bh); + memset(bh->b_data, 0, sb->s_blocksize); + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + sync_dirty_buffer(bh); + brelse(bh); + } + } out_unlock: ufs_put_locked_page(lastpage); out: -- cgit v1.2.3 From 0465fc0a1c42e18438d391f3a7e661493a9ad68e Mon Sep 17 00:00:00 2001 From: Evgeniy Dushistov Date: Fri, 16 Mar 2007 13:38:09 -0800 Subject: [PATCH] ufs2: tindirect truncate fix During modification of code to support UFS2 writing, the case with "three indirect" blocks in truncate path was missed, this patch fixes this situation. Signed-off-by: Evgeniy Dushistov Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/truncate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 52fb2b59f06..79c54c85fb5 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -348,7 +348,7 @@ static int ufs_trunc_tindirect(struct inode *inode) } for (i = tindirect_block ; i < uspi->s_apb ; i++) { - tind = ubh_get_addr32 (tind_bh, i); + tind = ubh_get_data_ptr(uspi, tind_bh, i); retry |= ufs_trunc_dindirect(inode, UFS_NDADDR + uspi->s_apb + ((i + 1) << uspi->s_2apbshift), tind); ubh_mark_buffer_dirty(tind_bh); -- cgit v1.2.3 From b228b8e5bf96b740a70871c1a248bb65c267f5f2 Mon Sep 17 00:00:00 2001 From: Michael Halcrow Date: Fri, 16 Mar 2007 13:38:22 -0800 Subject: [PATCH] eCryptfs: fix possible NULL ptr deref in ecryptfs_d_release() ecryptfs_d_release() first dereferences a pointer (via ecryptfs_dentry_to_lower()) and then afterwards checks to see if the pointer it just dereferenced is NULL (via ecryptfs_dentry_to_private()). This patch moves all of the work done on the dereferenced pointer inside a block governed by the condition that the pointer is non-NULL. Signed-off-by: Michael Halcrow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/dentry.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 329efcd3d8c..cb20b964419 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -78,18 +78,13 @@ struct kmem_cache *ecryptfs_dentry_info_cache; */ static void ecryptfs_d_release(struct dentry *dentry) { - struct dentry *lower_dentry; - - lower_dentry = ecryptfs_dentry_to_lower(dentry); - if (ecryptfs_dentry_to_private(dentry)) + if (ecryptfs_dentry_to_private(dentry)) { + if (ecryptfs_dentry_to_lower(dentry)) { + mntput(ecryptfs_dentry_to_lower_mnt(dentry)); + dput(ecryptfs_dentry_to_lower(dentry)); + } kmem_cache_free(ecryptfs_dentry_info_cache, ecryptfs_dentry_to_private(dentry)); - if (lower_dentry) { - struct vfsmount *lower_mnt = - ecryptfs_dentry_to_lower_mnt(dentry); - - mntput(lower_mnt); - dput(lower_dentry); } return; } -- cgit v1.2.3 From 1174cf730179d8f029b9e93cb9a4d5bfb08d1202 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 16 Mar 2007 13:38:24 -0800 Subject: [PATCH] smbfs: double free memory corruption smbfs allocates rq_trans2buffer to handle server's multi transaction2 response messages. As struct smb_request may be reused, rq_trans2buffer is freed before each new request. However if last servers's response is not multi but single trans2 message then new rq_trans2buffer is not allocated but last smb_rput still tries to free it again. To prevent this issue rq_trans2buffer pointer should be set to NULL after kfree. Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/smbfs/request.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c index 42261dbdf60..723f7c66766 100644 --- a/fs/smbfs/request.c +++ b/fs/smbfs/request.c @@ -181,6 +181,7 @@ static int smb_setup_request(struct smb_request *req) req->rq_errno = 0; req->rq_fragment = 0; kfree(req->rq_trans2buffer); + req->rq_trans2buffer = NULL; return 0; } -- cgit v1.2.3 From b74a2f0913694556a027795d2954d30523fac4c5 Mon Sep 17 00:00:00 2001 From: suzuki Date: Fri, 16 Mar 2007 13:38:25 -0800 Subject: [PATCH] fix rescan_partitions to return errors properly The only error code which comes from the partition checkers is -1, when they finds an EIO. As per the discussion, ENOMEM values were ignored, as they might scare the users. So, with the current code, we end up returning -1 and not EIO for the ioctl() calls. Which doesn't give any clue to the user of what went wrong. Signed-off-by: Suzuki K P Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/partitions/check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/partitions/check.c b/fs/partitions/check.c index e46d237b10f..8a7d0035ad7 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -541,7 +541,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) return 0; if (IS_ERR(state)) /* I/O error reading the partition table */ - return PTR_ERR(state); + return -EIO; for (p = 1; p < state->limit; p++) { sector_t size = state->parts[p].size; sector_t from = state->parts[p].from; -- cgit v1.2.3 From 89a09141df6ac1c3821fbe44ca8384eb37692965 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 16 Mar 2007 13:38:26 -0800 Subject: [PATCH] nfs: fix congestion control The current NFS client congestion logic is severly broken, it marks the backing device congested during each nfs_writepages() call but doesn't mirror this in nfs_writepage() which makes for deadlocks. Also it implements its own waitqueue. Replace this by a more regular congestion implementation that puts a cap on the number of active writeback pages and uses the bdi congestion waitqueue. Also always use an interruptible wait since it makes sense to be able to SIGKILL the process even for mounts without 'intr'. Signed-off-by: Peter Zijlstra Acked-by: Trond Myklebust Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/super.c | 4 +- fs/nfs/sysctl.c | 8 ++++ fs/nfs/write.c | 116 ++++++++++++++++++++++++++++++++++++-------------------- 3 files changed, 84 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index bb516a2cfba..f1eae44b9a1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -151,10 +151,10 @@ int __init register_nfs_fs(void) if (ret < 0) goto error_0; -#ifdef CONFIG_NFS_V4 ret = nfs_register_sysctl(); if (ret < 0) goto error_1; +#ifdef CONFIG_NFS_V4 ret = register_filesystem(&nfs4_fs_type); if (ret < 0) goto error_2; @@ -165,9 +165,9 @@ int __init register_nfs_fs(void) #ifdef CONFIG_NFS_V4 error_2: nfs_unregister_sysctl(); +#endif error_1: unregister_filesystem(&nfs_fs_type); -#endif error_0: return ret; } diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index fcdcafbb329..b62481dabae 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -50,6 +50,14 @@ static ctl_table nfs_cb_sysctls[] = { .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nfs_congestion_kb", + .data = &nfs_congestion_kb, + .maxlen = sizeof(nfs_congestion_kb), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0 } }; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index febdade9167..2867e6b7096 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -38,7 +39,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, struct page *, unsigned int, unsigned int); static void nfs_mark_request_dirty(struct nfs_page *req); -static int nfs_wait_on_write_congestion(struct address_space *, int); static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how); static const struct rpc_call_ops nfs_write_partial_ops; static const struct rpc_call_ops nfs_write_full_ops; @@ -48,8 +48,6 @@ static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; static mempool_t *nfs_commit_mempool; -static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); - struct nfs_write_data *nfs_commit_alloc(void) { struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); @@ -210,6 +208,40 @@ static int wb_priority(struct writeback_control *wbc) return 0; } +/* + * NFS congestion control + */ + +int nfs_congestion_kb; + +#define NFS_CONGESTION_ON_THRESH (nfs_congestion_kb >> (PAGE_SHIFT-10)) +#define NFS_CONGESTION_OFF_THRESH \ + (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2)) + +static void nfs_set_page_writeback(struct page *page) +{ + if (!test_set_page_writeback(page)) { + struct inode *inode = page->mapping->host; + struct nfs_server *nfss = NFS_SERVER(inode); + + if (atomic_inc_return(&nfss->writeback) > + NFS_CONGESTION_ON_THRESH) + set_bdi_congested(&nfss->backing_dev_info, WRITE); + } +} + +static void nfs_end_page_writeback(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct nfs_server *nfss = NFS_SERVER(inode); + + end_page_writeback(page); + if (atomic_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) { + clear_bdi_congested(&nfss->backing_dev_info, WRITE); + congestion_end(WRITE); + } +} + /* * Find an associated nfs write request, and prepare to flush it out * Returns 1 if there was no write request, or if the request was @@ -247,7 +279,7 @@ static int nfs_page_mark_flush(struct page *page) spin_unlock(req_lock); if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) { nfs_mark_request_dirty(req); - set_page_writeback(page); + nfs_set_page_writeback(page); } ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); nfs_unlock_request(req); @@ -302,13 +334,8 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) return err; } -/* - * Note: causes nfs_update_request() to block on the assumption - * that the writeback is generated due to memory pressure. - */ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct backing_dev_info *bdi = mapping->backing_dev_info; struct inode *inode = mapping->host; int err; @@ -317,20 +344,12 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) err = generic_writepages(mapping, wbc); if (err) return err; - while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) { - if (wbc->nonblocking) - return 0; - nfs_wait_on_write_congestion(mapping, 0); - } err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc)); if (err < 0) goto out; nfs_add_stats(inode, NFSIOS_WRITEPAGES, err); err = 0; out: - clear_bit(BDI_write_congested, &bdi->state); - wake_up_all(&nfs_write_congestion); - congestion_end(WRITE); return err; } @@ -360,7 +379,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) } /* - * Insert a write request into an inode + * Remove a write request from an inode */ static void nfs_inode_remove_request(struct nfs_page *req) { @@ -531,10 +550,10 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, un } #endif -static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) +static int nfs_wait_on_write_congestion(struct address_space *mapping) { + struct inode *inode = mapping->host; struct backing_dev_info *bdi = mapping->backing_dev_info; - DEFINE_WAIT(wait); int ret = 0; might_sleep(); @@ -542,31 +561,23 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) if (!bdi_write_congested(bdi)) return 0; - nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT); + nfs_inc_stats(inode, NFSIOS_CONGESTIONWAIT); - if (intr) { - struct rpc_clnt *clnt = NFS_CLIENT(mapping->host); + do { + struct rpc_clnt *clnt = NFS_CLIENT(inode); sigset_t oldset; rpc_clnt_sigmask(clnt, &oldset); - prepare_to_wait(&nfs_write_congestion, &wait, TASK_INTERRUPTIBLE); - if (bdi_write_congested(bdi)) { - if (signalled()) - ret = -ERESTARTSYS; - else - schedule(); - } + ret = congestion_wait_interruptible(WRITE, HZ/10); rpc_clnt_sigunmask(clnt, &oldset); - } else { - prepare_to_wait(&nfs_write_congestion, &wait, TASK_UNINTERRUPTIBLE); - if (bdi_write_congested(bdi)) - schedule(); - } - finish_wait(&nfs_write_congestion, &wait); + if (ret == -ERESTARTSYS) + break; + ret = 0; + } while (bdi_write_congested(bdi)); + return ret; } - /* * Try to update any existing write request, or create one if there is none. * In order to match, the request's credentials must match those of @@ -577,14 +588,15 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, struct page *page, unsigned int offset, unsigned int bytes) { - struct inode *inode = page->mapping->host; + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req, *new = NULL; unsigned long rqend, end; end = offset + bytes; - if (nfs_wait_on_write_congestion(page->mapping, NFS_SERVER(inode)->flags & NFS_MOUNT_INTR)) + if (nfs_wait_on_write_congestion(mapping)) return ERR_PTR(-ERESTARTSYS); for (;;) { /* Loop over all inode entries and see if we find @@ -727,7 +739,7 @@ int nfs_updatepage(struct file *file, struct page *page, static void nfs_writepage_release(struct nfs_page *req) { - end_page_writeback(req->wb_page); + nfs_end_page_writeback(req->wb_page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (!PageError(req->wb_page)) { @@ -1042,12 +1054,12 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { nfs_set_pageerror(page); req->wb_context->error = task->tk_status; - end_page_writeback(page); + nfs_end_page_writeback(page); nfs_inode_remove_request(req); dprintk(", error = %d\n", task->tk_status); goto next; } - end_page_writeback(page); + nfs_end_page_writeback(page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) { @@ -1514,6 +1526,26 @@ int __init nfs_init_writepagecache(void) if (nfs_commit_mempool == NULL) return -ENOMEM; + /* + * NFS congestion size, scale with available memory. + * + * 64MB: 8192k + * 128MB: 11585k + * 256MB: 16384k + * 512MB: 23170k + * 1GB: 32768k + * 2GB: 46340k + * 4GB: 65536k + * 8GB: 92681k + * 16GB: 131072k + * + * This allows larger machines to have larger/more transfers. + * Limit the default to 256M + */ + nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); + if (nfs_congestion_kb > 256*1024) + nfs_congestion_kb = 256*1024; + return 0; } -- cgit v1.2.3 From 634707388baa440d9c9d082cfc4c950500c8952b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Mar 2007 13:38:28 -0800 Subject: [PATCH] nfs: nfs_getattr() can't call nfs_sync_mapping_range() for non-regular files Looks like we need a check in nfs_getattr() for a regular file. It makes no sense to call nfs_sync_mapping_range() on anything else. I think that should fix your problem: it will stop the NFS client from interfering with dirty pages on that inode's mapping. Signed-off-by: Trond Myklebust Acked-by: Olof Johansson Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index af53c02f473..93d046c85f5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -429,7 +429,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int err; /* Flush out writes to the server in order to update c/mtime */ - nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT); + if (S_ISREG(inode->i_mode)) + nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT); /* * We may force a getattr if the user cares about atime. -- cgit v1.2.3 From e5d480ff17f9220cd6198b5c91e8c75329f5594f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Mar 2007 13:38:32 -0800 Subject: [PATCH] change misleading EFI partition support description Remove the misleading "Presently only useful on the IA-64 platform" text from the EFI partition Kconfig. EFI partitions are also used by Apple on their Intel-based machines and thus you need EFI partition support if you (for example) want to attach such a machine in target disk mode. Signed-off-by: Johannes Berg Acked-by: Matt Domsch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/partitions/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig index 74552c60b67..6e8bb66fe61 100644 --- a/fs/partitions/Kconfig +++ b/fs/partitions/Kconfig @@ -235,5 +235,4 @@ config EFI_PARTITION select CRC32 help Say Y here if you would like to use hard disks under Linux which - were partitioned using EFI GPT. Presently only useful on the - IA-64 platform. + were partitioned using EFI GPT. -- cgit v1.2.3 From d1cabd63262707ad5d6bb730f25b7a2852734595 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 16 Mar 2007 13:38:35 -0800 Subject: [PATCH] fix process crash caused by randomisation and 64k pages This bug was seen on ppc64, but it could have occurred on any architecture with a page size of 64k or above. The problem is that in fs/binfmt_elf.c:randomize_stack_top() randomizes the stack to within 0x7ff pages. On 4k page machines, this is 8MB; on 64k page boxes, this is 128MB. The problem is that the new binary layout (selected in arch_pick_mmap_layout) places the mapping segment 128MB or the stack rlimit away from the top of the process memory, whichever is larger. If you chose an rlimit of less than 128MB (most defaults are in the 8Mb range) then you can end up having your entire stack randomized away. The fix is to make randomize_stack_top() only steal at most 8MB, which this patch does. However, I have to point out that even with this, your stack rlimit might not be exactly what you get if it's > 128MB, because you're still losing the random offset of up to 8MB. The true fix should be to leave an explicit gap for the randomization plus a buffer when determining mmap_base, but that would involve fixing all the architectures. Cc: Arjan van de Ven Cc: Ingo Molnar Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 51db1182b27..a2fceba7ef8 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -507,7 +507,7 @@ out: #define INTERPRETER_ELF 2 #ifndef STACK_RND_MASK -#define STACK_RND_MASK 0x7ff /* with 4K pages 8MB of VA */ +#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */ #endif static unsigned long randomize_stack_top(unsigned long stack_top) -- cgit v1.2.3 From 066fcb06d3e27c258bc229bb688ced2b16daa6c2 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 23 Mar 2007 00:45:08 +0000 Subject: [CIFS] Allow reset of file to ATTR_NORMAL when archive bit not set When a file had a dos attribute of 0x1 (readonly - but dos attribute of archive was not set) - doing chmod 0777 or equivalent would try to set a dos attribute of 0 (which some servers ignore) rather than ATTR_NORMAL (0x20) which most servers accept. Does not affect servers which support the CIFS Unix Extensions. Acked-by: Prasad Potluri Acked-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/CHANGES | 5 ++++- fs/cifs/inode.c | 15 ++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 1cbe5615993..5d1f4873d70 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -6,7 +6,10 @@ on smp system corrupts sequence number. Do not reread unnecessarily partial page (which we are about to overwrite anyway) when writing out file opened rw. When DOS attribute of file on non-Unix server's file changes on the server side from read-only back to read-write, reflect this change in default file mode -(we had been leaving a file's mode read-only until the inode were reloaded) +(we had been leaving a file's mode read-only until the inode were reloaded). +Allow setting of attribute back to ATTR_NORMAL (removing readonly dos attribute +when archive dos attribute not set and we are changing mode back to writeable +on server which does not support the Unix Extensions). Version 1.47 ------------ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index e75a844accd..f414526e476 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1196,6 +1196,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) struct cifsFileInfo *open_file = NULL; FILE_BASIC_INFO time_buf; int set_time = FALSE; + int set_dosattr = FALSE; __u64 mode = 0xFFFFFFFFFFFFFFFFULL; __u64 uid = 0xFFFFFFFFFFFFFFFFULL; __u64 gid = 0xFFFFFFFFFFFFFFFFULL; @@ -1332,15 +1333,23 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) else if (attrs->ia_valid & ATTR_MODE) { rc = 0; if ((mode & S_IWUGO) == 0) /* not writeable */ { - if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) + if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) { + set_dosattr = TRUE; time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs | ATTR_READONLY); + } } else if ((mode & S_IWUGO) == S_IWUGO) { - if (cifsInode->cifsAttrs & ATTR_READONLY) + if (cifsInode->cifsAttrs & ATTR_READONLY) { + set_dosattr = TRUE; time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs & (~ATTR_READONLY)); + /* Windows ignores set to zero */ + if(time_buf.Attributes == 0) + time_buf.Attributes |= + cpu_to_le32(ATTR_NORMAL); + } } /* BB to be implemented - via Windows security descriptors or streams */ @@ -1378,7 +1387,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) } else time_buf.ChangeTime = 0; - if (set_time || time_buf.Attributes) { + if (set_time || set_dosattr) { time_buf.CreationTime = 0; /* do not change */ /* In the future we should experiment - try setting timestamps via Handle (SetFileInfo) instead of by path */ -- cgit v1.2.3 From b43376927ab0f7b64c4fb304568ecfaea10446e2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Mar 2007 00:11:27 -0800 Subject: [PATCH] Make XFS workqueues nonfreezable Since freezable workqueues are broken in 2.6.21-rc (cf. http://marc.theaimsgroup.com/?l=linux-kernel&m=116855740612755, http://marc.theaimsgroup.com/?l=linux-kernel&m=117261312523921&w=2) it's better to change the only user of them, which is XFS, to use "normal" nonfreezable workqueues. Signed-off-by: Rafael J. Wysocki Cc: Pavel Machek Cc: David Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/xfs/linux-2.6/xfs_buf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index e2bea6a661f..69e9e80735d 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1829,11 +1829,11 @@ xfs_buf_init(void) if (!xfs_buf_zone) goto out_free_trace_buf; - xfslogd_workqueue = create_freezeable_workqueue("xfslogd"); + xfslogd_workqueue = create_workqueue("xfslogd"); if (!xfslogd_workqueue) goto out_free_buf_zone; - xfsdatad_workqueue = create_freezeable_workqueue("xfsdatad"); + xfsdatad_workqueue = create_workqueue("xfsdatad"); if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; -- cgit v1.2.3 From aa289b47231c95abe53a75223906fdfb79ae368e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Mar 2007 00:10:00 -0700 Subject: [PATCH] FDPIC: fix the /proc/pid/stat representation of executable boundaries Fix the /proc/pid/stat representation of executable boundaries. It should show the bounds of the executable, but instead shows the bounds of the loader. Before the patch is applied, the bug can be seen by examining, say, inetd: # ps | grep inetd 610 root 0 S /usr/sbin/inetd -i # cat /proc/610/maps c0bb0000-c0bba788 r-xs 00000000 00:0b 14582157 /lib/ld-uClibc-0.9.28.so c3180000-c31dede4 r-xs 00000000 00:0b 14582179 /lib/libuClibc-0.9.28.so c328c000-c328ea00 rw-p 00008000 00:0b 14582157 /lib/ld-uClibc-0.9.28.so c3290000-c329b6c0 rw-p 00000000 00:00 0 c32a0000-c32c0000 rwxp 00000000 00:00 0 c32d4000-c32d8000 rw-p 00000000 00:00 0 c3394000-c3398000 rw-p 00000000 00:00 0 c3458000-c345f464 r-xs 00000000 00:0b 16384612 /usr/sbin/inetd c3470000-c34748f8 rw-p 00004000 00:0b 16384612 /usr/sbin/inetd c34cc000-c34d0000 rw-p 00000000 00:00 0 c34d4000-c34d8000 rw-p 00000000 00:00 0 c34d8000-c34dc000 rw-p 00000000 00:00 0 # cat /proc/610/stat 610 (inetd) S 1 610 610 0 -1 256 0 0 0 0 0 8 0 0 19 0 1 0 94392000718 950272 0 4294967295 3233480704 3233523592 3274440352 3274439976 3273467584 0 0 4096 90115 3221712796 0 0 17 0 0 0 0 The code boundaries are 3233480704 to 3233523592, which are: (gdb) p/x 3233480704 $1 = 0xc0bb0000 (gdb) p/x 3233523592 $2 = 0xc0bba788 Which corresponds to this line in the maps file: c0bb0000-c0bba788 r-xs 00000000 00:0b 14582157 /lib/ld-uClibc-0.9.28.so Which is wrong. After the patch is applied, the maps file is pretty much identical (there's some minor shuffling of the location of some of the anonymous VMAs), but the stat file is now: # cat /proc/610/stat 610 (inetd) S 1 610 610 0 -1 256 0 0 0 0 0 7 0 0 18 0 1 0 94392000722 950272 0 4294967295 3276111872 3276141668 3274440352 3274439976 3273467584 0 0 4096 90115 3221712796 0 0 17 0 0 0 0 The code boundaries are then 3276111872 to 3276141668, which are: (gdb) p/x 3276111872 $1 = 0xc3458000 (gdb) p/x 3276141668 $2 = 0xc345f464 And these correspond to this line in the maps file instead: c3458000-c345f464 r-xs 00000000 00:0b 16384612 /usr/sbin/inetd Which is now correct. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf_fdpic.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 5810aa1339f..47d6d49d1fb 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -179,6 +179,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, int executable_stack; int retval, i; + kdebug("____ LOAD %d ____", current->pid); + memset(&exec_params, 0, sizeof(exec_params)); memset(&interp_params, 0, sizeof(interp_params)); @@ -941,8 +943,11 @@ static int elf_fdpic_map_file_constdisp_on_uclinux( if (mm) { if (phdr->p_flags & PF_X) { - mm->start_code = seg->addr; - mm->end_code = seg->addr + phdr->p_memsz; + if (!mm->start_code) { + mm->start_code = seg->addr; + mm->end_code = seg->addr + + phdr->p_memsz; + } } else if (!mm->start_data) { mm->start_data = seg->addr; #ifndef CONFIG_MMU @@ -1123,8 +1128,10 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, if (mm) { if (phdr->p_flags & PF_X) { - mm->start_code = maddr; - mm->end_code = maddr + phdr->p_memsz; + if (!mm->start_code) { + mm->start_code = maddr; + mm->end_code = maddr + phdr->p_memsz; + } } else if (!mm->start_data) { mm->start_data = maddr; mm->end_data = maddr + phdr->p_memsz; -- cgit v1.2.3 From 105fd108a66ceff2b0fb710582b97d61ee4c9d40 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 23 Mar 2007 00:10:02 -0700 Subject: [PATCH] "ext[34]: EA block reference count racing fix" performance fix A little mistake in 8a2bfdcbfa441d8b0e5cb9c9a7f45f77f80da465 is making all transactions synchronous, which reduces ext3 performance to comical levels. Cc: Mingming Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/xattr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 12f7dda1232..f58cbb26323 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -495,7 +495,8 @@ ext3_xattr_release_block(handle_t *handle, struct inode *inode, BHDR(bh)->h_refcount = cpu_to_le32( le32_to_cpu(BHDR(bh)->h_refcount) - 1); error = ext3_journal_dirty_metadata(handle, bh); - handle->h_sync = 1; + if (IS_SYNC(inode)) + handle->h_sync = 1; DQUOT_FREE_BLOCK(inode, 1); ea_bdebug(bh, "refcount now=%d; releasing", le32_to_cpu(BHDR(bh)->h_refcount)); -- cgit v1.2.3 From d32b687e2e16e8174e88f186c3bae39a13e57b3d Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 5 Mar 2007 02:49:48 +0100 Subject: 9p: make struct v9fs_cached_file_operations static This patch makes te needlessly global struct v9fs_cached_file_operations static. Signed-off-by: Adrian Bunk Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs_vfs.h | 1 - fs/9p/vfs_file.c | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 8ada4c5c5d7..6a82d39dc49 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -40,7 +40,6 @@ extern struct file_system_type v9fs_fs_type; extern const struct address_space_operations v9fs_addr_operations; extern const struct file_operations v9fs_file_operations; -extern const struct file_operations v9fs_cached_file_operations; extern const struct file_operations v9fs_dir_operations; extern struct dentry_operations v9fs_dentry_operations; extern struct dentry_operations v9fs_cached_dentry_operations; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 653dfa5b253..c7b67725384 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -42,6 +42,8 @@ #include "v9fs_vfs.h" #include "fid.h" +static const struct file_operations v9fs_cached_file_operations; + /** * v9fs_file_open - open a file (or directory) * @inode: inode to be opened @@ -245,7 +247,7 @@ v9fs_file_write(struct file *filp, const char __user * data, return total; } -const struct file_operations v9fs_cached_file_operations = { +static const struct file_operations v9fs_cached_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .aio_read = generic_file_aio_read, -- cgit v1.2.3 From 78062cb2e54ffe0df811dce5e68b54da9b8c9025 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 22 Mar 2007 17:01:07 -0700 Subject: ocfs2_dlm: Fix lockres ref counting bug During umount, the umount thread migrates the lockres' and the dlm_thread frees the empty lockres'. Due to a race, the reference counting on the lockres goes awry leading to extra puts. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmdomain.c | 6 ++++-- fs/ocfs2/dlm/dlmthread.c | 10 ++-------- 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 6087c4749fe..dbbac184c26 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -138,8 +138,10 @@ static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); void __dlm_unhash_lockres(struct dlm_lock_resource *lockres) { - hlist_del_init(&lockres->hash_node); - dlm_lockres_put(lockres); + if (!hlist_unhashed(&lockres->hash_node)) { + hlist_del_init(&lockres->hash_node); + dlm_lockres_put(lockres); + } } void __dlm_insert_lockres(struct dlm_ctxt *dlm, diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 6421a8fae1d..2b264c6ba03 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -256,20 +256,14 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, break; } - mlog(0, "removing lockres %.*s:%p from purgelist\n", - lockres->lockname.len, lockres->lockname.name, lockres); - list_del_init(&lockres->purge); - dlm_lockres_put(lockres); - dlm->purge_count--; + dlm_lockres_get(lockres); /* This may drop and reacquire the dlm spinlock if it * has to do migration. */ - mlog(0, "calling dlm_purge_lockres!\n"); - dlm_lockres_get(lockres); if (dlm_purge_lockres(dlm, lockres)) BUG(); + dlm_lockres_put(lockres); - mlog(0, "DONE calling dlm_purge_lockres!\n"); /* Avoid adding any scheduling latencies */ cond_resched_lock(&dlm->spinlock); -- cgit v1.2.3 From 2f5bf1f2d061dea5146aa283685ce2b00cea2f3d Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 22 Mar 2007 17:08:32 -0700 Subject: ocfs2_dlm: Check for migrateable lockres in dlm_empty_lockres() In dlm_migrate_lockres(), we check upfront whether the lockres is a candidate for migration. This patch encapsulates that code in a separate function so that dlm_empty_lockres() can also use it during umount. This patch addresses the umount process spinning problem. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmdomain.c | 2 + fs/ocfs2/dlm/dlmmaster.c | 99 +++++++++++++++++++++++++++++++----------------- 2 files changed, 67 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index dbbac184c26..c558442a0b4 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -657,6 +657,8 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) dlm_kick_thread(dlm, NULL); while (dlm_migrate_all_locks(dlm)) { + /* Give dlm_thread time to purge the lockres' */ + msleep(500); mlog(0, "%s: more migration to do\n", dlm->name); } dlm_mark_domain_leaving(dlm); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 9229e04362f..6edffca99d9 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2424,6 +2424,57 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) dlm_lockres_put(res); } +/* Checks whether the lockres can be migrated. Returns 0 if yes, < 0 + * if not. If 0, numlocks is set to the number of locks in the lockres. + */ +static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, + int *numlocks) +{ + int ret; + int i; + int count = 0; + struct list_head *queue, *iter; + struct dlm_lock *lock; + + assert_spin_locked(&res->spinlock); + + ret = -EINVAL; + if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { + mlog(0, "cannot migrate lockres with unknown owner!\n"); + goto leave; + } + + if (res->owner != dlm->node_num) { + mlog(0, "cannot migrate lockres this node doesn't own!\n"); + goto leave; + } + + ret = 0; + queue = &res->granted; + for (i = 0; i < 3; i++) { + list_for_each(iter, queue) { + lock = list_entry(iter, struct dlm_lock, list); + ++count; + if (lock->ml.node == dlm->node_num) { + mlog(0, "found a lock owned by this node still " + "on the %s queue! will not migrate this " + "lockres\n", (i == 0 ? "granted" : + (i == 1 ? "converting" : + "blocked"))); + ret = -ENOTEMPTY; + goto leave; + } + } + queue++; + } + + *numlocks = count; + mlog(0, "migrateable lockres having %d locks\n", *numlocks); + +leave: + return ret; +} /* * DLM_MIGRATE_LOCKRES @@ -2437,14 +2488,12 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle = NULL; struct dlm_master_list_entry *oldmle = NULL; struct dlm_migratable_lockres *mres = NULL; - int ret = -EINVAL; + int ret = 0; const char *name; unsigned int namelen; int mle_added = 0; - struct list_head *queue, *iter; - int i; - struct dlm_lock *lock; - int empty = 1, wake = 0; + int numlocks; + int wake = 0; if (!dlm_grab(dlm)) return -EINVAL; @@ -2458,42 +2507,16 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, * ensure this lockres is a proper candidate for migration */ spin_lock(&res->spinlock); - if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { - mlog(0, "cannot migrate lockres with unknown owner!\n"); - spin_unlock(&res->spinlock); - goto leave; - } - if (res->owner != dlm->node_num) { - mlog(0, "cannot migrate lockres this node doesn't own!\n"); + ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); + if (ret < 0) { spin_unlock(&res->spinlock); goto leave; } - mlog(0, "checking queues...\n"); - queue = &res->granted; - for (i=0; i<3; i++) { - list_for_each(iter, queue) { - lock = list_entry (iter, struct dlm_lock, list); - empty = 0; - if (lock->ml.node == dlm->node_num) { - mlog(0, "found a lock owned by this node " - "still on the %s queue! will not " - "migrate this lockres\n", - i==0 ? "granted" : - (i==1 ? "converting" : "blocked")); - spin_unlock(&res->spinlock); - ret = -ENOTEMPTY; - goto leave; - } - } - queue++; - } - mlog(0, "all locks on this lockres are nonlocal. continuing\n"); spin_unlock(&res->spinlock); /* no work to do */ - if (empty) { + if (numlocks == 0) { mlog(0, "no locks were found on this lockres! done!\n"); - ret = 0; goto leave; } @@ -2729,6 +2752,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { int ret; int lock_dropped = 0; + int numlocks; spin_lock(&res->spinlock); if (res->owner != dlm->node_num) { @@ -2740,6 +2764,13 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) spin_unlock(&res->spinlock); goto leave; } + + /* No need to migrate a lockres having no locks */ + ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); + if (ret >= 0 && numlocks == 0) { + spin_unlock(&res->spinlock); + goto leave; + } spin_unlock(&res->spinlock); /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ -- cgit v1.2.3 From 485ddb4b9741bafb70b22e5c1f9b4f37dc3e85bd Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 27 Mar 2007 08:55:08 +0200 Subject: 1/2 splice: dont steal Stealing pages with splice is problematic because we cannot just insert an uptodate page into the pagecache and hope the filesystem can take care of it later. We also cannot just ClearPageUptodate, then hope prepare_write does not write anything into the page, because I don't think prepare_write gives that guarantee. Remove support for SPLICE_F_MOVE for now. If we really want to bring it back, we might be able to do so with a the new filesystem buffered write aops APIs I'm working on. If we really don't want to bring it back, then we should decide that sooner rather than later, and remove the flag and all the stealing infrastructure before anybody starts using it. Signed-off-by: Nick Piggin Signed-off-by: Jens Axboe --- fs/splice.c | 101 +++++++++++++++++++++++------------------------------------- 1 file changed, 38 insertions(+), 63 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index 2fca6ebf4cc..badc78ff124 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -576,76 +576,51 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, if (this_len + offset > PAGE_CACHE_SIZE) this_len = PAGE_CACHE_SIZE - offset; - /* - * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full - * page. - */ - if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) { +find_page: + page = find_lock_page(mapping, index); + if (!page) { + ret = -ENOMEM; + page = page_cache_alloc_cold(mapping); + if (unlikely(!page)) + goto out_ret; + /* - * If steal succeeds, buf->page is now pruned from the - * pagecache and we can reuse it. The page will also be - * locked on successful return. + * This will also lock the page */ - if (buf->ops->steal(pipe, buf)) - goto find_page; - - page = buf->page; - if (add_to_page_cache(page, mapping, index, GFP_KERNEL)) { - unlock_page(page); - goto find_page; - } - - page_cache_get(page); - - if (!(buf->flags & PIPE_BUF_FLAG_LRU)) - lru_cache_add(page); - } else { -find_page: - page = find_lock_page(mapping, index); - if (!page) { - ret = -ENOMEM; - page = page_cache_alloc_cold(mapping); - if (unlikely(!page)) - goto out_ret; + ret = add_to_page_cache_lru(page, mapping, index, + GFP_KERNEL); + if (unlikely(ret)) + goto out; + } - /* - * This will also lock the page - */ - ret = add_to_page_cache_lru(page, mapping, index, - GFP_KERNEL); + /* + * We get here with the page locked. If the page is also + * uptodate, we don't need to do more. If it isn't, we + * may need to bring it in if we are not going to overwrite + * the full page. + */ + if (!PageUptodate(page)) { + if (this_len < PAGE_CACHE_SIZE) { + ret = mapping->a_ops->readpage(file, page); if (unlikely(ret)) goto out; - } - /* - * We get here with the page locked. If the page is also - * uptodate, we don't need to do more. If it isn't, we - * may need to bring it in if we are not going to overwrite - * the full page. - */ - if (!PageUptodate(page)) { - if (this_len < PAGE_CACHE_SIZE) { - ret = mapping->a_ops->readpage(file, page); - if (unlikely(ret)) - goto out; - - lock_page(page); - - if (!PageUptodate(page)) { - /* - * Page got invalidated, repeat. - */ - if (!page->mapping) { - unlock_page(page); - page_cache_release(page); - goto find_page; - } - ret = -EIO; - goto out; + lock_page(page); + + if (!PageUptodate(page)) { + /* + * Page got invalidated, repeat. + */ + if (!page->mapping) { + unlock_page(page); + page_cache_release(page); + goto find_page; } - } else - SetPageUptodate(page); - } + ret = -EIO; + goto out; + } + } else + SetPageUptodate(page); } ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len); -- cgit v1.2.3 From 08c72591636829d40bd695d43ec6d2a8191b668b Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 27 Mar 2007 08:55:39 +0200 Subject: 2/2 splice: dont readpage Splice does not need to readpage to bring the page uptodate before writing to it, because prepare_write will take care of that for us. Splice is also wrong to SetPageUptodate before the page is actually uptodate. This results in the old uninitialised memory leak. This gets fixed as a matter of course when removing the readpage logic. Signed-off-by: Nick Piggin Signed-off-by: Jens Axboe --- fs/splice.c | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index badc78ff124..ae50208e3e6 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -593,36 +593,6 @@ find_page: goto out; } - /* - * We get here with the page locked. If the page is also - * uptodate, we don't need to do more. If it isn't, we - * may need to bring it in if we are not going to overwrite - * the full page. - */ - if (!PageUptodate(page)) { - if (this_len < PAGE_CACHE_SIZE) { - ret = mapping->a_ops->readpage(file, page); - if (unlikely(ret)) - goto out; - - lock_page(page); - - if (!PageUptodate(page)) { - /* - * Page got invalidated, repeat. - */ - if (!page->mapping) { - unlock_page(page); - page_cache_release(page); - goto find_page; - } - ret = -EIO; - goto out; - } - } else - SetPageUptodate(page); - } - ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len); if (unlikely(ret)) { loff_t isize = i_size_read(mapping->host); -- cgit v1.2.3 From 40bee44eaef91b6030037c8bb47f909181fb1edc Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 21 Mar 2007 13:11:02 +0100 Subject: Export __splice_from_pipe() Ocfs2 wants to implement it's own splice write actor so that it can better manage cluster / page locks. This lets us re-use the rest of splice write while only providing our own code where it's actually important. Signed-off-by: Mark Fasheh Signed-off-by: Jens Axboe --- fs/splice.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index ae50208e3e6..07f6556add0 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -651,9 +651,9 @@ out_ret: * key here is the 'actor' worker passed in that actually moves the data * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. */ -static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, - struct file *out, loff_t *ppos, size_t len, - unsigned int flags, splice_actor *actor) +ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, size_t len, + unsigned int flags, splice_actor *actor) { int ret, do_wakeup, err; struct splice_desc sd; @@ -747,6 +747,7 @@ static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, return ret; } +EXPORT_SYMBOL(__splice_from_pipe); ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags, -- cgit v1.2.3 From 598b9a56373f0596ed48f7af730706aaee998d5f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 26 Mar 2007 21:32:08 -0800 Subject: [PATCH] knfsd: allow nfsd READDIR to return 64bit cookies ->readdir passes lofft_t offsets (used as nfs cookies) to nfs3svc_encode_entry{,_plus}, but when they pass it on to encode_entry it becomes an 'off_t', which isn't good. So filesystems that returned 64bit offsets would lose. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs3xdr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 6f677988c71..7e4bb0af24d 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -859,8 +859,8 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, #define NFS3_ENTRY_BAGGAGE (2 + 1 + 2 + 1) #define NFS3_ENTRYPLUS_BAGGAGE (1 + 21 + 1 + (NFS3_FHSIZE >> 2)) static int -encode_entry(struct readdir_cd *ccd, const char *name, - int namlen, off_t offset, ino_t ino, unsigned int d_type, int plus) +encode_entry(struct readdir_cd *ccd, const char *name, int namlen, + loff_t offset, ino_t ino, unsigned int d_type, int plus) { struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres, common); @@ -880,7 +880,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, *cd->offset1 = htonl(offset64 & 0xffffffff); cd->offset1 = NULL; } else { - xdr_encode_hyper(cd->offset, (u64) offset); + xdr_encode_hyper(cd->offset, offset64); } } -- cgit v1.2.3 From 54c044094947826105317dadd01deca083627ea1 Mon Sep 17 00:00:00 2001 From: Bruce Fields Date: Mon, 26 Mar 2007 21:32:09 -0800 Subject: [PATCH] knfsd: nfsd4: fix inheritance flags on v4 ace derived from posix default ace A regression introduced in the last set of acl patches removed the INHERIT_ONLY flag from aces derived from the posix acl. Fix. Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4acl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 832673b1458..673a53c014a 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -228,7 +228,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, struct posix_acl_summary pas; unsigned short deny; int eflag = ((flags & NFS4_ACL_TYPE_DEFAULT) ? - NFS4_INHERITANCE_FLAGS : 0); + NFS4_INHERITANCE_FLAGS | NFS4_ACE_INHERIT_ONLY_ACE : 0); BUG_ON(pacl->a_count < 3); summarize_posix_acl(pacl, &pas); -- cgit v1.2.3 From 21315edd4877b593d5bf17a601a48fc836b8ba58 Mon Sep 17 00:00:00 2001 From: Bruce Fields Date: Mon, 26 Mar 2007 21:32:09 -0800 Subject: [PATCH] knfsd: nfsd4: demote "clientid in use" printk to a dprintk The reused clientid here is a more of a problem for the client than the server, and the client can report the problem itself if it's serious. Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9e406799920..0b03c1ed21c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -750,9 +750,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_clid_inuse; if (!cmp_creds(&conf->cl_cred, &rqstp->rq_cred) || conf->cl_addr != sin->sin_addr.s_addr) { - printk("NFSD: setclientid: string in use by client" - "(clientid %08x/%08x)\n", - conf->cl_clientid.cl_boot, conf->cl_clientid.cl_id); + dprintk("NFSD: setclientid: string in use by client" + "at %u.%u.%u.%u\n", NIPQUAD(conf->cl_addr)); goto out; } } -- cgit v1.2.3 From 79f6523a16b2010969418f8df25fe61498dec66b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 26 Mar 2007 21:32:10 -0800 Subject: [PATCH] knfsd: nfsd4: remove superfluous cancel_delayed_work() call This cancel_delayed_work call is called from a function that is only called from a piece of code that immediate follows a cancel and destruction of the workqueue, so it's clearly a mistake. Cc: Oleg Nesterov Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0b03c1ed21c..af360705e55 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3260,7 +3260,6 @@ __nfs4_state_shutdown(void) unhash_delegation(dp); } - cancel_delayed_work(&laundromat_work); nfsd4_shutdown_recdir(); nfs4_init = 0; } -- cgit v1.2.3 From 5c46010af210712c8a390c7fe50ff05448983061 Mon Sep 17 00:00:00 2001 From: Mika Kukkonen Date: Mon, 26 Mar 2007 21:32:33 -0800 Subject: [PATCH] Fix kernel build with EMBEDDED & PROC_FS & !PROC_SYSCTL Without attached patch against current -git I get following with !PROC_SYSCTL (with EMBEDDED and PROC_FS set): CC init/version.o LD init/built-in.o LD vmlinux fs/built-in.o: In function `do_proc_sys_lookup': proc_sysctl.c:(.text+0x26583): undefined reference to `sysctl_head_next' fs/built-in.o: In function `proc_sys_revalidate': proc_sysctl.c:(.text+0x265bb): undefined reference to `sysctl_head_finish' fs/built-in.o: In function `proc_sys_readdir': proc_sysctl.c:(.text+0x26720): undefined reference to `sysctl_head_next' proc_sysctl.c:(.text+0x267d8): undefined reference to `sysctl_head_finish' proc_sysctl.c:(.text+0x268e7): undefined reference to `sysctl_head_next' proc_sysctl.c:(.text+0x26910): undefined reference to `sysctl_head_finish' fs/built-in.o: In function `proc_sys_write': proc_sysctl.c:(.text+0x2695d): undefined reference to `sysctl_perm' proc_sysctl.c:(.text+0x2699c): undefined reference to `sysctl_head_finish' fs/built-in.o: In function `proc_sys_read': proc_sysctl.c:(.text+0x269e9): undefined reference to `sysctl_perm' proc_sysctl.c:(.text+0x26a25): undefined reference to `sysctl_head_finish' fs/built-in.o: In function `proc_sys_permission': proc_sysctl.c:(.text+0x26ad1): undefined reference to `sysctl_perm' proc_sysctl.c:(.text+0x26adb): undefined reference to `sysctl_head_finish' fs/built-in.o: In function `proc_sys_lookup': proc_sysctl.c:(.text+0x26b39): undefined reference to `sysctl_head_finish' make: *** [vmlinux] Virhe 1 All those functions are in fs/proc/proc_sysctl.c, which has no CONFIG_ #define's in it, so the patch makes the compilation of that file to depend on CONFIG_PROC_SYSCTL (the simplest choice). Acked-by: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/Makefile b/fs/proc/Makefile index a6b3a8f878f..bce38e3f06c 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -8,8 +8,9 @@ proc-y := nommu.o task_nommu.o proc-$(CONFIG_MMU) := mmu.o task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ - proc_tty.o proc_misc.o proc_sysctl.o + proc_tty.o proc_misc.o +proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_PROC_KCORE) += kcore.o proc-$(CONFIG_PROC_VMCORE) += vmcore.o proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o -- cgit v1.2.3 From ed4bb1063171b2f44a40b0a9c400dedb0590dce6 Mon Sep 17 00:00:00 2001 From: Jean Tourrilhes Date: Fri, 23 Mar 2007 00:26:49 +0000 Subject: [PATCH] wext: Add missing ioctls to 64<->32 conversion Johannes Berg and Michael Buesch noticed that the WPA ioctls were missing from the 64<->32 bit conversion. This means that when using a 32 bits userspace on a 64 bit kernel, those ioctls fail. Signed-off-by: Jean Tourrilhes Signed-off-by: John W. Linville --- fs/compat_ioctl.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c81c958b3e1..8b1c5d8bf4e 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -2553,11 +2553,15 @@ HANDLE_IOCTL(I2C_RDWR, do_i2c_rdwr_ioctl) HANDLE_IOCTL(I2C_SMBUS, do_i2c_smbus_ioctl) /* wireless */ HANDLE_IOCTL(SIOCGIWRANGE, do_wireless_ioctl) +HANDLE_IOCTL(SIOCGIWPRIV, do_wireless_ioctl) +HANDLE_IOCTL(SIOCGIWSTATS, do_wireless_ioctl) HANDLE_IOCTL(SIOCSIWSPY, do_wireless_ioctl) HANDLE_IOCTL(SIOCGIWSPY, do_wireless_ioctl) HANDLE_IOCTL(SIOCSIWTHRSPY, do_wireless_ioctl) HANDLE_IOCTL(SIOCGIWTHRSPY, do_wireless_ioctl) +HANDLE_IOCTL(SIOCSIWMLME, do_wireless_ioctl) HANDLE_IOCTL(SIOCGIWAPLIST, do_wireless_ioctl) +HANDLE_IOCTL(SIOCSIWSCAN, do_wireless_ioctl) HANDLE_IOCTL(SIOCGIWSCAN, do_wireless_ioctl) HANDLE_IOCTL(SIOCSIWESSID, do_wireless_ioctl) HANDLE_IOCTL(SIOCGIWESSID, do_wireless_ioctl) @@ -2565,6 +2569,11 @@ HANDLE_IOCTL(SIOCSIWNICKN, do_wireless_ioctl) HANDLE_IOCTL(SIOCGIWNICKN, do_wireless_ioctl) HANDLE_IOCTL(SIOCSIWENCODE, do_wireless_ioctl) HANDLE_IOCTL(SIOCGIWENCODE, do_wireless_ioctl) +HANDLE_IOCTL(SIOCSIWGENIE, do_wireless_ioctl) +HANDLE_IOCTL(SIOCGIWGENIE, do_wireless_ioctl) +HANDLE_IOCTL(SIOCSIWENCODEEXT, do_wireless_ioctl) +HANDLE_IOCTL(SIOCGIWENCODEEXT, do_wireless_ioctl) +HANDLE_IOCTL(SIOCSIWPMKSA, do_wireless_ioctl) HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl) HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl) HANDLE_IOCTL(RTC_IRQP_READ32, rtc_ioctl) -- cgit v1.2.3 From 28defbea64622f69d65a6079bf800cedb9915a5f Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Tue, 27 Mar 2007 15:44:01 -0700 Subject: [PATCH] aio: remove bare user-triggerable error printk The user can generate console output if they cause do_mmap() to fail during sys_io_setup(). This was seen in a regression test that does exactly that by spinning calling mmap() until it gets -ENOMEM before calling io_setup(). We don't need this printk at all, just remove it. Signed-off-by: Zach Brown Signed-off-by: Linus Torvalds --- fs/aio.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 0b4ee0a5c83..e4598d6d49d 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -136,7 +136,6 @@ static int aio_setup_ring(struct kioctx *ctx) 0); if (IS_ERR((void *)info->mmap_base)) { up_write(&ctx->mm->mmap_sem); - printk("mmap err: %ld\n", -info->mmap_base); info->mmap_size = 0; aio_free_ring(ctx); return -EAGAIN; -- cgit v1.2.3 From d9993c37ef87c758d4a6e63972395b1cf8a4cb7b Mon Sep 17 00:00:00 2001 From: Dmitriy Monakhov Date: Thu, 29 Mar 2007 14:24:09 +0200 Subject: [PATCH] splice: partial write fix Currently if partial write has happened while ->commit_write() then page wasn't marked as accessed and rebalanced. Signed-off-by: Monakhov Dmitriy Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- fs/splice.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index 07f6556add0..5428b0ff3b6 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -627,18 +627,25 @@ find_page: } ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); - if (!ret) { + if (ret) { + if (ret == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto find_page; + } + if (ret < 0) + goto out; /* - * Return the number of bytes written and mark page as - * accessed, we are now done! + * Partial write has happened, so 'ret' already initialized by + * number of bytes written, Where is nothing we have to do here. */ + } else ret = this_len; - mark_page_accessed(page); - balance_dirty_pages_ratelimited(mapping); - } else if (ret == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - goto find_page; - } + /* + * Return the number of bytes written and mark page as + * accessed, we are now done! + */ + mark_page_accessed(page); + balance_dirty_pages_ratelimited(mapping); out: page_cache_release(page); unlock_page(page); -- cgit v1.2.3 From 622e696938c6a9c5357d2ec4a07ed2f27d56925a Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Thu, 29 Mar 2007 01:20:32 -0700 Subject: [PATCH] uml: fix compilation problems Fix a few miscellaneous compilation problems - an assignment with mismatched types in ldt.c a missing include in mconsole.h which needs a definition of uml_pt_regs I missed removing an include of user_util.h in hostfs Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hostfs/hostfs_kern.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 9baf69773ed..a2667db9f6b 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -20,7 +20,6 @@ #include "hostfs.h" #include "kern_util.h" #include "kern.h" -#include "user_util.h" #include "init.h" struct hostfs_inode_info { -- cgit v1.2.3 From 75e8defbe4236a358b1396bc6d9a1231e5eca225 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Thu, 29 Mar 2007 01:20:33 -0700 Subject: [PATCH] uml: hostfs variable renaming * rename name to host_root_path * rename data to req_root. Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hostfs/hostfs_kern.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index a2667db9f6b..fd301a91012 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -938,7 +938,7 @@ static const struct address_space_operations hostfs_link_aops = { static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) { struct inode *root_inode; - char *name, *data = d; + char *host_root_path, *req_root = d; int err; sb->s_blocksize = 1024; @@ -947,16 +947,16 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sb->s_op = &hostfs_sbops; /* NULL is printed as by sprintf: avoid that. */ - if (data == NULL) - data = ""; + if (req_root == NULL) + req_root = ""; err = -ENOMEM; - name = kmalloc(strlen(root_ino) + 1 - + strlen(data) + 1, GFP_KERNEL); - if(name == NULL) + host_root_path = kmalloc(strlen(root_ino) + 1 + + strlen(req_root) + 1, GFP_KERNEL); + if(host_root_path == NULL) goto out; - sprintf(name, "%s/%s", root_ino, data); + sprintf(host_root_path, "%s/%s", root_ino, req_root); root_inode = iget(sb, 0); if(root_inode == NULL) @@ -966,10 +966,10 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) if(err) goto out_put; - HOSTFS_I(root_inode)->host_filename = name; - /* Avoid that in the error path, iput(root_inode) frees again name through - * hostfs_destroy_inode! */ - name = NULL; + HOSTFS_I(root_inode)->host_filename = host_root_path; + /* Avoid that in the error path, iput(root_inode) frees again + * host_root_path through hostfs_destroy_inode! */ + host_root_path = NULL; err = -ENOMEM; sb->s_root = d_alloc_root(root_inode); @@ -989,7 +989,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) out_put: iput(root_inode); out_free: - kfree(name); + kfree(host_root_path); out: return(err); } -- cgit v1.2.3 From 05565b65a5309e3e5c86db1975b57f75661bee8f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 1 Apr 2007 23:49:35 -0700 Subject: [PATCH] proc: fix linkage with CONFIG_SYSCTL=y, CONFIG_PROC_SYSCTL=n We're using #ifdef CONFIG_SYSCTL, but we should be using CONFIG_PROC_SYSCTL, so we get fs/built-in.o: In function `proc_root_init': /usr/src/linux/fs/proc/root.c:83: undefined reference to `proc_sys_init' Fix that up and remove an ifdef-in-C. Cc: "Eric W. Biederman" Cc: Helge Hafting Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/internal.h | 4 ++++ fs/proc/root.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c932aa65e19..f771889183c 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -11,7 +11,11 @@ #include +#ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); +#else +static inline void proc_sys_init(void) { } +#endif struct vmalloc_info { unsigned long used; diff --git a/fs/proc/root.c b/fs/proc/root.c index 5834a744c2a..41f17037f73 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -79,9 +79,7 @@ void __init proc_root_init(void) proc_device_tree_init(); #endif proc_bus = proc_mkdir("bus", NULL); -#ifdef CONFIG_SYSCTL proc_sys_init(); -#endif } static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat -- cgit v1.2.3 From 03221702608c60b470fc86a23bdf4bc30e5bd59f Mon Sep 17 00:00:00 2001 From: Brian Pomerantz Date: Sun, 1 Apr 2007 23:49:41 -0700 Subject: [PATCH] fix page leak during core dump When the dump cannot occur most likely because of a full file system and the page to be written is the zero page, the call to page_cache_release() is missed. Signed-off-by: Brian Pomerantz Cc: Hugh Dickins Cc: Nick Piggin Cc: David Howells Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 5 ++++- fs/binfmt_elf_fdpic.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index a2fceba7ef8..9cc4f0a8aaa 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1704,7 +1704,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) DUMP_SEEK(PAGE_SIZE); } else { if (page == ZERO_PAGE(addr)) { - DUMP_SEEK(PAGE_SIZE); + if (!dump_seek(file, PAGE_SIZE)) { + page_cache_release(page); + goto end_coredump; + } } else { void *kaddr; flush_cache_page(vma, addr, diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 47d6d49d1fb..f3ddca4a387 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1480,8 +1480,8 @@ static int elf_fdpic_dump_segments(struct file *file, struct mm_struct *mm, DUMP_SEEK(file->f_pos + PAGE_SIZE); } else if (page == ZERO_PAGE(addr)) { - DUMP_SEEK(file->f_pos + PAGE_SIZE); page_cache_release(page); + DUMP_SEEK(file->f_pos + PAGE_SIZE); } else { void *kaddr; -- cgit v1.2.3 From 1aa9b4b9bc10a0cf6e6109c2997d759a76e840e5 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 1 Apr 2007 23:49:43 -0700 Subject: [PATCH] revert "retries in ext3_prepare_write() violate ordering requirements" Revert e92a4d595b464c4aae64be39ca61a9ffe9c8b278. Dmitry points out "When we block_prepare_write() failed while ext3_prepare_write() we jump to "failure" label and call ext3_prepare_failure() witch search last mapped bh and invoke commit_write untill it. This is wrong!! because some bh from begining to the last mapped bh may be not uptodate. As a result we commit to disk not uptodate page content witch contains garbage from previous usage." and "Unexpected file size increasing." Call trace the same as it was in first issue but result is different. For example we have file with i_size is zero. we want write two blocks , but fs has only one free block. ->ext3_prepare_write(...from == 0, to == 2048) retry: ->block_prepare_write() == -ENOSPC# we failed but allocated one block here. ->ext3_prepare_failure() ->commit_write( from == 0, to == 1024) # after this i_size becomes 1024 :) if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) goto retry; Finally when all retries will be spended ext3_prepare_failure return -ENOSPC, but i_size was increased and later block trimm procedures can't help here. We don't appear to have the horsepower to fix these issues, so let's put things back the way they were for now. Cc: Kirill Korotaev Cc: Ingo Molnar Cc: Ken Chen Cc: Andrey Savochkin Cc: Cc: Dmitriy Monakhov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/inode.c | 85 +++++++-------------------------------------------------- 1 file changed, 10 insertions(+), 75 deletions(-) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 8a824f4ce5c..a5b150f7e8a 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1148,102 +1148,37 @@ static int do_journal_get_write_access(handle_t *handle, return ext3_journal_get_write_access(handle, bh); } -/* - * The idea of this helper function is following: - * if prepare_write has allocated some blocks, but not all of them, the - * transaction must include the content of the newly allocated blocks. - * This content is expected to be set to zeroes by block_prepare_write(). - * 2006/10/14 SAW - */ -static int ext3_prepare_failure(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct address_space *mapping; - struct buffer_head *bh, *head, *next; - unsigned block_start, block_end; - unsigned blocksize; - int ret; - handle_t *handle = ext3_journal_current_handle(); - - mapping = page->mapping; - if (ext3_should_writeback_data(mapping->host)) { - /* optimization: no constraints about data */ -skip: - return ext3_journal_stop(handle); - } - - head = page_buffers(page); - blocksize = head->b_size; - for ( bh = head, block_start = 0; - bh != head || !block_start; - block_start = block_end, bh = next) - { - next = bh->b_this_page; - block_end = block_start + blocksize; - if (block_end <= from) - continue; - if (block_start >= to) { - block_start = to; - break; - } - if (!buffer_mapped(bh)) - /* prepare_write failed on this bh */ - break; - if (ext3_should_journal_data(mapping->host)) { - ret = do_journal_get_write_access(handle, bh); - if (ret) { - ext3_journal_stop(handle); - return ret; - } - } - /* - * block_start here becomes the first block where the current iteration - * of prepare_write failed. - */ - } - if (block_start <= from) - goto skip; - - /* commit allocated and zeroed buffers */ - return mapping->a_ops->commit_write(file, page, from, block_start); -} - static int ext3_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { struct inode *inode = page->mapping->host; - int ret, ret2; - int needed_blocks = ext3_writepage_trans_blocks(inode); + int ret, needed_blocks = ext3_writepage_trans_blocks(inode); handle_t *handle; int retries = 0; retry: handle = ext3_journal_start(inode, needed_blocks); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) ret = nobh_prepare_write(page, from, to, ext3_get_block); else ret = block_prepare_write(page, from, to, ext3_get_block); if (ret) - goto failure; + goto prepare_write_failed; if (ext3_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, do_journal_get_write_access); - if (ret) - /* fatal error, just put the handle and return */ - journal_stop(handle); } - return ret; - -failure: - ret2 = ext3_prepare_failure(file, page, from, to); - if (ret2 < 0) - return ret2; +prepare_write_failed: + if (ret) + ext3_journal_stop(handle); if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) goto retry; - /* retry number exceeded, or other error like -EDQUOT */ +out: return ret; } -- cgit v1.2.3 From 7479d2b90b103f84d956a7177b3f99cbd472b345 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 1 Apr 2007 23:49:44 -0700 Subject: [PATCH] revert "retries in ext4_prepare_write() violate ordering requirements" Revert b46be05004abb419e303e66e143eed9f8a6e9f3f. Same reasoning as for ext3. Cc: Kirill Korotaev Cc: Ingo Molnar Cc: Ken Chen Cc: Andrey Savochkin Cc: Cc: Dmitriy Monakhov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/inode.c | 85 +++++++-------------------------------------------------- 1 file changed, 10 insertions(+), 75 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index fbff4b9e122..810b6d6474b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1147,102 +1147,37 @@ static int do_journal_get_write_access(handle_t *handle, return ext4_journal_get_write_access(handle, bh); } -/* - * The idea of this helper function is following: - * if prepare_write has allocated some blocks, but not all of them, the - * transaction must include the content of the newly allocated blocks. - * This content is expected to be set to zeroes by block_prepare_write(). - * 2006/10/14 SAW - */ -static int ext4_prepare_failure(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct address_space *mapping; - struct buffer_head *bh, *head, *next; - unsigned block_start, block_end; - unsigned blocksize; - int ret; - handle_t *handle = ext4_journal_current_handle(); - - mapping = page->mapping; - if (ext4_should_writeback_data(mapping->host)) { - /* optimization: no constraints about data */ -skip: - return ext4_journal_stop(handle); - } - - head = page_buffers(page); - blocksize = head->b_size; - for ( bh = head, block_start = 0; - bh != head || !block_start; - block_start = block_end, bh = next) - { - next = bh->b_this_page; - block_end = block_start + blocksize; - if (block_end <= from) - continue; - if (block_start >= to) { - block_start = to; - break; - } - if (!buffer_mapped(bh)) - /* prepare_write failed on this bh */ - break; - if (ext4_should_journal_data(mapping->host)) { - ret = do_journal_get_write_access(handle, bh); - if (ret) { - ext4_journal_stop(handle); - return ret; - } - } - /* - * block_start here becomes the first block where the current iteration - * of prepare_write failed. - */ - } - if (block_start <= from) - goto skip; - - /* commit allocated and zeroed buffers */ - return mapping->a_ops->commit_write(file, page, from, block_start); -} - static int ext4_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { struct inode *inode = page->mapping->host; - int ret, ret2; - int needed_blocks = ext4_writepage_trans_blocks(inode); + int ret, needed_blocks = ext4_writepage_trans_blocks(inode); handle_t *handle; int retries = 0; retry: handle = ext4_journal_start(inode, needed_blocks); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) ret = nobh_prepare_write(page, from, to, ext4_get_block); else ret = block_prepare_write(page, from, to, ext4_get_block); if (ret) - goto failure; + goto prepare_write_failed; if (ext4_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, do_journal_get_write_access); - if (ret) - /* fatal error, just put the handle and return */ - ext4_journal_stop(handle); } - return ret; - -failure: - ret2 = ext4_prepare_failure(file, page, from, to); - if (ret2 < 0) - return ret2; +prepare_write_failed: + if (ret) + ext4_journal_stop(handle); if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; - /* retry number exceeded, or other error like -EDQUOT */ +out: return ret; } -- cgit v1.2.3 From 2363cc0264c42636e9e7622f78dde5c2f66beb8e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 4 Apr 2007 19:08:22 -0700 Subject: [PATCH] remove protection of LANANA-reserved majors Revert all this. It can cause device-mapper to receive a different major from earlier kernels and it turns out that the Amanda backup program (via GNU tar, apparently) checks major numbers on files when performing incremental backups. Which is a bit broken of Amanda (or tar), but this feature isn't important enough to justify the churn. Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/char_dev.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/char_dev.c b/fs/char_dev.c index 78ced721554..164a45cdaf5 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -109,8 +109,6 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, /* temporary */ if (major == 0) { for (i = ARRAY_SIZE(chrdevs)-1; i > 0; i--) { - if (is_lanana_major(i)) - continue; if (chrdevs[i] == NULL) break; } -- cgit v1.2.3 From a5bfffac645a7b2d8119f8bbae34df5c94832799 Mon Sep 17 00:00:00 2001 From: Timo Savola Date: Sun, 8 Apr 2007 16:04:00 -0700 Subject: [PATCH] fuse: validate rootmode mount option If rootmode isn't valid, we hit the BUG() in fuse_init_inode. Now EINVAL is returned. Signed-off-by: Timo Savola Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/dir.c | 5 +++-- fs/fuse/fuse_i.h | 5 +++++ fs/fuse/inode.c | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 406bf61ed51..8890eba1db5 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -195,7 +195,7 @@ static struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, }; -static int valid_mode(int m) +int fuse_valid_type(int m) { return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m); @@ -248,7 +248,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, fuse_put_request(fc, req); /* Zero nodeid is same as -ENOENT, but with valid timeout */ if (!err && outarg.nodeid && - (invalid_nodeid(outarg.nodeid) || !valid_mode(outarg.attr.mode))) + (invalid_nodeid(outarg.nodeid) || + !fuse_valid_type(outarg.attr.mode))) err = -EIO; if (!err && outarg.nodeid) { inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index b98b20de740..68ae87cbafa 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -552,3 +552,8 @@ int fuse_ctl_add_conn(struct fuse_conn *fc); * Remove connection from control filesystem */ void fuse_ctl_remove_conn(struct fuse_conn *fc); + +/** + * Is file type valid? + */ +int fuse_valid_type(int m); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 5ab8e50e780..608db81219a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -330,6 +330,8 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) case OPT_ROOTMODE: if (match_octal(&args[0], &value)) return 0; + if (!fuse_valid_type(value)) + return 0; d->rootmode = value; d->rootmode_present = 1; break; -- cgit v1.2.3 From 6d205f120547043de663315698dcf5f0eaa31b5c Mon Sep 17 00:00:00 2001 From: Vladimir Saveliev Date: Wed, 11 Apr 2007 23:28:44 -0700 Subject: [PATCH] reiserfs: fix key decrementing This patch fixes a bug in function decrementing a key of stat data item. Offset of reiserfs keys are compared as signed values. To set key offset to maximal possible value maximal signed value has to be used. This bug is responsible for severe reiserfs filesystem corruption which shows itself as warning vs-13060. reiserfsck fixes this corruption by filesystem tree rebuilding. Signed-off-by: Vladimir Saveliev Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/item_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c index b9b423b22a8..9475557ab49 100644 --- a/fs/reiserfs/item_ops.c +++ b/fs/reiserfs/item_ops.c @@ -23,7 +23,7 @@ static void sd_decrement_key(struct cpu_key *key) { key->on_disk_key.k_objectid--; set_cpu_key_k_type(key, TYPE_ANY); - set_cpu_key_k_offset(key, (loff_t) (-1)); + set_cpu_key_k_offset(key, (loff_t)(~0ULL >> 1)); } static int sd_is_left_mergeable(struct reiserfs_key *key, unsigned long bsize) -- cgit v1.2.3 From c3724b129b5a1a1789a2dc5348685a236ae02479 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 11 Apr 2007 23:28:46 -0700 Subject: [PATCH] autofs4: fix race in unhashed dentry code Commit f50b6f8691cae2e0064c499dd3ef3f31142987f0 introduced a race in autofs4 between autofs_lookup_unhashed() and autofs_dentry_release(). autofs_dentry_release() ends up clearing the ->dentry and ->inode members of autofs_info before removing it from the rehash list. The list is protected by the rehash lock in both functions, but since autofs_dentry_release() starts tearing the autofs_info struct down before removing it from the list, autofs_lookup_unhashed() can get a autofs_info with a NULL dentry. This patch moves the clearing of ->dentry and ->inode after the removal from the rehash list. Signed-off-by: Jeff Mahoney Acked-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index b4631046867..d0e9b3a3905 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -470,9 +470,6 @@ void autofs4_dentry_release(struct dentry *de) if (inf) { struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); - inf->dentry = NULL; - inf->inode = NULL; - if (sbi) { spin_lock(&sbi->rehash_lock); if (!list_empty(&inf->rehash)) @@ -480,6 +477,9 @@ void autofs4_dentry_release(struct dentry *de) spin_unlock(&sbi->rehash_lock); } + inf->dentry = NULL; + inf->inode = NULL; + autofs4_free_ino(inf); } } -- cgit v1.2.3 From e1552e199857109d4b25b9163eff4646726eee3d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 Apr 2007 19:07:28 -0400 Subject: NFS: Fix an Oops in nfs_setattr() It looks like nfs_setattr() and nfs_rename() also need to test whether the target is a regular file before calling nfs_wb_all()... Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 3 ++- fs/nfs/inode.c | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 92d8ec859e2..cd3469720cb 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1684,7 +1684,8 @@ go_ahead: * ... prune child dentries and writebacks if needed. */ if (atomic_read(&old_dentry->d_count) > 1) { - nfs_wb_all(old_inode); + if (S_ISREG(old_inode->i_mode)) + nfs_wb_all(old_inode); shrink_dcache_parent(old_dentry); } nfs_inode_return_delegation(old_inode); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 93d046c85f5..44aa9b72657 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -341,8 +341,10 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) lock_kernel(); nfs_begin_data_update(inode); /* Write all dirty data */ - filemap_write_and_wait(inode->i_mapping); - nfs_wb_all(inode); + if (S_ISREG(inode->i_mode)) { + filemap_write_and_wait(inode->i_mapping); + nfs_wb_all(inode); + } /* * Return any delegations if we're going to change ACLs */ -- cgit v1.2.3 From 60fa3f769f7651a60125a0f44e3ffe3246d7cf39 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 Apr 2007 19:11:52 -0400 Subject: NFS: Fix two bugs in the O_DIRECT write code Do not flag an error if the COMMIT call fails and we decide to resend the writes. Let the resend flag the error if it fails. If a write has failed, then nfs_direct_write_result should not attempt to send a commit. It should just exit asap and return the error to the user. Signed-off-by: Trond Myklebust Cc: Chuck Lever Signed-off-by: Linus Torvalds --- fs/nfs/direct.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b1c98ea39b7..2877744cb60 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -432,10 +432,10 @@ static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) return; if (unlikely(task->tk_status < 0)) { - dreq->error = task->tk_status; + dprintk("NFS: %5u commit failed with error %d.\n", + task->tk_pid, task->tk_status); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - } - if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { + } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { dprintk("NFS: %5u commit verify failed\n", task->tk_pid); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; } @@ -531,9 +531,12 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) spin_lock(&dreq->lock); + if (unlikely(dreq->error != 0)) + goto out_unlock; if (unlikely(status < 0)) { + /* An error has occured, so we should not commit */ + dreq->flags = 0; dreq->error = status; - goto out_unlock; } dreq->count += data->res.count; -- cgit v1.2.3 From 5a6d41b32a17ca902ef50fdfa170d7f23264bad5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 Apr 2007 19:10:12 -0400 Subject: NFS: Ensure PG_writeback is cleared when writeback fails If the writebacks are cancelled via nfs_cancel_dirty_list, or due to the memory allocation failing in nfs_flush_one/nfs_flush_multi, then we must ensure that the PG_writeback flag is cleared. Also ensure that we actually own the PG_writeback flag whenever we schedule a new writeback by making nfs_set_page_writeback() return the value of test_set_page_writeback(). The PG_writeback page flag ends up replacing the functionality of the PG_FLUSHING nfs_page flag, so we rip that out too. Signed-off-by: Trond Myklebust Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2867e6b7096..e5d7cac569a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -218,9 +218,11 @@ int nfs_congestion_kb; #define NFS_CONGESTION_OFF_THRESH \ (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2)) -static void nfs_set_page_writeback(struct page *page) +static int nfs_set_page_writeback(struct page *page) { - if (!test_set_page_writeback(page)) { + int ret = test_set_page_writeback(page); + + if (!ret) { struct inode *inode = page->mapping->host; struct nfs_server *nfss = NFS_SERVER(inode); @@ -228,6 +230,7 @@ static void nfs_set_page_writeback(struct page *page) NFS_CONGESTION_ON_THRESH) set_bdi_congested(&nfss->backing_dev_info, WRITE); } + return ret; } static void nfs_end_page_writeback(struct page *page) @@ -277,10 +280,8 @@ static int nfs_page_mark_flush(struct page *page) spin_lock(req_lock); } spin_unlock(req_lock); - if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) { + if (nfs_set_page_writeback(page) == 0) nfs_mark_request_dirty(req); - nfs_set_page_writeback(page); - } ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); nfs_unlock_request(req); return ret; @@ -424,7 +425,6 @@ nfs_mark_request_dirty(struct nfs_page *req) static void nfs_redirty_request(struct nfs_page *req) { - clear_bit(PG_FLUSHING, &req->wb_flags); __set_page_dirty_nobuffers(req->wb_page); } @@ -434,7 +434,11 @@ nfs_redirty_request(struct nfs_page *req) static inline int nfs_dirty_request(struct nfs_page *req) { - return test_bit(PG_FLUSHING, &req->wb_flags) == 0; + struct page *page = req->wb_page; + + if (page == NULL) + return 0; + return !PageWriteback(req->wb_page); } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) @@ -500,6 +504,7 @@ static void nfs_cancel_dirty_list(struct list_head *head) while(!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); + nfs_end_page_writeback(req->wb_page); nfs_inode_remove_request(req); nfs_clear_page_writeback(req); } @@ -890,6 +895,7 @@ out_bad: list_del(&data->pages); nfs_writedata_release(data); } + nfs_end_page_writeback(req->wb_page); nfs_redirty_request(req); nfs_clear_page_writeback(req); return -ENOMEM; @@ -935,6 +941,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); + nfs_end_page_writeback(req->wb_page); nfs_redirty_request(req); nfs_clear_page_writeback(req); } @@ -970,6 +977,7 @@ out_err: while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); + nfs_end_page_writeback(req->wb_page); nfs_redirty_request(req); nfs_clear_page_writeback(req); } -- cgit v1.2.3 From eb4cac10d9f7b006da842e2d37414d13e1333781 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 15 Apr 2007 16:21:49 -0400 Subject: NFS: Fix a list corruption problem We must remove the request from whatever list it is currently on before we can add it to the dirty list. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e5d7cac569a..ad2e91b4904 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -280,8 +280,10 @@ static int nfs_page_mark_flush(struct page *page) spin_lock(req_lock); } spin_unlock(req_lock); - if (nfs_set_page_writeback(page) == 0) + if (nfs_set_page_writeback(page) == 0) { + nfs_list_remove_request(req); nfs_mark_request_dirty(req); + } ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); nfs_unlock_request(req); return ret; -- cgit v1.2.3 From c4bbafda70a0fc95c6595bffd6825ef264050d01 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 16 Apr 2007 22:53:13 -0700 Subject: exec.c: fix coredump to pipe problem and obscure "security hole" The patch checks for "|" in the pattern not the output and doesn't nail a pid on to a piped name (as it is a program name not a file) Also fixes a very very obscure security corner case. If you happen to have decided on a core pattern that starts with the program name then the user can run a program called "|myevilhack" as it stands. I doubt anyone does this. Signed-off-by: Alan Cox Confirmed-by: Christopher S. Aker Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 7e36c6f6f53..3155e915307 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1244,13 +1244,17 @@ EXPORT_SYMBOL(set_binfmt); * name into corename, which must have space for at least * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. */ -static void format_corename(char *corename, const char *pattern, long signr) +static int format_corename(char *corename, const char *pattern, long signr) { const char *pat_ptr = pattern; char *out_ptr = corename; char *const out_end = corename + CORENAME_MAX_SIZE; int rc; int pid_in_pattern = 0; + int ispipe = 0; + + if (*pattern == '|') + ispipe = 1; /* Repeat as long as we have more pattern to process and more output space */ @@ -1341,8 +1345,8 @@ static void format_corename(char *corename, const char *pattern, long signr) * * If core_pattern does not include a %p (as is the default) * and core_uses_pid is set, then .%pid will be appended to - * the filename */ - if (!pid_in_pattern + * the filename. Do not do this for piped commands. */ + if (!ispipe && !pid_in_pattern && (core_uses_pid || atomic_read(¤t->mm->mm_users) != 1)) { rc = snprintf(out_ptr, out_end - out_ptr, ".%d", current->tgid); @@ -1350,8 +1354,9 @@ static void format_corename(char *corename, const char *pattern, long signr) goto out; out_ptr += rc; } - out: +out: *out_ptr = 0; + return ispipe; } static void zap_process(struct task_struct *start) @@ -1502,16 +1507,15 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) * uses lock_kernel() */ lock_kernel(); - format_corename(corename, core_pattern, signr); + ispipe = format_corename(corename, core_pattern, signr); unlock_kernel(); - if (corename[0] == '|') { + if (ispipe) { /* SIGPIPE can happen, but it's just never processed */ if(call_usermodehelper_pipe(corename+1, NULL, NULL, &file)) { printk(KERN_INFO "Core dump to %s pipe failed\n", corename); goto fail_unlock; } - ispipe = 1; } else file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, -- cgit v1.2.3 From 07a0cfec30848319cc86f21cce0d2efeca593e1a Mon Sep 17 00:00:00 2001 From: Evgeniy Dushistov Date: Mon, 16 Apr 2007 22:53:24 -0700 Subject: ufs proper handling of zero link case This patch should fix or partly fix this bug: http://bugzilla.kernel.org/show_bug.cgi?id=8276 The problem is: - if we see "zero link case" during reading inode operation, we call ufs_error(which remount fs readonly), but not "mark" inode as bad (1) - in readonly case we do not fill some data structures, which are used in read and write case (2) - VFS call ufs_delete_inode if link count is zero (3) so (1)->(3)->(2) cause oops, this patch should fix such scenario Signed-off-by: Evgeniy Dushistov Cc: Jim Paris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/inode.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 013d7afe7cd..f18b79122fa 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -601,7 +601,7 @@ static void ufs_set_inode_ops(struct inode *inode) ufs_get_inode_dev(inode->i_sb, UFS_I(inode))); } -static void ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) +static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) { struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; @@ -613,8 +613,10 @@ static void ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) */ inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode); inode->i_nlink = fs16_to_cpu(sb, ufs_inode->ui_nlink); - if (inode->i_nlink == 0) + if (inode->i_nlink == 0) { ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); + return -1; + } /* * Linux now has 32-bit uid and gid, so we can support EFT. @@ -643,9 +645,10 @@ static void ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++) ufsi->i_u1.i_symlink[i] = ufs_inode->ui_u2.ui_symlink[i]; } + return 0; } -static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) +static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) { struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; @@ -658,8 +661,10 @@ static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) */ inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode); inode->i_nlink = fs16_to_cpu(sb, ufs2_inode->ui_nlink); - if (inode->i_nlink == 0) + if (inode->i_nlink == 0) { ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); + return -1; + } /* * Linux now has 32-bit uid and gid, so we can support EFT. @@ -690,6 +695,7 @@ static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++) ufsi->i_u1.i_symlink[i] = ufs2_inode->ui_u2.ui_symlink[i]; } + return 0; } void ufs_read_inode(struct inode * inode) @@ -698,6 +704,7 @@ void ufs_read_inode(struct inode * inode) struct super_block * sb; struct ufs_sb_private_info * uspi; struct buffer_head * bh; + int err; UFSD("ENTER, ino %lu\n", inode->i_ino); @@ -720,14 +727,17 @@ void ufs_read_inode(struct inode * inode) if ((UFS_SB(sb)->s_flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { struct ufs2_inode *ufs2_inode = (struct ufs2_inode *)bh->b_data; - ufs2_read_inode(inode, - ufs2_inode + ufs_inotofsbo(inode->i_ino)); + err = ufs2_read_inode(inode, + ufs2_inode + ufs_inotofsbo(inode->i_ino)); } else { struct ufs_inode *ufs_inode = (struct ufs_inode *)bh->b_data; - ufs1_read_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino)); + err = ufs1_read_inode(inode, + ufs_inode + ufs_inotofsbo(inode->i_ino)); } + if (err) + goto bad_inode; inode->i_version++; ufsi->i_lastfrag = (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift; @@ -888,6 +898,8 @@ void ufs_delete_inode (struct inode * inode) loff_t old_i_size; truncate_inode_pages(&inode->i_data, 0); + if (is_bad_inode(inode)) + goto no_delete; /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ lock_kernel(); mark_inode_dirty(inode); @@ -898,4 +910,7 @@ void ufs_delete_inode (struct inode * inode) ufs_warning(inode->i_sb, __FUNCTION__, "ufs_truncate failed\n"); ufs_free_inode (inode); unlock_kernel(); + return; +no_delete: + clear_inode(inode); /* We must guarantee clearing of inode... */ } -- cgit v1.2.3 From 8e821cad12e80cd1a8a3fbadf91f62f17f32549e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Apr 2007 16:12:34 -0400 Subject: NFS: clean up the unstable write code Get rid of the inlined #ifdefs. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 117 ++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 71 insertions(+), 46 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ad2e91b4904..3ed4feb8c85 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -460,6 +460,43 @@ nfs_mark_request_commit(struct nfs_page *req) inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } + +static inline +int nfs_write_need_commit(struct nfs_write_data *data) +{ + return data->verf.committed != NFS_FILE_SYNC; +} + +static inline +int nfs_reschedule_unstable_write(struct nfs_page *req) +{ + if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { + nfs_mark_request_commit(req); + return 1; + } + if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { + nfs_redirty_request(req); + return 1; + } + return 0; +} +#else +static inline void +nfs_mark_request_commit(struct nfs_page *req) +{ +} + +static inline +int nfs_write_need_commit(struct nfs_write_data *data) +{ + return 0; +} + +static inline +int nfs_reschedule_unstable_write(struct nfs_page *req) +{ + return 0; +} #endif /* @@ -746,26 +783,12 @@ int nfs_updatepage(struct file *file, struct page *page, static void nfs_writepage_release(struct nfs_page *req) { - nfs_end_page_writeback(req->wb_page); -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) - if (!PageError(req->wb_page)) { - if (NFS_NEED_RESCHED(req)) { - nfs_redirty_request(req); - goto out; - } else if (NFS_NEED_COMMIT(req)) { - nfs_mark_request_commit(req); - goto out; - } - } - nfs_inode_remove_request(req); - -out: - nfs_clear_commit(req); - nfs_clear_reschedule(req); -#else - nfs_inode_remove_request(req); -#endif + if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { + nfs_end_page_writeback(req->wb_page); + nfs_inode_remove_request(req); + } else + nfs_end_page_writeback(req->wb_page); nfs_clear_page_writeback(req); } @@ -1008,22 +1031,28 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) nfs_set_pageerror(page); req->wb_context->error = task->tk_status; dprintk(", error = %d\n", task->tk_status); - } else { -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) - if (data->verf.committed < NFS_FILE_SYNC) { - if (!NFS_NEED_COMMIT(req)) { - nfs_defer_commit(req); - memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - dprintk(" defer commit\n"); - } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) { - nfs_defer_reschedule(req); - dprintk(" server reboot detected\n"); - } - } else -#endif - dprintk(" OK\n"); + goto out; } + if (nfs_write_need_commit(data)) { + spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; + + spin_lock(req_lock); + if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) { + /* Do nothing we need to resend the writes */ + } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) { + memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); + dprintk(" defer commit\n"); + } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) { + set_bit(PG_NEED_RESCHED, &req->wb_flags); + clear_bit(PG_NEED_COMMIT, &req->wb_flags); + dprintk(" server reboot detected\n"); + } + spin_unlock(req_lock); + } else + dprintk(" OK\n"); + +out: if (atomic_dec_and_test(&req->wb_complete)) nfs_writepage_release(req); } @@ -1064,25 +1093,21 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { nfs_set_pageerror(page); req->wb_context->error = task->tk_status; - nfs_end_page_writeback(page); - nfs_inode_remove_request(req); dprintk(", error = %d\n", task->tk_status); - goto next; + goto remove_request; } - nfs_end_page_writeback(page); -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) - if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) { - nfs_inode_remove_request(req); - dprintk(" OK\n"); + if (nfs_write_need_commit(data)) { + memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); + nfs_mark_request_commit(req); + nfs_end_page_writeback(page); + dprintk(" marked for commit\n"); goto next; } - memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - nfs_mark_request_commit(req); - dprintk(" marked for commit\n"); -#else + dprintk(" OK\n"); +remove_request: + nfs_end_page_writeback(page); nfs_inode_remove_request(req); -#endif next: nfs_clear_page_writeback(req); } -- cgit v1.2.3 From 6d677e3504cd173b2ff7fc393ee4241b3c0f92a6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Apr 2007 16:12:40 -0400 Subject: NFS: Don't clear PG_writeback until after we've processed unstable writes Ensure that we don't release the PG_writeback lock until after the page has either been redirtied, or queued on the nfs_inode 'commit' list. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3ed4feb8c85..8e9424651bc 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -920,8 +920,8 @@ out_bad: list_del(&data->pages); nfs_writedata_release(data); } - nfs_end_page_writeback(req->wb_page); nfs_redirty_request(req); + nfs_end_page_writeback(req->wb_page); nfs_clear_page_writeback(req); return -ENOMEM; } @@ -966,8 +966,8 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_end_page_writeback(req->wb_page); nfs_redirty_request(req); + nfs_end_page_writeback(req->wb_page); nfs_clear_page_writeback(req); } return -ENOMEM; @@ -1002,8 +1002,8 @@ out_err: while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_end_page_writeback(req->wb_page); nfs_redirty_request(req); + nfs_end_page_writeback(req->wb_page); nfs_clear_page_writeback(req); } return error; -- cgit v1.2.3 From 612c9384fd0486686699f7d49b774f0c7a79c511 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Apr 2007 16:12:45 -0400 Subject: NFS: Fix the 'desynchronized value of nfs_i.ncommit' error Redirtying a request that is already marked for commit will screw up the accounting for NR_UNSTABLE_NFS as well as nfs_i.ncommit. Ensure that all requests on the commit queue are labelled with the PG_NEED_COMMIT flag, and avoid moving them onto the dirty list inside nfs_page_mark_flush(). Also inline nfs_mark_request_dirty() into nfs_page_mark_flush() for atomicity reasons. Avoid dropping the spinlock until we're done marking the request in the radix tree and have added it to the ->dirty list. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8e9424651bc..ce5b4a9f2d8 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -38,7 +38,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, struct page *, unsigned int, unsigned int); -static void nfs_mark_request_dirty(struct nfs_page *req); static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how); static const struct rpc_call_ops nfs_write_partial_ops; static const struct rpc_call_ops nfs_write_full_ops; @@ -255,7 +254,8 @@ static void nfs_end_page_writeback(struct page *page) static int nfs_page_mark_flush(struct page *page) { struct nfs_page *req; - spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; + struct nfs_inode *nfsi = NFS_I(page->mapping->host); + spinlock_t *req_lock = &nfsi->req_lock; int ret; spin_lock(req_lock); @@ -279,11 +279,23 @@ static int nfs_page_mark_flush(struct page *page) return ret; spin_lock(req_lock); } - spin_unlock(req_lock); + if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { + /* This request is marked for commit */ + spin_unlock(req_lock); + nfs_unlock_request(req); + return 1; + } if (nfs_set_page_writeback(page) == 0) { nfs_list_remove_request(req); - nfs_mark_request_dirty(req); - } + /* add the request to the inode's dirty list. */ + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); + nfs_list_add_request(req, &nfsi->dirty); + nfsi->ndirty++; + spin_unlock(req_lock); + __mark_inode_dirty(page->mapping->host, I_DIRTY_PAGES); + } else + spin_unlock(req_lock); ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); nfs_unlock_request(req); return ret; @@ -406,24 +418,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) nfs_release_request(req); } -/* - * Add a request to the inode's dirty list. - */ -static void -nfs_mark_request_dirty(struct nfs_page *req) -{ - struct inode *inode = req->wb_context->dentry->d_inode; - struct nfs_inode *nfsi = NFS_I(inode); - - spin_lock(&nfsi->req_lock); - radix_tree_tag_set(&nfsi->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_DIRTY); - nfs_list_add_request(req, &nfsi->dirty); - nfsi->ndirty++; - spin_unlock(&nfsi->req_lock); - __mark_inode_dirty(inode, I_DIRTY_PAGES); -} - static void nfs_redirty_request(struct nfs_page *req) { @@ -438,7 +432,7 @@ nfs_dirty_request(struct nfs_page *req) { struct page *page = req->wb_page; - if (page == NULL) + if (page == NULL || test_bit(PG_NEED_COMMIT, &req->wb_flags)) return 0; return !PageWriteback(req->wb_page); } @@ -456,6 +450,7 @@ nfs_mark_request_commit(struct nfs_page *req) spin_lock(&nfsi->req_lock); nfs_list_add_request(req, &nfsi->commit); nfsi->ncommit++; + set_bit(PG_NEED_COMMIT, &(req)->wb_flags); spin_unlock(&nfsi->req_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); @@ -470,7 +465,7 @@ int nfs_write_need_commit(struct nfs_write_data *data) static inline int nfs_reschedule_unstable_write(struct nfs_page *req) { - if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { + if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { nfs_mark_request_commit(req); return 1; } @@ -557,6 +552,7 @@ static void nfs_cancel_commit_list(struct list_head *head) req = nfs_list_entry(head->next); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); nfs_list_remove_request(req); + clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); nfs_inode_remove_request(req); nfs_unlock_request(req); } @@ -1295,6 +1291,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); + clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dprintk("NFS: commit (%s/%Ld %d@%Ld)", -- cgit v1.2.3 From 2b82f190c81bf1524447c021df4e9ce8ef379bd5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Apr 2007 16:12:50 -0400 Subject: NFS: Fix race in nfs_set_page_dirty Protect nfs_set_page_dirty() against races with nfs_inode_add_request. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ce5b4a9f2d8..79755894174 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -388,6 +388,8 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) } SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); + if (PageDirty(req->wb_page)) + set_bit(PG_NEED_FLUSH, &req->wb_flags); nfsi->npages++; atomic_inc(&req->wb_count); return 0; @@ -407,6 +409,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); + if (test_and_clear_bit(PG_NEED_FLUSH, &req->wb_flags)) + __set_page_dirty_nobuffers(req->wb_page); nfsi->npages--; if (!nfsi->npages) { spin_unlock(&nfsi->req_lock); @@ -1527,15 +1531,22 @@ int nfs_wb_page(struct inode *inode, struct page* page) int nfs_set_page_dirty(struct page *page) { + spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; struct nfs_page *req; + int ret; - req = nfs_page_find_request(page); + spin_lock(req_lock); + req = nfs_page_find_request_locked(page); if (req != NULL) { /* Mark any existing write requests for flushing */ - set_bit(PG_NEED_FLUSH, &req->wb_flags); + ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags); + spin_unlock(req_lock); nfs_release_request(req); + return ret; } - return __set_page_dirty_nobuffers(page); + ret = __set_page_dirty_nobuffers(page); + spin_unlock(req_lock); + return ret; } -- cgit v1.2.3 From c959df9f01cfb2f43b4d1f58631ee1e9c50541b6 Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Mon, 23 Apr 2007 14:41:11 -0700 Subject: v9fs: don't use primary fid when removing file v9fs_insert uses v9fs_fid_lookup (which also locks the fid) to get the primary fid associated with the dentry and destroys the v9fs_fid struct after removing the file. If another process called v9fs_fid_lookup on the same dentry, it may wait undefinitely for the fid's lock (as the struct is freed). This patch changes v9fs_remove to use a cloned fid, so the primary fid is not locked and freed. Signed-off-by: Latchesar Ionkov Cc: Eric Van Hensbergen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/vfs_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 124a085d1f2..b01b0a45793 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -415,7 +415,7 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) file_inode = file->d_inode; sb = file_inode->i_sb; v9ses = v9fs_inode2v9ses(file_inode); - v9fid = v9fs_fid_lookup(file); + v9fid = v9fs_fid_clone(file); if(IS_ERR(v9fid)) return PTR_ERR(v9fid); -- cgit v1.2.3 From 9b7f375505f5611efb562065b57814b28a81abc3 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Mon, 23 Apr 2007 14:41:17 -0700 Subject: reiserfs: fix xattr root locking/refcount bug The listxattr() and getxattr() operations are only protected by a read lock. As a result, if either of these operations run in parallel, a race condition exists where the xattr_root will end up being cached twice, which results in the leaking of a reference and a BUG() on umount. This patch refactors get_xa_root(), __get_xa_root(), and create_xa_root(), into one get_xa_root() function that takes the appropriate locking around the entire critical section. Reported, diagnosed and tested by Andrea Righi Signed-off-by: Jeff Mahoney Cc: Andrea Righi Cc: "Vladimir V. Saveliev" Cc: Edward Shishkin Cc: Alex Zarochentsev Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/xattr.c | 92 ++++++++++++++--------------------------------------- 1 file changed, 24 insertions(+), 68 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index f01389fd162..c8178b7b921 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -54,82 +54,48 @@ static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char *prefix); -static struct dentry *create_xa_root(struct super_block *sb) +/* Returns the dentry referring to the root of the extended attribute + * directory tree. If it has already been retrieved, it is used. If it + * hasn't been created and the flags indicate creation is allowed, we + * attempt to create it. On error, we return a pointer-encoded error. + */ +static struct dentry *get_xa_root(struct super_block *sb, int flags) { struct dentry *privroot = dget(REISERFS_SB(sb)->priv_root); struct dentry *xaroot; /* This needs to be created at mount-time */ if (!privroot) - return ERR_PTR(-EOPNOTSUPP); + return ERR_PTR(-ENODATA); - xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME)); - if (IS_ERR(xaroot)) { + mutex_lock(&privroot->d_inode->i_mutex); + if (REISERFS_SB(sb)->xattr_root) { + xaroot = dget(REISERFS_SB(sb)->xattr_root); goto out; - } else if (!xaroot->d_inode) { - int err; - mutex_lock(&privroot->d_inode->i_mutex); - err = - privroot->d_inode->i_op->mkdir(privroot->d_inode, xaroot, - 0700); - mutex_unlock(&privroot->d_inode->i_mutex); - - if (err) { - dput(xaroot); - dput(privroot); - return ERR_PTR(err); - } - REISERFS_SB(sb)->xattr_root = dget(xaroot); } - out: - dput(privroot); - return xaroot; -} - -/* This will return a dentry, or error, refering to the xa root directory. - * If the xa root doesn't exist yet, the dentry will be returned without - * an associated inode. This dentry can be used with ->mkdir to create - * the xa directory. */ -static struct dentry *__get_xa_root(struct super_block *s) -{ - struct dentry *privroot = dget(REISERFS_SB(s)->priv_root); - struct dentry *xaroot = NULL; - - if (IS_ERR(privroot) || !privroot) - return privroot; - xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME)); if (IS_ERR(xaroot)) { goto out; } else if (!xaroot->d_inode) { - dput(xaroot); - xaroot = NULL; - goto out; + int err = -ENODATA; + if (flags == 0 || flags & XATTR_CREATE) + err = privroot->d_inode->i_op->mkdir(privroot->d_inode, + xaroot, 0700); + if (err) { + dput(xaroot); + xaroot = ERR_PTR(err); + goto out; + } } - - REISERFS_SB(s)->xattr_root = dget(xaroot); + REISERFS_SB(sb)->xattr_root = dget(xaroot); out: + mutex_unlock(&privroot->d_inode->i_mutex); dput(privroot); return xaroot; } -/* Returns the dentry (or NULL) referring to the root of the extended - * attribute directory tree. If it has already been retrieved, it is used. - * Otherwise, we attempt to retrieve it from disk. It may also return - * a pointer-encoded error. - */ -static inline struct dentry *get_xa_root(struct super_block *s) -{ - struct dentry *dentry = dget(REISERFS_SB(s)->xattr_root); - - if (!dentry) - dentry = __get_xa_root(s); - - return dentry; -} - /* Opens the directory corresponding to the inode's extended attribute store. * If flags allow, the tree to the directory may be created. If creation is * prohibited, -ENODATA is returned. */ @@ -138,21 +104,11 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags) struct dentry *xaroot, *xadir; char namebuf[17]; - xaroot = get_xa_root(inode->i_sb); - if (IS_ERR(xaroot)) { + xaroot = get_xa_root(inode->i_sb, flags); + if (IS_ERR(xaroot)) return xaroot; - } else if (!xaroot) { - if (flags == 0 || flags & XATTR_CREATE) { - xaroot = create_xa_root(inode->i_sb); - if (IS_ERR(xaroot)) - return xaroot; - } - if (!xaroot) - return ERR_PTR(-ENODATA); - } /* ok, we have xaroot open */ - snprintf(namebuf, sizeof(namebuf), "%X.%X", le32_to_cpu(INODE_PKEY(inode)->k_objectid), inode->i_generation); @@ -821,7 +777,7 @@ int reiserfs_delete_xattrs(struct inode *inode) /* Leftovers besides . and .. -- that's not good. */ if (dir->d_inode->i_nlink <= 2) { - root = get_xa_root(inode->i_sb); + root = get_xa_root(inode->i_sb, XATTR_REPLACE); reiserfs_write_lock_xattrs(inode->i_sb); err = vfs_rmdir(root->d_inode, dir); reiserfs_write_unlock_xattrs(inode->i_sb); -- cgit v1.2.3