diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/binfmt_flat.c | 17 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 21 | ||||
-rw-r--r-- | fs/btrfs/free-space-cache.c | 73 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 3 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 9 | ||||
-rw-r--r-- | fs/btrfs/zlib.c | 6 | ||||
-rw-r--r-- | fs/compat_ioctl.c | 1 | ||||
-rw-r--r-- | fs/inode.c | 40 | ||||
-rw-r--r-- | fs/jffs2/file.c | 2 | ||||
-rw-r--r-- | fs/namespace.c | 3 | ||||
-rw-r--r-- | fs/proc/base.c | 27 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 1 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_iget.c | 142 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 17 |
15 files changed, 219 insertions, 144 deletions
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 697f6b5f131..e92f229e3c6 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -828,15 +828,22 @@ static int load_flat_shared_library(int id, struct lib_info *libs) if (IS_ERR(bprm.file)) return res; + bprm.cred = prepare_exec_creds(); + res = -ENOMEM; + if (!bprm.cred) + goto out; + res = prepare_binprm(&bprm); if (res <= (unsigned long)-4096) res = load_flat_file(&bprm, libs, id, NULL); - if (bprm.file) { - allow_write_access(bprm.file); - fput(bprm.file); - bprm.file = NULL; - } + + abort_creds(bprm.cred); + +out: + allow_write_access(bprm.file); + fput(bprm.file); + return(res); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index dc84daee6bc..72a2b9c28e9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -265,10 +265,6 @@ static int caching_kthread(void *data) atomic_inc(&block_group->space_info->caching_threads); last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); -again: - /* need to make sure the commit_root doesn't disappear */ - down_read(&fs_info->extent_commit_sem); - /* * We don't want to deadlock with somebody trying to allocate a new * extent for the extent root while also trying to search the extent @@ -282,6 +278,10 @@ again: key.objectid = last; key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); +again: + /* need to make sure the commit_root doesn't disappear */ + down_read(&fs_info->extent_commit_sem); + ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); if (ret < 0) goto err; @@ -304,6 +304,19 @@ again: if (need_resched() || btrfs_transaction_in_commit(fs_info)) { + leaf = path->nodes[0]; + + /* this shouldn't happen, but if the + * leaf is empty just move on. + */ + if (btrfs_header_nritems(leaf) == 0) + break; + /* + * we need to copy the key out so that + * we are sure the next search advances + * us forward in the btree. + */ + btrfs_item_key_to_cpu(leaf, &key, 0); btrfs_release_path(fs_info->extent_root, path); up_read(&fs_info->extent_commit_sem); schedule_timeout(1); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index af99b78b288..5edcee3a617 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -414,11 +414,29 @@ static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_gro u64 *offset, u64 *bytes) { u64 end; + u64 search_start, search_bytes; + int ret; again: end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1; + /* + * XXX - this can go away after a few releases. + * + * since the only user of btrfs_remove_free_space is the tree logging + * stuff, and the only way to test that is under crash conditions, we + * want to have this debug stuff here just in case somethings not + * working. Search the bitmap for the space we are trying to use to + * make sure its actually there. If its not there then we need to stop + * because something has gone wrong. + */ + search_start = *offset; + search_bytes = *bytes; + ret = search_bitmap(block_group, bitmap_info, &search_start, + &search_bytes); + BUG_ON(ret < 0 || search_start != *offset); + if (*offset > bitmap_info->offset && *offset + *bytes > end) { bitmap_clear_bits(block_group, bitmap_info, *offset, end - *offset + 1); @@ -430,6 +448,7 @@ again: } if (*bytes) { + struct rb_node *next = rb_next(&bitmap_info->offset_index); if (!bitmap_info->bytes) { unlink_free_space(block_group, bitmap_info); kfree(bitmap_info->bitmap); @@ -438,16 +457,36 @@ again: recalculate_thresholds(block_group); } - bitmap_info = tree_search_offset(block_group, - offset_to_bitmap(block_group, - *offset), - 1, 0); - if (!bitmap_info) + /* + * no entry after this bitmap, but we still have bytes to + * remove, so something has gone wrong. + */ + if (!next) return -EINVAL; + bitmap_info = rb_entry(next, struct btrfs_free_space, + offset_index); + + /* + * if the next entry isn't a bitmap we need to return to let the + * extent stuff do its work. + */ if (!bitmap_info->bitmap) return -EAGAIN; + /* + * Ok the next item is a bitmap, but it may not actually hold + * the information for the rest of this free space stuff, so + * look for it, and if we don't find it return so we can try + * everything over again. + */ + search_start = *offset; + search_bytes = *bytes; + ret = search_bitmap(block_group, bitmap_info, &search_start, + &search_bytes); + if (ret < 0 || search_start != *offset) + return -EAGAIN; + goto again; } else if (!bitmap_info->bytes) { unlink_free_space(block_group, bitmap_info); @@ -644,8 +683,17 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, again: info = tree_search_offset(block_group, offset, 0, 0); if (!info) { - WARN_ON(1); - goto out_lock; + /* + * oops didn't find an extent that matched the space we wanted + * to remove, look for a bitmap instead + */ + info = tree_search_offset(block_group, + offset_to_bitmap(block_group, offset), + 1, 0); + if (!info) { + WARN_ON(1); + goto out_lock; + } } if (info->bytes < bytes && rb_next(&info->offset_index)) { @@ -957,8 +1005,15 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, if (cluster->block_group != block_group) goto out; - entry = tree_search_offset(block_group, search_start, 0, 0); - + /* + * search_start is the beginning of the bitmap, but at some point it may + * be a good idea to point to the actual start of the free area in the + * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only + * to 1 to make sure we get the bitmap entry + */ + entry = tree_search_offset(block_group, + offset_to_bitmap(block_group, search_start), + 1, 0); if (!entry || !entry->bitmap) goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 56fe83fa60c..272b9b2bea8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4785,8 +4785,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, * and the replacement file is large. Start IO on it now so * we don't add too much work to the end of the transaction */ - if (new_inode && old_inode && S_ISREG(old_inode->i_mode) && - new_inode->i_size && + if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size && old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) filemap_flush(old_inode->i_mapping); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e71264d1c2c..c04f7f21260 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2553,8 +2553,13 @@ int relocate_inode_pages(struct inode *inode, u64 start, u64 len) last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; /* make sure the dirty trick played by the caller work */ - ret = invalidate_inode_pages2_range(inode->i_mapping, - first_index, last_index); + while (1) { + ret = invalidate_inode_pages2_range(inode->i_mapping, + first_index, last_index); + if (ret != -EBUSY) + break; + schedule_timeout(HZ/10); + } if (ret) goto out_unlock; diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index ecfbce836d3..3e2b90eaa23 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -208,7 +208,7 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, *total_in = 0; workspace = find_zlib_workspace(); - if (!workspace) + if (IS_ERR(workspace)) return -1; if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { @@ -366,7 +366,7 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, char *kaddr; workspace = find_zlib_workspace(); - if (!workspace) + if (IS_ERR(workspace)) return -ENOMEM; data_in = kmap(pages_in[page_in_index]); @@ -547,7 +547,7 @@ int btrfs_zlib_decompress(unsigned char *data_in, return -ENOMEM; workspace = find_zlib_workspace(); - if (!workspace) + if (IS_ERR(workspace)) return -ENOMEM; workspace->inf_strm.next_in = data_in; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index f28f070a60f..f91fd51b32e 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1905,6 +1905,7 @@ COMPATIBLE_IOCTL(FIONCLEX) COMPATIBLE_IOCTL(FIOASYNC) COMPATIBLE_IOCTL(FIONBIO) COMPATIBLE_IOCTL(FIONREAD) /* This is also TIOCINQ */ +COMPATIBLE_IOCTL(FS_IOC_FIEMAP) /* 0x00 */ COMPATIBLE_IOCTL(FIBMAP) COMPATIBLE_IOCTL(FIGETBSZ) diff --git a/fs/inode.c b/fs/inode.c index 901bad1e5f1..ae7b67e4866 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -120,12 +120,11 @@ static void wake_up_inode(struct inode *inode) * These are initializations that need to be done on every inode * allocation as the fields are not initialised by slab allocation. */ -struct inode *inode_init_always(struct super_block *sb, struct inode *inode) +int inode_init_always(struct super_block *sb, struct inode *inode) { static const struct address_space_operations empty_aops; static struct inode_operations empty_iops; static const struct file_operations empty_fops; - struct address_space *const mapping = &inode->i_data; inode->i_sb = sb; @@ -152,7 +151,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) inode->dirtied_when = 0; if (security_inode_alloc(inode)) - goto out_free_inode; + goto out; /* allocate and initialize an i_integrity */ if (ima_inode_alloc(inode)) @@ -198,16 +197,12 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) inode->i_fsnotify_mask = 0; #endif - return inode; + return 0; out_free_security: security_inode_free(inode); -out_free_inode: - if (inode->i_sb->s_op->destroy_inode) - inode->i_sb->s_op->destroy_inode(inode); - else - kmem_cache_free(inode_cachep, (inode)); - return NULL; +out: + return -ENOMEM; } EXPORT_SYMBOL(inode_init_always); @@ -220,12 +215,21 @@ static struct inode *alloc_inode(struct super_block *sb) else inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL); - if (inode) - return inode_init_always(sb, inode); - return NULL; + if (!inode) + return NULL; + + if (unlikely(inode_init_always(sb, inode))) { + if (inode->i_sb->s_op->destroy_inode) + inode->i_sb->s_op->destroy_inode(inode); + else + kmem_cache_free(inode_cachep, inode); + return NULL; + } + + return inode; } -void destroy_inode(struct inode *inode) +void __destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); ima_inode_free(inode); @@ -237,13 +241,17 @@ void destroy_inode(struct inode *inode) if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) posix_acl_release(inode->i_default_acl); #endif +} +EXPORT_SYMBOL(__destroy_inode); + +void destroy_inode(struct inode *inode) +{ + __destroy_inode(inode); if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); else kmem_cache_free(inode_cachep, (inode)); } -EXPORT_SYMBOL(destroy_inode); - /* * These are initializations that only need to be done diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 5edc2bf2058..23c94753986 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -99,7 +99,7 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg) kunmap(pg); D2(printk(KERN_DEBUG "readpage finished\n")); - return 0; + return ret; } int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg) diff --git a/fs/namespace.c b/fs/namespace.c index 277c28a63ea..7230787d18b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -316,7 +316,8 @@ EXPORT_SYMBOL_GPL(mnt_clone_write); */ int mnt_want_write_file(struct file *file) { - if (!(file->f_mode & FMODE_WRITE)) + struct inode *inode = file->f_dentry->d_inode; + if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) return mnt_want_write(file->f_path.mnt); else return mnt_clone_write(file->f_path.mnt); diff --git a/fs/proc/base.c b/fs/proc/base.c index 3ce5ae9e3d2..175db258942 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -234,23 +234,20 @@ static int check_mem_permission(struct task_struct *task) struct mm_struct *mm_for_maps(struct task_struct *task) { - struct mm_struct *mm = get_task_mm(task); - if (!mm) + struct mm_struct *mm; + + if (mutex_lock_killable(&task->cred_guard_mutex)) return NULL; - down_read(&mm->mmap_sem); - task_lock(task); - if (task->mm != mm) - goto out; - if (task->mm != current->mm && - __ptrace_may_access(task, PTRACE_MODE_READ) < 0) - goto out; - task_unlock(task); + + mm = get_task_mm(task); + if (mm && mm != current->mm && + !ptrace_may_access(task, PTRACE_MODE_READ)) { + mmput(mm); + mm = NULL; + } + mutex_unlock(&task->cred_guard_mutex); + return mm; -out: - task_unlock(task); - up_read(&mm->mmap_sem); - mmput(mm); - return NULL; } static int proc_pid_cmdline(struct task_struct *task, char * buffer) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 6f61b7cc32e..9bd8be1d235 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -119,6 +119,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) mm = mm_for_maps(priv->task); if (!mm) return NULL; + down_read(&mm->mmap_sem); tail_vma = get_gate_vma(priv->task); priv->tail_vma = tail_vma; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 64a72e2e765..8f5c05d3dbd 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -189,6 +189,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) priv->task = NULL; return NULL; } + down_read(&mm->mmap_sem); /* start from the Nth VMA */ for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 5fcec6f020a..34ec86923f7 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -64,6 +64,10 @@ xfs_inode_alloc( ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); if (!ip) return NULL; + if (inode_init_always(mp->m_super, VFS_I(ip))) { + kmem_zone_free(xfs_inode_zone, ip); + return NULL; + } ASSERT(atomic_read(&ip->i_iocount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0); @@ -105,17 +109,6 @@ xfs_inode_alloc( #ifdef XFS_DIR2_TRACE ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); #endif - /* - * Now initialise the VFS inode. We do this after the xfs_inode - * initialisation as internal failures will result in ->destroy_inode - * being called and that will pass down through the reclaim path and - * free the XFS inode. This path requires the XFS inode to already be - * initialised. Hence if this call fails, the xfs_inode has already - * been freed and we should not reference it at all in the error - * handling. - */ - if (!inode_init_always(mp->m_super, VFS_I(ip))) - return NULL; /* prevent anyone from using this yet */ VFS_I(ip)->i_state = I_NEW|I_LOCK; @@ -123,6 +116,71 @@ xfs_inode_alloc( return ip; } +STATIC void +xfs_inode_free( + struct xfs_inode *ip) +{ + switch (ip->i_d.di_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + xfs_idestroy_fork(ip, XFS_DATA_FORK); + break; + } + + if (ip->i_afp) + xfs_idestroy_fork(ip, XFS_ATTR_FORK); + +#ifdef XFS_INODE_TRACE + ktrace_free(ip->i_trace); +#endif +#ifdef XFS_BMAP_TRACE + ktrace_free(ip->i_xtrace); +#endif +#ifdef XFS_BTREE_TRACE + ktrace_free(ip->i_btrace); +#endif +#ifdef XFS_RW_TRACE + ktrace_free(ip->i_rwtrace); +#endif +#ifdef XFS_ILOCK_TRACE + ktrace_free(ip->i_lock_trace); +#endif +#ifdef XFS_DIR2_TRACE + ktrace_free(ip->i_dir_trace); +#endif + + if (ip->i_itemp) { + /* + * Only if we are shutting down the fs will we see an + * inode still in the AIL. If it is there, we should remove + * it to prevent a use-after-free from occurring. + */ + xfs_log_item_t *lip = &ip->i_itemp->ili_item; + struct xfs_ail *ailp = lip->li_ailp; + + ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || + XFS_FORCED_SHUTDOWN(ip->i_mount)); + if (lip->li_flags & XFS_LI_IN_AIL) { + spin_lock(&ailp->xa_lock); + if (lip->li_flags & XFS_LI_IN_AIL) + xfs_trans_ail_delete(ailp, lip); + else + spin_unlock(&ailp->xa_lock); + } + xfs_inode_item_destroy(ip); + ip->i_itemp = NULL; + } + + /* asserts to verify all state is correct here */ + ASSERT(atomic_read(&ip->i_iocount) == 0); + ASSERT(atomic_read(&ip->i_pincount) == 0); + ASSERT(!spin_is_locked(&ip->i_flags_lock)); + ASSERT(completion_done(&ip->i_flush)); + + kmem_zone_free(xfs_inode_zone, ip); +} + /* * Check the validity of the inode we just found it the cache */ @@ -167,7 +225,7 @@ xfs_iget_cache_hit( * errors cleanly, then tag it so it can be set up correctly * later. */ - if (!inode_init_always(mp->m_super, VFS_I(ip))) { + if (inode_init_always(mp->m_super, VFS_I(ip))) { error = ENOMEM; goto out_error; } @@ -299,7 +357,8 @@ out_preload_end: if (lock_flags) xfs_iunlock(ip, lock_flags); out_destroy: - xfs_destroy_inode(ip); + __destroy_inode(VFS_I(ip)); + xfs_inode_free(ip); return error; } @@ -504,62 +563,7 @@ xfs_ireclaim( xfs_qm_dqdetach(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - switch (ip->i_d.di_mode & S_IFMT) { - case S_IFREG: - case S_IFDIR: - case S_IFLNK: - xfs_idestroy_fork(ip, XFS_DATA_FORK); - break; - } - - if (ip->i_afp) - xfs_idestroy_fork(ip, XFS_ATTR_FORK); - -#ifdef XFS_INODE_TRACE - ktrace_free(ip->i_trace); -#endif -#ifdef XFS_BMAP_TRACE - ktrace_free(ip->i_xtrace); -#endif -#ifdef XFS_BTREE_TRACE - ktrace_free(ip->i_btrace); -#endif -#ifdef XFS_RW_TRACE - ktrace_free(ip->i_rwtrace); -#endif -#ifdef XFS_ILOCK_TRACE - ktrace_free(ip->i_lock_trace); -#endif -#ifdef XFS_DIR2_TRACE - ktrace_free(ip->i_dir_trace); -#endif - if (ip->i_itemp) { - /* - * Only if we are shutting down the fs will we see an - * inode still in the AIL. If it is there, we should remove - * it to prevent a use-after-free from occurring. - */ - xfs_log_item_t *lip = &ip->i_itemp->ili_item; - struct xfs_ail *ailp = lip->li_ailp; - - ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || - XFS_FORCED_SHUTDOWN(ip->i_mount)); - if (lip->li_flags & XFS_LI_IN_AIL) { - spin_lock(&ailp->xa_lock); - if (lip->li_flags & XFS_LI_IN_AIL) - xfs_trans_ail_delete(ailp, lip); - else - spin_unlock(&ailp->xa_lock); - } - xfs_inode_item_destroy(ip); - ip->i_itemp = NULL; - } - /* asserts to verify all state is correct here */ - ASSERT(atomic_read(&ip->i_iocount) == 0); - ASSERT(atomic_read(&ip->i_pincount) == 0); - ASSERT(!spin_is_locked(&ip->i_flags_lock)); - ASSERT(completion_done(&ip->i_flush)); - kmem_zone_free(xfs_inode_zone, ip); + xfs_inode_free(ip); } /* diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 1804f866a71..65f24a3cc99 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -310,23 +310,6 @@ static inline struct inode *VFS_I(struct xfs_inode *ip) } /* - * Get rid of a partially initialized inode. - * - * We have to go through destroy_inode to make sure allocations - * from init_inode_always like the security data are undone. - * - * We mark the inode bad so that it takes the short cut in - * the reclaim path instead of going through the flush path - * which doesn't make sense for an inode that has never seen the - * light of day. - */ -static inline void xfs_destroy_inode(struct xfs_inode *ip) -{ - make_bad_inode(VFS_I(ip)); - return destroy_inode(VFS_I(ip)); -} - -/* * i_flags helper functions */ static inline void |