From 035a571120ddbe4f92b91bbe46f3eff05b6e43eb Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Tue, 7 Apr 2009 07:40:57 +0800 Subject: ocfs2: Reserve 1 more cluster in expanding_inline_dir for indexed dir. In ocfs2_expand_inline_dir, we calculate whether we need 1 extra cluster if we can't store the dx inline the root and save it in dx_alloc. So add it when we call ocfs2_reserve_clusters. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index e71160cda11..07d89204f0d 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2934,7 +2934,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, */ BUG_ON(alloc > 2); - ret = ocfs2_reserve_clusters(osb, alloc, &data_ac); + ret = ocfs2_reserve_clusters(osb, alloc + dx_alloc, &data_ac); if (ret) { mlog_errno(ret); goto out; -- cgit v1.2.3 From ff6945279d45edd8f6b0a5ddb1ef16cecce3ea9c Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 20 Apr 2009 19:45:13 +0000 Subject: [CIFS] Make cifs_unlink consistent in checks for null inode Signed-off-by: Steve French --- fs/cifs/CHANGES | 6 ++++++ fs/cifs/cifsfs.h | 2 +- fs/cifs/inode.c | 21 +++++++++++++++------ 3 files changed, 22 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 9d1fb6ec8a5..1bf81813627 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,3 +1,9 @@ +Version 1.58 +------------ +Guard against buffer overruns in various UCS-2 to UTF-8 string conversions +when the UTF-8 string is composed of unusually long (more than 4 byte) converted +characters. + Version 1.57 ------------ Improve support for multiple security contexts to the same server. 
We diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 77e190dc288..051b71cfdea 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -100,5 +100,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.57" +#define CIFS_VERSION "1.58" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index f36b4e40e44..9c869a6dcba 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -962,13 +962,21 @@ undo_setattr: goto out_close; } + +/* + * If dentry->d_inode is null (usually meaning the cached dentry + * is a negative dentry) then we would attempt a standard SMB delete, but + * if that fails we can not attempt the fall back mechanisms on EACESS + * but will return the EACESS to the caller. Note that the VFS does not call + * unlink on negative dentries currently. + */ int cifs_unlink(struct inode *dir, struct dentry *dentry) { int rc = 0; int xid; char *full_path = NULL; struct inode *inode = dentry->d_inode; - struct cifsInodeInfo *cifsInode = CIFS_I(inode); + struct cifsInodeInfo *cifs_inode; struct super_block *sb = dir->i_sb; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifsTconInfo *tcon = cifs_sb->tcon; @@ -1012,7 +1020,7 @@ psx_del_no_retry: rc = cifs_rename_pending_delete(full_path, dentry, xid); if (rc == 0) drop_nlink(inode); - } else if (rc == -EACCES && dosattr == 0) { + } else if ((rc == -EACCES) && (dosattr == 0) && inode) { attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); if (attrs == NULL) { rc = -ENOMEM; @@ -1020,7 +1028,8 @@ psx_del_no_retry: } /* try to reset dos attributes */ - origattr = cifsInode->cifsAttrs; + cifs_inode = CIFS_I(inode); + origattr = cifs_inode->cifsAttrs; if (origattr == 0) origattr |= ATTR_NORMAL; dosattr = origattr & ~ATTR_READONLY; @@ -1041,13 +1050,13 @@ psx_del_no_retry: out_reval: if (inode) { - cifsInode = CIFS_I(inode); - cifsInode->time = 0; /* will force revalidate to 
get info + cifs_inode = CIFS_I(inode); + cifs_inode->time = 0; /* will force revalidate to get info when needed */ inode->i_ctime = current_fs_time(sb); } dir->i_ctime = dir->i_mtime = current_fs_time(sb); - cifsInode = CIFS_I(dir); + cifs_inode = CIFS_I(dir); CIFS_I(dir)->time = 0; /* force revalidate of dir as well */ kfree(full_path); -- cgit v1.2.3 From 968460ebd8006d55661dec0fb86712b40d71c413 Mon Sep 17 00:00:00 2001 From: Suresh Jayaraman Date: Mon, 20 Apr 2009 18:54:21 +0530 Subject: cifs: Rename cifs_strncpy_to_host and fix buffer size There is a possibility for the path_name and node_name buffers to overflow if they contain characters that are >2 bytes in the local charset. Resize the buffer allocation so as to avoid this possibility. Also, as pointed out by Jeff Layton, it would be appropriate to rename the function to cifs_strlcpy_to_host to reflect the fact that the copied string is always NULL terminated. Signed-off-by: Suresh Jayaraman Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a0845dc7b8a..a02c43b3faf 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -88,29 +88,29 @@ static struct { * on failure - errno */ static int -cifs_strncpy_to_host(char **dst, const char *src, const int maxlen, +cifs_strlcpy_to_host(char **dst, const char *src, const int maxlen, const bool is_unicode, const struct nls_table *nls_codepage) { int plen; if (is_unicode) { plen = UniStrnlen((wchar_t *)src, maxlen); - *dst = kmalloc(plen + 2, GFP_KERNEL); + *dst = kmalloc((4 * plen) + 2, GFP_KERNEL); if (!*dst) - goto cifs_strncpy_to_host_ErrExit; + goto cifs_strlcpy_to_host_ErrExit; cifs_strfromUCS_le(*dst, (__le16 *)src, plen, nls_codepage); + (*dst)[plen] = 0; + (*dst)[plen+1] = 0; /* needed for Unicode */ } else { plen = strnlen(src, maxlen); *dst = kmalloc(plen + 2, GFP_KERNEL); if (!*dst) - goto 
cifs_strncpy_to_host_ErrExit; - strncpy(*dst, src, plen); + goto cifs_strlcpy_to_host_ErrExit; + strlcpy(*dst, src, plen); } - (*dst)[plen] = 0; - (*dst)[plen+1] = 0; /* harmless for ASCII case, needed for Unicode */ return 0; -cifs_strncpy_to_host_ErrExit: +cifs_strlcpy_to_host_ErrExit: cERROR(1, ("Failed to allocate buffer for string\n")); return -ENOMEM; } @@ -4029,7 +4029,7 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, /* copy DfsPath */ temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset); max_len = data_end - temp; - rc = cifs_strncpy_to_host(&(node->path_name), temp, + rc = cifs_strlcpy_to_host(&(node->path_name), temp, max_len, is_unicode, nls_codepage); if (rc) goto parse_DFS_referrals_exit; @@ -4037,7 +4037,7 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, /* copy link target UNC */ temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset); max_len = data_end - temp; - rc = cifs_strncpy_to_host(&(node->node_name), temp, + rc = cifs_strlcpy_to_host(&(node->node_name), temp, max_len, is_unicode, nls_codepage); if (rc) goto parse_DFS_referrals_exit; -- cgit v1.2.3 From 7b0c8fcff47a885743125dd843db64af41af5a61 Mon Sep 17 00:00:00 2001 From: Suresh Jayaraman Date: Mon, 20 Apr 2009 18:54:36 +0530 Subject: cifs: Increase size of tmp_buf in cifs_readdir to avoid potential overflows Increase size of tmp_buf to possible maximum to avoid potential overflows. Pointed-out-by: Jeff Layton Signed-off-by: Suresh Jayaraman Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/readdir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 1a8be622833..ebd0da7ecb3 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -1074,7 +1074,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) with the rare long characters alloc more to account for such multibyte target UTF-8 characters. 
cifs_unicode.c, which actually does the conversion, has the same limit */ - tmp_buf = kmalloc((2 * NAME_MAX) + 4, GFP_KERNEL); + tmp_buf = kmalloc((4 * NAME_MAX) + 2, GFP_KERNEL); for (i = 0; (i < num_to_fill) && (rc == 0); i++) { if (current_entry == NULL) { /* evaluate whether this case is an error */ -- cgit v1.2.3 From 3c48f23adada870db612a0dd3488605c4af5c0a5 Mon Sep 17 00:00:00 2001 From: Subrata Modak Date: Sun, 19 Apr 2009 01:10:03 +0530 Subject: configfs: Fix Trivial Warning in fs/configfs/symlink.c I observed the following build warning with fs/configfs/symlink.c: fs/configfs/symlink.c: In function 'configfs_symlink': fs/configfs/symlink.c:138: warning: 'target_item' may be used uninitialized in this function Here is a small fix for this. Cc: Patrick Mochel Cc: Balbir Singh Cc: Sachin P Sant Signed-Off-By: Subrata Modak Signed-off-by: Joel Becker --- fs/configfs/symlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 932a92b3148..c8afa6b1d91 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -135,7 +135,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna struct path path; struct configfs_dirent *sd; struct config_item *parent_item; - struct config_item *target_item; + struct config_item *target_item = NULL; struct config_item_type *type; ret = -EPERM; /* What lack-of-symlink returns */ -- cgit v1.2.3 From 0fba813748f16f4eaf24d492c505226c4026d58f Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 19 Mar 2009 05:08:43 +0800 Subject: ocfs2: Fix 2 warning during ocfs2 make. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fs/ocfs2/dir.c: In function ‘ocfs2_extend_dir’: fs/ocfs2/dir.c:2700: warning: ‘ret’ may be used uninitialized in this function fs/ocfs2/suballoc.c: In function ‘ocfs2_get_suballoc_slot_bit’: fs/ocfs2/suballoc.c:2216: warning: comparison is always true due to limited range of data type Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/dir.c | 2 +- fs/ocfs2/suballoc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 07d89204f0d..c5752305627 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2697,7 +2697,7 @@ static int ocfs2_dx_dir_index_block(struct inode *dir, u32 *num_dx_entries, struct buffer_head *dirent_bh) { - int ret, namelen, i; + int ret = 0, namelen, i; char *de_buf, *limit; struct ocfs2_dir_entry *de; struct buffer_head *dx_leaf_bh; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index b4ca5911caa..eb21dbb0ee0 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -2213,7 +2213,7 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, goto bail; } - if (le16_to_cpu(inode_fe->i_suballoc_slot) != OCFS2_INVALID_SLOT && + if (le16_to_cpu(inode_fe->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT && (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) { mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n", blkno, (u32)le16_to_cpu(inode_fe->i_suballoc_slot)); -- cgit v1.2.3 From 5b09b507daaa882d888b6cd78ee89ba9caace44b Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 21 Apr 2009 16:31:20 -0700 Subject: ocfs2: Fix some printk() warnings. The old %llu vs u64 battle. Cast them correctly. 
Signed-off-by: Joel Becker --- fs/ocfs2/export.c | 9 +++++---- fs/ocfs2/suballoc.c | 19 ++++++++++++------- 2 files changed, 17 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index de3da8eb558..15713cbb865 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -100,7 +100,8 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, /* If the inode allocator bit is clear, this inode must be stale */ if (!set) { - mlog(0, "inode %llu suballoc bit is clear\n", blkno); + mlog(0, "inode %llu suballoc bit is clear\n", + (unsigned long long)blkno); status = -ESTALE; goto unlock_nfs_sync; } @@ -114,7 +115,7 @@ check_err: if (status < 0) { if (status == -ESTALE) { mlog(0, "stale inode ino: %llu generation: %u\n", - blkno, handle->ih_generation); + (unsigned long long)blkno, handle->ih_generation); } result = ERR_PTR(status); goto bail; @@ -129,8 +130,8 @@ check_err: check_gen: if (handle->ih_generation != inode->i_generation) { iput(inode); - mlog(0, "stale inode ino: %llu generation: %u\n", blkno, - handle->ih_generation); + mlog(0, "stale inode ino: %llu generation: %u\n", + (unsigned long long)blkno, handle->ih_generation); result = ERR_PTR(-ESTALE); goto bail; } diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index eb21dbb0ee0..8439f6b324b 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -2197,18 +2197,20 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, struct buffer_head *inode_bh = NULL; struct ocfs2_dinode *inode_fe; - mlog_entry("blkno: %llu\n", blkno); + mlog_entry("blkno: %llu\n", (unsigned long long)blkno); /* dirty read disk */ status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh); if (status < 0) { - mlog(ML_ERROR, "read block %llu failed %d\n", blkno, status); + mlog(ML_ERROR, "read block %llu failed %d\n", + (unsigned long long)blkno, status); goto bail; } inode_fe = (struct ocfs2_dinode *) inode_bh->b_data; if (!OCFS2_IS_VALID_DINODE(inode_fe)) 
{ - mlog(ML_ERROR, "invalid inode %llu requested\n", blkno); + mlog(ML_ERROR, "invalid inode %llu requested\n", + (unsigned long long)blkno); status = -EINVAL; goto bail; } @@ -2216,7 +2218,8 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, if (le16_to_cpu(inode_fe->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT && (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) { mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n", - blkno, (u32)le16_to_cpu(inode_fe->i_suballoc_slot)); + (unsigned long long)blkno, + (u32)le16_to_cpu(inode_fe->i_suballoc_slot)); status = -EINVAL; goto bail; } @@ -2251,7 +2254,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, u64 bg_blkno; int status; - mlog_entry("blkno: %llu bit: %u\n", blkno, (unsigned int)bit); + mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno, + (unsigned int)bit); alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data; if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) { @@ -2266,7 +2270,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno, &group_bh); if (status < 0) { - mlog(ML_ERROR, "read group %llu failed %d\n", bg_blkno, status); + mlog(ML_ERROR, "read group %llu failed %d\n", + (unsigned long long)bg_blkno, status); goto bail; } @@ -2300,7 +2305,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) struct inode *inode_alloc_inode; struct buffer_head *alloc_bh = NULL; - mlog_entry("blkno: %llu", blkno); + mlog_entry("blkno: %llu", (unsigned long long)blkno); status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, &suballoc_bit); -- cgit v1.2.3 From a5a0a630922a2f6a774b6dac19f70cb5abd86bb0 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 20 Apr 2009 21:34:18 -0700 Subject: ocfs2: Add missing iput() during error handling in ocfs2_dentry_attach_lock() In ocfs2_dentry_attach_lock(), if unable to get the dentry lock, we need 
to call iput(inode) because a failure here means no d_instantiate(), which means the normally matching iput() will not be called during dput(dentry). This patch fixes the oops that accompanies the following message: (3996,1):dlm_empty_lockres:2708 ERROR: lockres W00000000000000000a1046b06a4382 still has local locks! kernel BUG in dlm_empty_lockres at /rpmbuild/smushran/BUILD/ocfs2-1.4.2/fs/ocfs2/dlm/dlmmaster.c:2709! Signed-off-by: Sunil Mushran Signed-off-by: Joel Becker --- fs/ocfs2/dcache.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 7d604480557..b574431a031 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -290,6 +290,21 @@ out_attach: else mlog_errno(ret); + /* + * In case of error, manually free the allocation and do the iput(). + * We need to do this because error here means no d_instantiate(), + * which means iput() will not be called during dput(dentry). + */ + if (ret < 0 && !alias) { + ocfs2_lock_res_free(&dl->dl_lockres); + BUG_ON(dl->dl_count != 1); + spin_lock(&dentry_attach_lock); + dentry->d_fsdata = NULL; + spin_unlock(&dentry_attach_lock); + kfree(dl); + iput(inode); + } + dput(alias); return ret; -- cgit v1.2.3 From 485c26ec70f823f2a9cf45982b724893e53a859e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 24 Apr 2009 13:43:20 -0400 Subject: ext4: Fix softlockup caused by illegal i_file_acl value in on-disk inode If the block containing external extended attributes (which is stored in i_file_acl and i_file_acl_high) is larger than the on-disk filesystem, the process which tried to access the extended attributes will endlessly issue kernel printks complaining that "__find_get_block_slow() failed", locking up that CPU until the system is forcibly rebooted. So when we read in the inode, make sure the i_file_acl value is legal, and if not, flag the filesystem as being corrupted. 
Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c6bd6ced3bb..cab75bbcd57 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4409,7 +4409,17 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; } - if (ei->i_flags & EXT4_EXTENTS_FL) { + if (ei->i_file_acl && + ((ei->i_file_acl < + (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + + EXT4_SB(sb)->s_gdb_count)) || + (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { + ext4_error(sb, __func__, + "bad extended attribute block %llu in inode #%lu", + ei->i_file_acl, inode->i_ino); + ret = -EIO; + goto bad_inode; + } else if (ei->i_flags & EXT4_EXTENTS_FL) { /* Validate extent which is part of inode */ ret = ext4_ext_check_inode(inode); } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || -- cgit v1.2.3 From 97e728d4353f38c87bf0804cdfd79a9b13fc2c3e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 21 Apr 2009 17:40:57 -0400 Subject: Btrfs: try to keep a healthy ratio of metadata vs data block groups This patch makes the chunk allocator keep a good ratio of metadata vs data block groups. By default for every 8 data block groups, we'll allocate 1 metadata chunk, or about 12% of the disk will be allocated for metadata. This can be changed by specifying the metadata_ratio mount option. This is simply the number of data block groups that have to be allocated to force a metadata chunk allocation. By making sure we allocate metadata chunks more often, we are less likely to get into situations where the whole disk has been allocated as data block groups. 
Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +++ fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 28 +++++++++++++++++++++++++++- fs/btrfs/super.c | 12 +++++++++++- 4 files changed, 42 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ad96495dedc..213535f45da 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -881,6 +881,9 @@ struct btrfs_fs_info { u64 metadata_alloc_profile; u64 system_alloc_profile; + unsigned data_chunk_allocations; + unsigned metadata_ratio; + void *bdev_holder; }; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a6b83744b05..44c94d808e2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1604,6 +1604,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; + fs_info->metadata_ratio = 8; fs_info->thread_pool_size = min_t(unsigned long, num_online_cpus() + 2, 8); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 178df4c67de..2895a837323 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1918,15 +1918,29 @@ void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, spin_unlock(&info->lock); } +static void force_metadata_allocation(struct btrfs_fs_info *info) +{ + struct list_head *head = &info->space_info; + struct btrfs_space_info *found; + + rcu_read_lock(); + list_for_each_entry_rcu(found, head, list) { + if (found->flags & BTRFS_BLOCK_GROUP_METADATA) + found->force_alloc = 1; + } + rcu_read_unlock(); +} + static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags, int force) { struct btrfs_space_info *space_info; + struct btrfs_fs_info *fs_info = extent_root->fs_info; u64 thresh; int ret = 0; - mutex_lock(&extent_root->fs_info->chunk_mutex); + mutex_lock(&fs_info->chunk_mutex); flags = 
btrfs_reduce_alloc_profile(extent_root, flags); @@ -1958,6 +1972,18 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, } spin_unlock(&space_info->lock); + /* + * if we're doing a data chunk, go ahead and make sure that + * we keep a reasonable number of metadata chunks allocated in the + * FS as well. + */ + if (flags & BTRFS_BLOCK_GROUP_DATA) { + fs_info->data_chunk_allocations++; + if (!(fs_info->data_chunk_allocations % + fs_info->metadata_ratio)) + force_metadata_allocation(fs_info); + } + ret = btrfs_alloc_chunk(trans, extent_root, flags); if (ret) space_info->full = 1; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9744af9d71e..30c9a8ca2a5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -68,7 +68,7 @@ enum { Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, - Opt_flushoncommit, Opt_err, + Opt_ratio, Opt_flushoncommit, Opt_err, }; static match_table_t tokens = { @@ -87,6 +87,7 @@ static match_table_t tokens = { {Opt_noacl, "noacl"}, {Opt_notreelog, "notreelog"}, {Opt_flushoncommit, "flushoncommit"}, + {Opt_ratio, "metadata_ratio=%d"}, {Opt_err, NULL}, }; @@ -234,6 +235,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); break; + case Opt_ratio: + intarg = 0; + match_int(&args[0], &intarg); + if (intarg) { + info->metadata_ratio = intarg; + printk(KERN_INFO "btrfs: metadata ratio %d\n", + info->metadata_ratio); + } + break; default: break; } -- cgit v1.2.3 From 2ea2544ef5dad5cac52f1e4c7b812631274fc1cb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Apr 2009 15:32:28 +0200 Subject: Btrfs: simplify makefile Get rid of the hacks for building out of tree, and always use += for assigning to the object lists. 
Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 9adf5e4f7e9..94212844a9b 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,25 +1,10 @@ -ifneq ($(KERNELRELEASE),) -# kbuild part of makefile obj-$(CONFIG_BTRFS_FS) := btrfs.o -btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ + +btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ compression.o delayed-ref.o -else - -# Normal Makefile - -KERNELDIR := /lib/modules/`uname -r`/build -all: - $(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules - -modules_install: - $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install -clean: - $(MAKE) -C $(KERNELDIR) M=`pwd` clean - -endif -- cgit v1.2.3 From 0d4bf11e5309eff64272a49e1ea55658372abc56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Apr 2009 15:33:56 +0200 Subject: Btrfs: don't export symbols Currently the extent_map code is only for btrfs so don't export its symbols. 
Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index b187917b36f..9827fa1de4e 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -43,7 +43,6 @@ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) tree->map.rb_node = NULL; spin_lock_init(&tree->lock); } -EXPORT_SYMBOL(extent_map_tree_init); /** * alloc_extent_map - allocate new extent map structure @@ -64,7 +63,6 @@ struct extent_map *alloc_extent_map(gfp_t mask) atomic_set(&em->refs, 1); return em; } -EXPORT_SYMBOL(alloc_extent_map); /** * free_extent_map - drop reference count of an extent_map @@ -83,7 +81,6 @@ void free_extent_map(struct extent_map *em) kmem_cache_free(extent_map_cache, em); } } -EXPORT_SYMBOL(free_extent_map); static struct rb_node *tree_insert(struct rb_root *root, u64 offset, struct rb_node *node) @@ -264,7 +261,6 @@ int add_extent_mapping(struct extent_map_tree *tree, out: return ret; } -EXPORT_SYMBOL(add_extent_mapping); /* simple helper to do math around the end of an extent, handling wrap */ static u64 range_end(u64 start, u64 len) @@ -326,7 +322,6 @@ found: out: return em; } -EXPORT_SYMBOL(lookup_extent_mapping); /** * remove_extent_mapping - removes an extent_map from the extent tree @@ -346,4 +341,3 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) em->in_tree = 0; return ret; } -EXPORT_SYMBOL(remove_extent_mapping); -- cgit v1.2.3 From 9601e3f6336f6ca66929f451b1f66085e68e36e3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Apr 2009 15:33:09 +0200 Subject: Btrfs: kill btrfs_cache_create Just use kmem_cache_create directly. 
Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 18 ++++++------------ fs/btrfs/extent_map.c | 11 +++-------- fs/btrfs/inode.c | 42 +++++++++++++++++++----------------------- 3 files changed, 28 insertions(+), 43 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 05a1c42e25b..c33b54029d7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -17,12 +17,6 @@ #include "ctree.h" #include "btrfs_inode.h" -/* temporary define until extent_map moves out of btrfs */ -struct kmem_cache *btrfs_cache_create(const char *name, size_t size, - unsigned long extra_flags, - void (*ctor)(void *, struct kmem_cache *, - unsigned long)); - static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; @@ -58,15 +52,15 @@ struct extent_page_data { int __init extent_io_init(void) { - extent_state_cache = btrfs_cache_create("extent_state", - sizeof(struct extent_state), 0, - NULL); + extent_state_cache = kmem_cache_create("extent_state", + sizeof(struct extent_state), 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); if (!extent_state_cache) return -ENOMEM; - extent_buffer_cache = btrfs_cache_create("extent_buffers", - sizeof(struct extent_buffer), 0, - NULL); + extent_buffer_cache = kmem_cache_create("extent_buffers", + sizeof(struct extent_buffer), 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); if (!extent_buffer_cache) goto free_state_cache; return 0; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 9827fa1de4e..30c9365861e 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -6,19 +6,14 @@ #include #include "extent_map.h" -/* temporary define until extent_map moves out of btrfs */ -struct kmem_cache *btrfs_cache_create(const char *name, size_t size, - unsigned long extra_flags, - void (*ctor)(void *, struct kmem_cache *, - unsigned long)); static struct kmem_cache *extent_map_cache; int __init extent_map_init(void) { - 
extent_map_cache = btrfs_cache_create("extent_map", - sizeof(struct extent_map), 0, - NULL); + extent_map_cache = kmem_cache_create("extent_map", + sizeof(struct extent_map), 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); if (!extent_map_cache) return -ENOMEM; return 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 65219f6a16a..176b6cc28b1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4640,39 +4640,35 @@ void btrfs_destroy_cachep(void) kmem_cache_destroy(btrfs_path_cachep); } -struct kmem_cache *btrfs_cache_create(const char *name, size_t size, - unsigned long extra_flags, - void (*ctor)(void *)) -{ - return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT | - SLAB_MEM_SPREAD | extra_flags), ctor); -} - int btrfs_init_cachep(void) { - btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache", - sizeof(struct btrfs_inode), - 0, init_once); + btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", + sizeof(struct btrfs_inode), 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once); if (!btrfs_inode_cachep) goto fail; - btrfs_trans_handle_cachep = - btrfs_cache_create("btrfs_trans_handle_cache", - sizeof(struct btrfs_trans_handle), - 0, NULL); + + btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", + sizeof(struct btrfs_trans_handle), 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); if (!btrfs_trans_handle_cachep) goto fail; - btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache", - sizeof(struct btrfs_transaction), - 0, NULL); + + btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", + sizeof(struct btrfs_transaction), 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); if (!btrfs_transaction_cachep) goto fail; - btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache", - sizeof(struct btrfs_path), - 0, NULL); + + btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", + sizeof(struct btrfs_path), 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); if 
(!btrfs_path_cachep) goto fail; - btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256, - SLAB_DESTROY_BY_RCU, NULL); + + btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix", 256, 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | + SLAB_DESTROY_BY_RCU, NULL); if (!btrfs_bit_radix_cachep) goto fail; return 0; -- cgit v1.2.3 From e980b50cda1610f1c17978d9b7fd311a9dd93877 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 24 Apr 2009 14:39:24 -0400 Subject: Btrfs: fix fallocate deadlock on inode extent lock The btrfs fallocate call takes an extent lock on the entire range being fallocated, and then runs through insert_reserved_extent on each extent as they are allocated. The problem with this is that btrfs_drop_extents may decide to try and take the same extent lock fallocate was already holding. The solution used here is to push down knowledge of the range that is already locked going into btrfs_drop_extents. It turns out that at least one other caller had the same bug. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 ++- fs/btrfs/file.c | 11 ++++++----- fs/btrfs/inode.c | 27 ++++++++++++++++++--------- fs/btrfs/ioctl.c | 3 ++- fs/btrfs/tree-log.c | 2 +- 5 files changed, 29 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 213535f45da..4414a5d9983 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2177,7 +2177,8 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode); extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 inline_limit, u64 *hint_block); + u64 start, u64 end, u64 locked_end, + u64 inline_limit, u64 *hint_block); int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 482f8db2cfd..da3ed965c95 100644 --- 
a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -363,15 +363,16 @@ out: */ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 inline_limit, u64 *hint_byte) + u64 start, u64 end, u64 locked_end, + u64 inline_limit, u64 *hint_byte) { u64 extent_end = 0; - u64 locked_end = end; u64 search_start = start; u64 leaf_start; u64 ram_bytes = 0; u64 orig_parent = 0; u64 disk_bytenr = 0; + u64 orig_locked_end = locked_end; u8 compression; u8 encryption; u16 other_encoding = 0; @@ -684,9 +685,9 @@ next_slot: } out: btrfs_free_path(path); - if (locked_end > end) { - unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, - GFP_NOFS); + if (locked_end > orig_locked_end) { + unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, + locked_end - 1, GFP_NOFS); } btrfs_check_file(root, inode); return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 176b6cc28b1..2fdb2995be6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -234,7 +234,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, } ret = btrfs_drop_extents(trans, root, inode, start, - aligned_end, start, &hint_byte); + aligned_end, aligned_end, start, &hint_byte); BUG_ON(ret); if (isize > actual_end) @@ -1439,6 +1439,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, struct inode *inode, u64 file_pos, u64 disk_bytenr, u64 disk_num_bytes, u64 num_bytes, u64 ram_bytes, + u64 locked_end, u8 compression, u8 encryption, u16 other_encoding, int extent_type) { @@ -1455,7 +1456,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, path->leave_spinning = 1; ret = btrfs_drop_extents(trans, root, inode, file_pos, - file_pos + num_bytes, file_pos, &hint); + file_pos + num_bytes, locked_end, + file_pos, &hint); BUG_ON(ret); ins.objectid = inode->i_ino; @@ -1590,6 +1592,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) 
ordered_extent->disk_len, ordered_extent->len, ordered_extent->len, + ordered_extent->file_offset + + ordered_extent->len, compressed, 0, 0, BTRFS_FILE_EXTENT_REG); BUG_ON(ret); @@ -2877,6 +2881,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) err = btrfs_drop_extents(trans, root, inode, cur_offset, cur_offset + hole_size, + block_end, cur_offset, &hint_byte); if (err) break; @@ -4968,7 +4973,7 @@ out_fail: static int prealloc_file_range(struct btrfs_trans_handle *trans, struct inode *inode, u64 start, u64 end, - u64 alloc_hint, int mode) + u64 locked_end, u64 alloc_hint, int mode) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; @@ -4989,7 +4994,8 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, ret = insert_reserved_file_extent(trans, inode, cur_offset, ins.objectid, ins.offset, ins.offset, - ins.offset, 0, 0, 0, + ins.offset, locked_end, + 0, 0, 0, BTRFS_FILE_EXTENT_PREALLOC); BUG_ON(ret); num_bytes -= ins.offset; @@ -5018,6 +5024,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, u64 alloc_start; u64 alloc_end; u64 alloc_hint = 0; + u64 locked_end; u64 mask = BTRFS_I(inode)->root->sectorsize - 1; struct extent_map *em; struct btrfs_trans_handle *trans; @@ -5039,6 +5046,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, goto out; } + locked_end = alloc_end - 1; while (1) { struct btrfs_ordered_extent *ordered; @@ -5051,8 +5059,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, /* the extent lock is ordered inside the running * transaction */ - lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, - alloc_end - 1, GFP_NOFS); + lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, + GFP_NOFS); ordered = btrfs_lookup_first_ordered_extent(inode, alloc_end - 1); if (ordered && @@ -5060,7 +5068,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, ordered->file_offset < alloc_end) { btrfs_put_ordered_extent(ordered); unlock_extent(&BTRFS_I(inode)->io_tree, - 
alloc_start, alloc_end - 1, GFP_NOFS); + alloc_start, locked_end, GFP_NOFS); btrfs_end_transaction(trans, BTRFS_I(inode)->root); /* @@ -5085,7 +5093,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, last_byte = (last_byte + mask) & ~mask; if (em->block_start == EXTENT_MAP_HOLE) { ret = prealloc_file_range(trans, inode, cur_offset, - last_byte, alloc_hint, mode); + last_byte, locked_end + 1, + alloc_hint, mode); if (ret < 0) { free_extent_map(em); break; @@ -5101,7 +5110,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, break; } } - unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, + unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, GFP_NOFS); btrfs_end_transaction(trans, BTRFS_I(inode)->root); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7594bec1be1..f4e5d2e5ece 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -830,7 +830,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, BUG_ON(!trans); /* punch hole in destination first */ - btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte); + btrfs_drop_extents(trans, root, inode, off, off + len, + off + len, 0, &hint_byte); /* clone data */ key.objectid = src->i_ino; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 25f20ea11f2..db5e212e844 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -536,7 +536,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, saved_nbytes = inode_get_bytes(inode); /* drop any overlapping extents */ ret = btrfs_drop_extents(trans, root, inode, - start, extent_end, start, &alloc_hint); + start, extent_end, extent_end, start, &alloc_hint); BUG_ON(ret); if (found_type == BTRFS_FILE_EXTENT_REG || -- cgit v1.2.3 From 59bc5c758ece00fb0b2a170dd8fbbf31f1856c8a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 24 Apr 2009 14:39:25 -0400 Subject: Btrfs: fix deadlocks and stalls on dead root removal After a transaction commit, the old root of 
the subvol btrees are sent through snapshot removal. This is what actually frees up any blocks replaced by COW, and anything the old blocks pointed to. Snapshot deletion will pause when a transaction commit has started, which helps to avoid a huge amount of delayed reference count updates piling up as the transaction is trying to close. But, this pause happens after the snapshot deletion process has asked other procs on the system to throttle back a bit so that it can make progress. We don't want to throttle everyone while we're waiting for the transaction commit; it leads to deadlocks in the user transaction ioctls used by Ceph and makes things slower in general. This patch changes things to avoid the throttling while we sleep. Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2869b3361eb..01b143605ec 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -687,7 +687,13 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) prepare_to_wait(&info->transaction_wait, &wait, TASK_UNINTERRUPTIBLE); mutex_unlock(&info->trans_mutex); + + atomic_dec(&info->throttles); + wake_up(&info->transaction_throttle); + schedule(); + + atomic_inc(&info->throttles); mutex_lock(&info->trans_mutex); finish_wait(&info->transaction_wait, &wait); } -- cgit v1.2.3 From a9e817425dc0baede8ebe5fbc9984a640257432b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 24 Apr 2009 16:11:18 -0400 Subject: ext4: Ignore i_file_acl_high unless EXT4_FEATURE_INCOMPAT_64BIT is present Don't try to look at i_file_acl_high unless the INCOMPAT_64BIT feature bit is set. The field is normally zero, but older versions of e2fsck didn't automatically check to make sure of this, so in the spirit of "be liberal in what you accept", don't look at i_file_acl_high unless we are using a 64-bit filesystem.
Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cab75bbcd57..11460037ea9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4357,11 +4357,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_flags = le32_to_cpu(raw_inode->i_flags); inode->i_blocks = ext4_inode_blocks(raw_inode, ei); ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); - if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != - cpu_to_le32(EXT4_OS_HURD)) { + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) ei->i_file_acl |= ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; - } inode->i_size = ext4_isize(raw_inode); ei->i_disksize = inode->i_size; inode->i_generation = le32_to_cpu(raw_inode->i_generation); -- cgit v1.2.3 From c4b5a614316c505922a522b2e35ba05ea3e08a7c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 24 Apr 2009 18:45:35 -0400 Subject: ext4: Do not try to validate extents on special files The EXTENTS_FL flag should never be set on special files, but if it is, don't bother trying to validate that the extents tree is valid, since only files, directories, and non-fast symlinks will ever have an extent data structure. We perhaps should flag the filesystem as being corrupted if we see a special file (named pipes, device nodes, Unix domain sockets, etc.) with the EXTENTS_FL flag, but e2fsck doesn't currently check this case, so we'll just ignore this for now, since it's harmless. Without this fix, a special device with the extents flag is flagged as an error by the kernel, so it is impossible to access or delete the inode, but e2fsck doesn't see it as a problem, leading to confused/frustrated users. 
Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 11460037ea9..e91f978c7f1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4407,6 +4407,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; } + ret = 0; if (ei->i_file_acl && ((ei->i_file_acl < (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + @@ -4418,8 +4419,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = -EIO; goto bad_inode; } else if (ei->i_flags & EXT4_EXTENTS_FL) { - /* Validate extent which is part of inode */ - ret = ext4_ext_check_inode(inode); + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + (S_ISLNK(inode->i_mode) && + !ext4_inode_is_fast_symlink(inode))) + /* Validate extent which is part of inode */ + ret = ext4_ext_check_inode(inode); } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || (S_ISLNK(inode->i_mode) && !ext4_inode_is_fast_symlink(inode))) { -- cgit v1.2.3 From d6397baee468809ef311e763dfc6e9f73418f8a6 Mon Sep 17 00:00:00 2001 From: Chris Ball Date: Mon, 27 Apr 2009 07:29:03 -0400 Subject: Btrfs: When shrinking, only update disk size on success Previously, we updated a device's size prior to attempting a shrink operation. This patch moves the device resizing logic to only happen if the shrink completes successfully. In the process, it introduces a new field to btrfs_device -- disk_total_bytes -- to track the on-disk size. 
Signed-off-by: Chris Ball Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 35 ++++++++++++++++++++++++----------- fs/btrfs/volumes.h | 3 +++ 2 files changed, 27 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e53835b8859..5f01dad4b69 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1543,7 +1543,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, btrfs_set_device_io_align(leaf, dev_item, device->io_align); btrfs_set_device_io_width(leaf, dev_item, device->io_width); btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); - btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); + btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes); btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); btrfs_mark_buffer_dirty(leaf); @@ -1940,14 +1940,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) device->total_bytes = new_size; if (device->writeable) device->fs_devices->total_rw_bytes -= diff; - ret = btrfs_update_device(trans, device); - if (ret) { - unlock_chunks(root); - btrfs_end_transaction(trans, root); - goto done; - } - WARN_ON(diff > old_total); - btrfs_set_super_total_bytes(super_copy, old_total - diff); unlock_chunks(root); btrfs_end_transaction(trans, root); @@ -1979,7 +1971,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) length = btrfs_dev_extent_length(l, dev_extent); if (key.offset + length <= new_size) - goto done; + break; chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); @@ -1992,6 +1984,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) goto done; } + /* Shrinking succeeded, else we would be at "done". 
*/ + trans = btrfs_start_transaction(root, 1); + if (!trans) { + ret = -ENOMEM; + goto done; + } + lock_chunks(root); + + device->disk_total_bytes = new_size; + /* Now btrfs_update_device() will change the on-disk size. */ + ret = btrfs_update_device(trans, device); + if (ret) { + unlock_chunks(root); + btrfs_end_transaction(trans, root); + goto done; + } + WARN_ON(diff > old_total); + btrfs_set_super_total_bytes(super_copy, old_total - diff); + unlock_chunks(root); + btrfs_end_transaction(trans, root); done: btrfs_free_path(path); return ret; @@ -3076,7 +3088,8 @@ static int fill_device_from_item(struct extent_buffer *leaf, unsigned long ptr; device->devid = btrfs_device_id(leaf, dev_item); - device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); + device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item); + device->total_bytes = device->disk_total_bytes; device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); device->type = btrfs_device_type(leaf, dev_item); device->io_align = btrfs_device_io_align(leaf, dev_item); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5836327ba5d..5c3ff6d02fd 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -61,6 +61,9 @@ struct btrfs_device { /* size of the device */ u64 total_bytes; + /* size of the disk */ + u64 disk_total_bytes; + /* bytes used */ u64 bytes_used; -- cgit v1.2.3 From b7967db75a38df4891b22efe1b0969b9357eb946 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Apr 2009 07:29:04 -0400 Subject: Btrfs: remove #if 0 code Btrfs had some old code sitting around under #if 0, this drops it. 
Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 48 +------------------------------- fs/btrfs/extent_io.c | 63 ------------------------------------------ fs/btrfs/file.c | 78 ---------------------------------------------------- 3 files changed, 1 insertion(+), 188 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 44c94d808e2..77f9a3b824b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -584,18 +584,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, btrfs_set_work_high_prio(&async->work); btrfs_queue_worker(&fs_info->workers, &async->work); -#if 0 - int limit = btrfs_async_submit_limit(fs_info); - if (atomic_read(&fs_info->nr_async_submits) > limit) { - wait_event_timeout(fs_info->async_submit_wait, - (atomic_read(&fs_info->nr_async_submits) < limit), - HZ/10); - wait_event_timeout(fs_info->async_submit_wait, - (atomic_read(&fs_info->nr_async_bios) < limit), - HZ/10); - } -#endif while (atomic_read(&fs_info->async_submit_draining) && atomic_read(&fs_info->nr_async_submits)) { wait_event(fs_info->async_submit_wait, @@ -770,27 +759,6 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) } } -#if 0 -static int btree_writepage(struct page *page, struct writeback_control *wbc) -{ - struct buffer_head *bh; - struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; - struct buffer_head *head; - if (!page_has_buffers(page)) { - create_empty_buffers(page, root->fs_info->sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - head = page_buffers(page); - bh = head; - do { - if (buffer_dirty(bh)) - csum_tree_block(root, bh, 0); - bh = bh->b_this_page; - } while (bh != head); - return block_write_full_page(page, btree_get_block, wbc); -} -#endif - static struct address_space_operations btree_aops = { .readpage = btree_readpage, .writepage = btree_writepage, @@ -1278,11 +1246,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) int ret = 0; 
struct btrfs_device *device; struct backing_dev_info *bdi; -#if 0 - if ((bdi_bits & (1 << BDI_write_congested)) && - btrfs_congested_async(info, 0)) - return 1; -#endif + list_for_each_entry(device, &info->fs_devices->devices, dev_list) { if (!device->bdev) continue; @@ -2334,16 +2298,6 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); -#if 0 - while (!list_empty(&fs_info->hashers)) { - struct btrfs_hasher *hasher; - hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher, - hashers); - list_del(&hasher->hashers); - crypto_free_hash(&fs_info->hash_tfm); - kfree(hasher); - } -#endif btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c33b54029d7..fe9eb990e44 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1401,69 +1401,6 @@ out: return total_bytes; } -#if 0 -/* - * helper function to lock both pages and extents in the tree. - * pages must be locked first. - */ -static int lock_range(struct extent_io_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - int err; - - while (index <= end_index) { - page = grab_cache_page(tree->mapping, index); - if (!page) { - err = -ENOMEM; - goto failed; - } - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto failed; - } - index++; - } - lock_extent(tree, start, end, GFP_NOFS); - return 0; - -failed: - /* - * we failed above in getting the page at 'index', so we undo here - * up to but not including the page at 'index' - */ - end_index = index; - index = start >> PAGE_CACHE_SHIFT; - while (index < end_index) { - page = find_get_page(tree->mapping, index); - unlock_page(page); - page_cache_release(page); - index++; - } - return err; -} - -/* - * helper function to unlock both pages and extents in the tree. 
- */ -static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - unlock_page(page); - page_cache_release(page); - index++; - } - unlock_extent(tree, start, end, GFP_NOFS); - return 0; -} -#endif - /* * set the private field for a given byte offset in the tree. If there isn't * an extent_state there already, this does nothing. diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index da3ed965c95..1d51dc38bb4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -272,83 +272,6 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, return 0; } -int btrfs_check_file(struct btrfs_root *root, struct inode *inode) -{ - return 0; -#if 0 - struct btrfs_path *path; - struct btrfs_key found_key; - struct extent_buffer *leaf; - struct btrfs_file_extent_item *extent; - u64 last_offset = 0; - int nritems; - int slot; - int found_type; - int ret; - int err = 0; - u64 extent_end = 0; - - path = btrfs_alloc_path(); - ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, - last_offset, 0); - while (1) { - nritems = btrfs_header_nritems(path->nodes[0]); - if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret) - goto out; - nritems = btrfs_header_nritems(path->nodes[0]); - } - slot = path->slots[0]; - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (found_key.objectid != inode->i_ino) - break; - if (found_key.type != BTRFS_EXTENT_DATA_KEY) - goto out; - - if (found_key.offset < last_offset) { - WARN_ON(1); - btrfs_print_leaf(root, leaf); - printk(KERN_ERR "inode %lu found offset %llu " - "expected %llu\n", inode->i_ino, - (unsigned long long)found_key.offset, - (unsigned long long)last_offset); - err = 1; - goto out; - } - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); 
- found_type = btrfs_file_extent_type(leaf, extent); - if (found_type == BTRFS_FILE_EXTENT_REG) { - extent_end = found_key.offset + - btrfs_file_extent_num_bytes(leaf, extent); - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - struct btrfs_item *item; - item = btrfs_item_nr(leaf, slot); - extent_end = found_key.offset + - btrfs_file_extent_inline_len(leaf, extent); - extent_end = (extent_end + root->sectorsize - 1) & - ~((u64)root->sectorsize - 1); - } - last_offset = extent_end; - path->slots[0]++; - } - if (0 && last_offset < inode->i_size) { - WARN_ON(1); - btrfs_print_leaf(root, leaf); - printk(KERN_ERR "inode %lu found offset %llu size %llu\n", - inode->i_ino, (unsigned long long)last_offset, - (unsigned long long)inode->i_size); - err = 1; - - } -out: - btrfs_free_path(path); - return err; -#endif -} - /* * this is very complex, but the basic idea is to drop all extents * in the range start - end. hint_block is filled in with a block number @@ -689,7 +612,6 @@ out: unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, locked_end - 1, GFP_NOFS); } - btrfs_check_file(root, inode); return ret; } -- cgit v1.2.3 From 193f284d4985db0370a8a1bbdfb20df548cf9ffb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Apr 2009 07:29:05 -0400 Subject: Btrfs: ratelimit IO error printks Btrfs has printks for various IO errors, including bad checksums and mismatches between what we expect the block headers to contain and what we actually find on the disk. Longer term we need a real reporting mechanism for this, but for now printk is going to have to do. 
Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 38 +++++++++++++++++++++++++------------- fs/btrfs/inode.c | 10 ++++++---- 2 files changed, 31 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 77f9a3b824b..aa0c259b9c2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -232,10 +232,14 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, memcpy(&found, result, csum_size); read_extent_buffer(buf, &val, 0, csum_size); - printk(KERN_INFO "btrfs: %s checksum verify failed " - "on %llu wanted %X found %X level %d\n", - root->fs_info->sb->s_id, - buf->start, val, found, btrfs_header_level(buf)); + if (printk_ratelimit()) { + printk(KERN_INFO "btrfs: %s checksum verify " + "failed on %llu wanted %X found %X " + "level %d\n", + root->fs_info->sb->s_id, + (unsigned long long)buf->start, val, found, + btrfs_header_level(buf)); + } if (result != (char *)&inline_result) kfree(result); return 1; @@ -268,10 +272,13 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, ret = 0; goto out; } - printk("parent transid verify failed on %llu wanted %llu found %llu\n", - (unsigned long long)eb->start, - (unsigned long long)parent_transid, - (unsigned long long)btrfs_header_generation(eb)); + if (printk_ratelimit()) { + printk("parent transid verify failed on %llu wanted %llu " + "found %llu\n", + (unsigned long long)eb->start, + (unsigned long long)parent_transid, + (unsigned long long)btrfs_header_generation(eb)); + } ret = 1; clear_extent_buffer_uptodate(io_tree, eb); out: @@ -415,9 +422,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, found_start = btrfs_header_bytenr(eb); if (found_start != start) { - printk(KERN_INFO "btrfs bad tree block start %llu %llu\n", - (unsigned long long)found_start, - (unsigned long long)eb->start); + if (printk_ratelimit()) { + printk(KERN_INFO "btrfs bad tree block start " + "%llu %llu\n", + (unsigned long 
long)found_start, + (unsigned long long)eb->start); + } ret = -EIO; goto err; } @@ -429,8 +439,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, goto err; } if (check_tree_block_fsid(root, eb)) { - printk(KERN_INFO "btrfs bad fsid on block %llu\n", - (unsigned long long)eb->start); + if (printk_ratelimit()) { + printk(KERN_INFO "btrfs bad fsid on block %llu\n", + (unsigned long long)eb->start); + } ret = -EIO; goto err; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2fdb2995be6..552e08afc7f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1823,10 +1823,12 @@ good: return 0; zeroit: - printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " - "private %llu\n", page->mapping->host->i_ino, - (unsigned long long)start, csum, - (unsigned long long)private); + if (printk_ratelimit()) { + printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " + "private %llu\n", page->mapping->host->i_ino, + (unsigned long long)start, csum, + (unsigned long long)private); + } memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); -- cgit v1.2.3 From 45c06543afe2772c02f21efee0e2138b4e1c911e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Apr 2009 07:49:10 -0400 Subject: Btrfs: remove unused btrfs_bit_radix slab Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 552e08afc7f..98bd5069d54 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -70,7 +70,6 @@ static struct extent_io_ops btrfs_extent_io_ops; static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; struct kmem_cache *btrfs_transaction_cachep; -struct kmem_cache *btrfs_bit_radix_cachep; struct kmem_cache *btrfs_path_cachep; #define S_SHIFT 12 @@ -4641,8 +4640,6 @@ void btrfs_destroy_cachep(void) kmem_cache_destroy(btrfs_trans_handle_cachep); if 
(btrfs_transaction_cachep) kmem_cache_destroy(btrfs_transaction_cachep); - if (btrfs_bit_radix_cachep) - kmem_cache_destroy(btrfs_bit_radix_cachep); if (btrfs_path_cachep) kmem_cache_destroy(btrfs_path_cachep); } @@ -4673,11 +4670,6 @@ int btrfs_init_cachep(void) if (!btrfs_path_cachep) goto fail; - btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix", 256, 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | - SLAB_DESTROY_BY_RCU, NULL); - if (!btrfs_bit_radix_cachep) - goto fail; return 0; fail: btrfs_destroy_cachep(); -- cgit v1.2.3 From e63b6a6c0ffa2ebd8617cc1a10969000296831aa Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 21 Apr 2009 12:38:30 -0700 Subject: Btrfs: Fix a trivial warning using max() of u64 vs ULL. A small warning popped up on ia64 because inode-map.c was comparing a u64 object id with the ULL FIRST_FREE_OBJECTID. My first thought was that all the OBJECTID constants should contain the u64 cast because btrfs code deals entirely in u64s. But then I saw how large that was, and figured I'd just fix the max() call. Signed-off-by: Joel Becker Signed-off-by: Chris Mason --- fs/btrfs/inode-map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index cc7334d833c..9abbced1123 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -79,7 +79,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, } path = btrfs_alloc_path(); BUG_ON(!path); - search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); + search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID); search_key.objectid = search_start; search_key.type = 0; search_key.offset = 0; -- cgit v1.2.3 From 21380931eb4da4e29ac663d0221581282cbba208 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 21 Apr 2009 12:38:29 -0700 Subject: Btrfs: Fix a bunch of printk() warnings. Just happened to notice a bunch of %llu vs u64 warnings. Here's a patch to cast them all. 
Signed-off-by: Joel Becker Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 +++--- fs/btrfs/extent-tree.c | 21 ++++++++++++++------- fs/btrfs/free-space-cache.c | 15 ++++++++++----- fs/btrfs/ioctl.c | 6 ++++-- fs/btrfs/super.c | 15 +++++++++------ 5 files changed, 40 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index aa0c259b9c2..0ff16d3331d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1671,7 +1671,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (features) { printk(KERN_ERR "BTRFS: couldn't mount because of " "unsupported optional features (%Lx).\n", - features); + (unsigned long long)features); err = -EINVAL; goto fail_iput; } @@ -1681,7 +1681,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!(sb->s_flags & MS_RDONLY) && features) { printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " "unsupported option features (%Lx).\n", - features); + (unsigned long long)features); err = -EINVAL; goto fail_iput; } @@ -2273,7 +2273,7 @@ int close_ctree(struct btrfs_root *root) if (fs_info->delalloc_bytes) { printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", - fs_info->delalloc_bytes); + (unsigned long long)fs_info->delalloc_bytes); } if (fs_info->total_ref_cache_size) { printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2895a837323..e4966444811 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1844,10 +1844,14 @@ again: printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" ", %llu bytes_used, %llu bytes_reserved, " "%llu bytes_pinned, %llu bytes_readonly, %llu may use" - "%llu total\n", bytes, data_sinfo->bytes_delalloc, - data_sinfo->bytes_used, data_sinfo->bytes_reserved, - data_sinfo->bytes_pinned, data_sinfo->bytes_readonly, - data_sinfo->bytes_may_use, data_sinfo->total_bytes); + "%llu total\n", (unsigned long long)bytes, + (unsigned long 
long)data_sinfo->bytes_delalloc, + (unsigned long long)data_sinfo->bytes_used, + (unsigned long long)data_sinfo->bytes_reserved, + (unsigned long long)data_sinfo->bytes_pinned, + (unsigned long long)data_sinfo->bytes_readonly, + (unsigned long long)data_sinfo->bytes_may_use, + (unsigned long long)data_sinfo->total_bytes); return -ENOSPC; } data_sinfo->bytes_may_use += bytes; @@ -2824,9 +2828,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) info->bytes_pinned - info->bytes_reserved), (info->full) ? "" : "not "); printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," - " may_use=%llu, used=%llu\n", info->total_bytes, - info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use, - info->bytes_used); + " may_use=%llu, used=%llu\n", + (unsigned long long)info->total_bytes, + (unsigned long long)info->bytes_pinned, + (unsigned long long)info->bytes_delalloc, + (unsigned long long)info->bytes_may_use, + (unsigned long long)info->bytes_used); down_read(&info->groups_sem); list_for_each_entry(cache, &info->block_groups, list) { diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 768b9523662..0bc93657b46 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -332,13 +332,17 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, printk(KERN_ERR "couldn't find space %llu to free\n", (unsigned long long)offset); printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", - block_group->cached, block_group->key.objectid, - block_group->key.offset); + block_group->cached, + (unsigned long long)block_group->key.objectid, + (unsigned long long)block_group->key.offset); btrfs_dump_free_space(block_group, bytes); } else if (info) { printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " "but wanted offset=%llu bytes=%llu\n", - info->offset, info->bytes, offset, bytes); + (unsigned long long)info->offset, + (unsigned long long)info->bytes, + (unsigned long long)offset, + 
(unsigned long long)bytes); } WARN_ON(1); } @@ -357,8 +361,9 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, info = rb_entry(n, struct btrfs_free_space, offset_index); if (info->bytes >= bytes) count++; - printk(KERN_ERR "entry offset %llu, bytes %llu\n", info->offset, - info->bytes); + printk(KERN_ERR "entry offset %llu, bytes %llu\n", + (unsigned long long)info->offset, + (unsigned long long)info->bytes); } printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" "\n", count); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f4e5d2e5ece..48762aa1e94 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -483,11 +483,13 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) *devstr = '\0'; devstr = vol_args->name; devid = simple_strtoull(devstr, &end, 10); - printk(KERN_INFO "resizing devid %llu\n", devid); + printk(KERN_INFO "resizing devid %llu\n", + (unsigned long long)devid); } device = btrfs_find_device(root, devid, NULL, NULL); if (!device) { - printk(KERN_INFO "resizer unable to find device %llu\n", devid); + printk(KERN_INFO "resizer unable to find device %llu\n", + (unsigned long long)devid); ret = -EINVAL; goto out_unlock; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 30c9a8ca2a5..bf0e84c7560 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -196,7 +196,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) info->max_extent = max_t(u64, info->max_extent, root->sectorsize); printk(KERN_INFO "btrfs: max_extent at %llu\n", - info->max_extent); + (unsigned long long)info->max_extent); } break; case Opt_max_inline: @@ -211,7 +211,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) root->sectorsize); } printk(KERN_INFO "btrfs: max_inline at %llu\n", - info->max_inline); + (unsigned long long)info->max_inline); } break; case Opt_alloc_start: @@ -221,7 +221,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) kfree(num); 
printk(KERN_INFO "btrfs: allocations start at %llu\n", - info->alloc_start); + (unsigned long long)info->alloc_start); } break; case Opt_noacl: @@ -420,11 +420,14 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) if (btrfs_test_opt(root, NOBARRIER)) seq_puts(seq, ",nobarrier"); if (info->max_extent != (u64)-1) - seq_printf(seq, ",max_extent=%llu", info->max_extent); + seq_printf(seq, ",max_extent=%llu", + (unsigned long long)info->max_extent); if (info->max_inline != 8192 * 1024) - seq_printf(seq, ",max_inline=%llu", info->max_inline); + seq_printf(seq, ",max_inline=%llu", + (unsigned long long)info->max_inline); if (info->alloc_start != 0) - seq_printf(seq, ",alloc_start=%llu", info->alloc_start); + seq_printf(seq, ",alloc_start=%llu", + (unsigned long long)info->alloc_start); if (info->thread_pool_size != min_t(unsigned long, num_online_cpus() + 2, 8)) seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); -- cgit v1.2.3 From fd1b52435a6d9663de896e8437ef067372916ef3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Apr 2009 18:10:06 +0200 Subject: quota: remove obsolete comments in fs/quota/Makefile Get rid of useless comments and the equally useless obj-y initialization. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/quota/Makefile | 9 --------- 1 file changed, 9 deletions(-) (limited to 'fs') diff --git a/fs/quota/Makefile b/fs/quota/Makefile index 385a0831cc9..68d4f6dc057 100644 --- a/fs/quota/Makefile +++ b/fs/quota/Makefile @@ -1,12 +1,3 @@ -# -# Makefile for the Linux filesystems. -# -# 14 Sep 2000, Christoph Hellwig -# Rewritten to use lists instead of if-statements. 
-# - -obj-y := - obj-$(CONFIG_QUOTA) += dquot.o obj-$(CONFIG_QFMT_V1) += quota_v1.o obj-$(CONFIG_QFMT_V2) += quota_v2.o -- cgit v1.2.3 From a069e9cee1dba2f847839d325f46ce6976ed1b76 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 9 Apr 2009 18:07:10 +0200 Subject: ext2: missing unlock in ext2_quota_write() The inode->i_mutex should be unlocked. Found by smatch (http://repo.or.cz/w/smatch.git). Compile tested. Signed-off-by: Dan Carpenter Signed-off-by: Jan Kara --- fs/ext2/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext2/super.c b/fs/ext2/super.c index f983225266d..5c4afe65224 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1395,8 +1395,10 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, blk++; } out: - if (len == towrite) + if (len == towrite) { + mutex_unlock(&inode->i_mutex); return err; + } if (inode->i_size < off+len-towrite) i_size_write(inode, off+len-towrite); inode->i_version++; -- cgit v1.2.3 From 7b1a14bbb0e547aaa4d30cc376e6c8c12539ab0f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Apr 2009 10:49:53 -0400 Subject: Btrfs: fix acl caching Linus noticed the btrfs code to cache acls wasn't properly caching a NULL acl when the inode didn't have any acls. This meant the common case of no acls resulted in expensive btree searches every time the kernel checked permissions (which is quite often). This is a modified version of Linus' original patch: Properly set initial acl fields to BTRFS_ACL_NOT_CACHED in the inode. This forces an acl lookup when permission checks are done. Fix btrfs_get_acl to avoid lookups and locking when the inode acls fields are set to null. Fix btrfs_get_acl to use the right return value from __btrfs_getxattr when deciding to cache a NULL acl. It was storing a NULL acl when __btrfs_getxattr return -ENOENT, but __btrfs_getxattr was actually returning -ENODATA for this case. 
Signed-off-by: Chris Mason --- fs/btrfs/acl.c | 18 +++++++++++++----- fs/btrfs/inode.c | 4 ++-- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 7fdd184a528..cbba000dccb 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -60,15 +60,20 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) return ERR_PTR(-EINVAL); } + /* Handle the cached NULL acl case without locking */ + acl = ACCESS_ONCE(*p_acl); + if (!acl) + return acl; + spin_lock(&inode->i_lock); - if (*p_acl != BTRFS_ACL_NOT_CACHED) - acl = posix_acl_dup(*p_acl); + acl = *p_acl; + if (acl != BTRFS_ACL_NOT_CACHED) + acl = posix_acl_dup(acl); spin_unlock(&inode->i_lock); - if (acl) + if (acl != BTRFS_ACL_NOT_CACHED) return acl; - size = __btrfs_getxattr(inode, name, "", 0); if (size > 0) { value = kzalloc(size, GFP_NOFS); @@ -80,9 +85,12 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) btrfs_update_cached_acl(inode, p_acl, acl); } kfree(value); - } else if (size == -ENOENT) { + } else if (size == -ENOENT || size == -ENODATA || size == 0) { + /* FIXME, who returns -ENOENT? I think nobody */ acl = NULL; btrfs_update_cached_acl(inode, p_acl, acl); + } else { + acl = ERR_PTR(-EIO); } return acl; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 98bd5069d54..dc812ec551f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3047,8 +3047,8 @@ static noinline void init_btrfs_i(struct inode *inode) { struct btrfs_inode *bi = BTRFS_I(inode); - bi->i_acl = NULL; - bi->i_default_acl = NULL; + bi->i_acl = BTRFS_ACL_NOT_CACHED; + bi->i_default_acl = BTRFS_ACL_NOT_CACHED; bi->generation = 0; bi->sequence = 0; -- cgit v1.2.3 From 46a53cca826e71effe59e3cb4f383622c33ebdcb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Apr 2009 11:47:50 -0400 Subject: Btrfs: look for acls during btrfs_read_locked_inode This changes btrfs_read_locked_inode() to peek ahead in the btree for acl items. 
If it is certain a given inode has no acls, it will set the in memory acl fields to null to avoid acl lookups completely. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dc812ec551f..90c23eb2882 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2015,6 +2015,57 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) btrfs_free_path(path); } +/* + * very simple check to peek ahead in the leaf looking for xattrs. If we + * don't find any xattrs, we know there can't be any acls. + * + * slot is the slot the inode is in, objectid is the objectid of the inode + */ +static noinline int acls_after_inode_item(struct extent_buffer *leaf, + int slot, u64 objectid) +{ + u32 nritems = btrfs_header_nritems(leaf); + struct btrfs_key found_key; + int scanned = 0; + + slot++; + while (slot < nritems) { + btrfs_item_key_to_cpu(leaf, &found_key, slot); + + /* we found a different objectid, there must not be acls */ + if (found_key.objectid != objectid) + return 0; + + /* we found an xattr, assume we've got an acl */ + if (found_key.type == BTRFS_XATTR_ITEM_KEY) + return 1; + + /* + * we found a key greater than an xattr key, there can't + * be any acls later on + */ + if (found_key.type > BTRFS_XATTR_ITEM_KEY) + return 0; + + slot++; + scanned++; + + /* + * it goes inode, inode backrefs, xattrs, extents, + * so if there are a ton of hard links to an inode there can + * be a lot of backrefs. Don't waste time searching too hard, + * this is just an optimization + */ + if (scanned >= 8) + break; + } + /* we hit the end of the leaf before we found an xattr or + * something larger than an xattr. 
We have to assume the inode + * has acls + */ + return 1; +} + /* * read an inode from the btree into the in-memory inode */ @@ -2026,6 +2077,7 @@ void btrfs_read_locked_inode(struct inode *inode) struct btrfs_timespec *tspec; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; + int maybe_acls; u64 alloc_group_block; u32 rdev; int ret; @@ -2072,6 +2124,16 @@ void btrfs_read_locked_inode(struct inode *inode) alloc_group_block = btrfs_inode_block_group(leaf, inode_item); + /* + * try to precache a NULL acl entry for files that don't have + * any xattrs or acls + */ + maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); + if (!maybe_acls) { + BTRFS_I(inode)->i_acl = NULL; + BTRFS_I(inode)->i_default_acl = NULL; + } + BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, alloc_group_block, 0); btrfs_free_path(path); -- cgit v1.2.3 From 69838727bcd819a8fd73a88447801221788b0c6d Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 28 Apr 2009 20:24:29 +0200 Subject: bio: fix memcpy corruption in bio_copy_user_iov() st driver uses blk_rq_map_user() in order to just build a request out of page frames. In this case, map_data->offset is a non zero value and iov[0].iov_base is NULL. We need to increase nr_pages for that. 
Cc: stable@kernel.org Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- fs/bio.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 7bbc98f0eda..98711647ece 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -817,6 +817,9 @@ struct bio *bio_copy_user_iov(struct request_queue *q, len += iov[i].iov_len; } + if (offset) + nr_pages++; + bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); if (!bmd) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From 802b352f2934f799ec2e159f61db6506094a936e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 27 Apr 2009 21:24:28 -0700 Subject: ecryptfs: fix printk format warning fs/ecryptfs/inode.c:670: warning: format '%d' expects type 'int', but argument 3 has type 'size_t' Signed-off-by: Randy Dunlap Signed-off-by: Tyler Hicks Cc: Dustin Kirkland Signed-off-by: Andrew Morton --- fs/ecryptfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 93bc0f8174a..1940edd0830 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -667,7 +667,7 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL); if (lower_buf == NULL) { printk(KERN_ERR "%s: Out of memory whilst attempting to " - "kmalloc [%d] bytes\n", __func__, lower_bufsiz); + "kmalloc [%zd] bytes\n", __func__, lower_bufsiz); rc = -ENOMEM; goto out; } -- cgit v1.2.3 From ac20100df7a7a042423dcb8847f42d9f6ddb8d00 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 27 Apr 2009 13:31:12 -0500 Subject: eCryptfs: Fix min function comparison warning This warning shows up on 64 bit builds: fs/ecryptfs/inode.c:693: warning: comparison of distinct pointer types lacks a cast Signed-off-by: Tyler Hicks --- fs/ecryptfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 1940edd0830..2f0945d6329 100644 --- 
a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -690,7 +690,7 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) } /* Check for bufsiz <= 0 done in sys_readlinkat() */ rc = copy_to_user(buf, plaintext_name, - min((unsigned) bufsiz, plaintext_name_size)); + min((size_t) bufsiz, plaintext_name_size)); if (rc) rc = -EFAULT; else -- cgit v1.2.3 From fd9db7297749c05fcf5721ce5393a5a8b8772f2a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 28 Apr 2009 16:56:35 +0200 Subject: fuse: destroy bdi on error Destroy bdi on error in fuse_fill_super(). This was an omission from commit 26c3679101dbccc054dcf370143941844ba70531 "fuse: destroy bdi on umount", which moved the bdi_destroy() call from fuse_conn_put() to fuse_put_super(). Signed-off-by: Miklos Szeredi CC: stable@kernel.org --- fs/fuse/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 459b73dd45e..75ca5ac603a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -908,6 +908,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) err_put_root: dput(root_dentry); err_put_conn: + bdi_destroy(&fc->bdi); fuse_conn_put(fc); err_fput: fput(file); -- cgit v1.2.3 From 7e31a966ad270ba32a77c157c015cd7c82faaa55 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 29 Apr 2009 01:20:55 +0800 Subject: ocfs2/trivial: Remove unused variable in ocfs2_rename. With indexed dir enabled, now we use ocfs2_dir_lookup_result to wrap all the bh used for dir. So remove the 2 unused variables. 
Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/namei.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 2220f93f668..33464c6b60a 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1025,10 +1025,8 @@ static int ocfs2_rename(struct inode *old_dir, struct inode *orphan_dir = NULL; struct ocfs2_dinode *newfe = NULL; char orphan_name[OCFS2_ORPHAN_NAMELEN + 1]; - struct buffer_head *orphan_entry_bh = NULL; struct buffer_head *newfe_bh = NULL; struct buffer_head *old_inode_bh = NULL; - struct buffer_head *insert_entry_bh = NULL; struct ocfs2_super *osb = NULL; u64 newfe_blkno, old_de_ino; handle_t *handle = NULL; @@ -1455,8 +1453,6 @@ bail: brelse(old_inode_bh); brelse(old_dir_bh); brelse(new_dir_bh); - brelse(orphan_entry_bh); - brelse(insert_entry_bh); mlog_exit(status); -- cgit v1.2.3 From def6b3ba56b637d58126ef67fc19bab57945fcc4 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 23 Apr 2009 22:18:00 -0400 Subject: xfs_file_last_byte() needs to acquire ilock We had some systems crash with this stack: [] ia64_leave_kernel+0x0/0x280 [] xfs_bmbt_get_startoff+0x0/0x20 [xfs] [] xfs_bmap_last_offset+0x210/0x280 [xfs] [] xfs_file_last_byte+0x70/0x1a0 [xfs] [] xfs_itruncate_start+0xc0/0x1a0 [xfs] [] xfs_inactive_free_eofblocks+0x290/0x460 [xfs] [] xfs_release+0x1b0/0x240 [xfs] [] xfs_file_release+0x70/0xa0 [xfs] [] __fput+0x1a0/0x420 [] fput+0x40/0x60 The problem here is that xfs_file_last_byte() does not acquire the inode lock and can therefore race with another thread that is modifying the extent list. While xfs_bmap_last_offset() is trying to look up what was the last extent some extents were merged and the extent list shrunk so the index we look up is now beyond the end of the extent list and potentially in a freed buffer.
Signed-off-by: Lachlan McIlroy Reviewed-by: Christoph Hellwig Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/xfs_inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index e7ae08d1df4..123b20c8cbf 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1258,8 +1258,10 @@ xfs_file_last_byte( * necessary. */ if (ip->i_df.if_flags & XFS_IFEXTENTS) { + xfs_ilock(ip, XFS_ILOCK_SHARED); error = xfs_bmap_last_offset(NULL, ip, &last_block, XFS_DATA_FORK); + xfs_iunlock(ip, XFS_ILOCK_SHARED); if (error) { last_block = 0; } -- cgit v1.2.3 From b9ec9068d79e039507a247ebc5bc9c0ce53654ce Mon Sep 17 00:00:00 2001 From: Olaf Weber Date: Fri, 17 Apr 2009 16:12:45 -0500 Subject: xfs: add more checks to superblock validation There had been reports where an xfs filesystem was randomly corrupted with fsfuzzer, and xfs failed to handle it gracefully. This patch fixes a couple of the reported problems by providing additional checks in the superblock validation routine.
Signed-off-by: Olaf Weber Reviewed-by: Josef 'Jeff' Sipek Reviewed-by: Christoph Hellwig Signed-off-by: Felix Blyakher --- fs/xfs/xfs_mount.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index b101990df02..65a99725d0c 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -291,14 +291,17 @@ xfs_mount_validate_sb( sbp->sb_sectsize > XFS_MAX_SECTORSIZE || sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG || sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG || + sbp->sb_sectsize != (1 << sbp->sb_sectlog) || sbp->sb_blocksize < XFS_MIN_BLOCKSIZE || sbp->sb_blocksize > XFS_MAX_BLOCKSIZE || sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || + sbp->sb_blocksize != (1 << sbp->sb_blocklog) || sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || sbp->sb_inodelog < XFS_DINODE_MIN_LOG || sbp->sb_inodelog > XFS_DINODE_MAX_LOG || + sbp->sb_inodesize != (1 << sbp->sb_inodelog) || (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || -- cgit v1.2.3 From 5f79ed685fc6122018c4b5826e2e5bdb7bc6f109 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 29 Apr 2009 10:50:48 -0400 Subject: xfs: a couple getbmap cleanups - reshuffle various conditionals for data vs attr fork to make the code more readable - do fine-grainded goto-based error handling - exit early from conditionals instead of keeping a long else branch around - allow kmem_alloc to fail Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/xfs_bmap.c | 162 ++++++++++++++++++++++++++---------------------------- 1 file changed, 79 insertions(+), 83 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3a6ed426327..abe42448b1c 100644 --- a/fs/xfs/xfs_bmap.c +++ 
b/fs/xfs/xfs_bmap.c @@ -5880,7 +5880,7 @@ xfs_getbmap( void *arg) /* formatter arg */ { __int64_t bmvend; /* last block requested */ - int error; /* return value */ + int error = 0; /* return value */ __int64_t fixlen; /* length for -1 case */ int i; /* extent number */ int lock; /* lock state */ @@ -5899,30 +5899,8 @@ xfs_getbmap( mp = ip->i_mount; iflags = bmv->bmv_iflags; - whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; - /* If the BMV_IF_NO_DMAPI_READ interface bit specified, do not - * generate a DMAPI read event. Otherwise, if the DM_EVENT_READ - * bit is set for the file, generate a read event in order - * that the DMAPI application may do its thing before we return - * the extents. Usually this means restoring user file data to - * regions of the file that look like holes. - * - * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify - * BMV_IF_NO_DMAPI_READ so that read events are generated. - * If this were not true, callers of ioctl( XFS_IOC_GETBMAP ) - * could misinterpret holes in a DMAPI file as true holes, - * when in fact they may represent offline user data. - */ - if ((iflags & BMV_IF_NO_DMAPI_READ) == 0 && - DM_EVENT_ENABLED(ip, DM_EVENT_READ) && - whichfork == XFS_DATA_FORK) { - error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL); - if (error) - return XFS_ERROR(error); - } - if (whichfork == XFS_ATTR_FORK) { if (XFS_IFORK_Q(ip)) { if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && @@ -5936,11 +5914,37 @@ xfs_getbmap( ip->i_mount); return XFS_ERROR(EFSCORRUPTED); } - } else if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_format != XFS_DINODE_FMT_BTREE && - ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) - return XFS_ERROR(EINVAL); - if (whichfork == XFS_DATA_FORK) { + + prealloced = 0; + fixlen = 1LL << 32; + } else { + /* + * If the BMV_IF_NO_DMAPI_READ interface bit specified, do + * not generate a DMAPI read event. 
Otherwise, if the + * DM_EVENT_READ bit is set for the file, generate a read + * event in order that the DMAPI application may do its thing + * before we return the extents. Usually this means restoring + * user file data to regions of the file that look like holes. + * + * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify + * BMV_IF_NO_DMAPI_READ so that read events are generated. + * If this were not true, callers of ioctl(XFS_IOC_GETBMAP) + * could misinterpret holes in a DMAPI file as true holes, + * when in fact they may represent offline user data. + */ + if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && + !(iflags & BMV_IF_NO_DMAPI_READ)) { + error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, + 0, 0, 0, NULL); + if (error) + return XFS_ERROR(error); + } + + if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && + ip->i_d.di_format != XFS_DINODE_FMT_BTREE && + ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) + return XFS_ERROR(EINVAL); + if (xfs_get_extsz_hint(ip) || ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ prealloced = 1; @@ -5949,42 +5953,34 @@ xfs_getbmap( prealloced = 0; fixlen = ip->i_size; } - } else { - prealloced = 0; - fixlen = 1LL << 32; } if (bmv->bmv_length == -1) { fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); - bmv->bmv_length = MAX( (__int64_t)(fixlen - bmv->bmv_offset), - (__int64_t)0); - } else if (bmv->bmv_length < 0) - return XFS_ERROR(EINVAL); - if (bmv->bmv_length == 0) { + bmv->bmv_length = + max_t(__int64_t, fixlen - bmv->bmv_offset, 0); + } else if (bmv->bmv_length == 0) { bmv->bmv_entries = 0; return 0; + } else if (bmv->bmv_length < 0) { + return XFS_ERROR(EINVAL); } + nex = bmv->bmv_count - 1; if (nex <= 0) return XFS_ERROR(EINVAL); bmvend = bmv->bmv_offset + bmv->bmv_length; xfs_ilock(ip, XFS_IOLOCK_SHARED); - - if (((iflags & BMV_IF_DELALLOC) == 0) && - (whichfork == XFS_DATA_FORK) && - (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) { - /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ - error = 
xfs_flush_pages(ip, (xfs_off_t)0, - -1, 0, FI_REMAPF); - if (error) { - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - return error; + if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { + if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) { + error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF); + if (error) + goto out_unlock_iolock; } - } - ASSERT(whichfork == XFS_ATTR_FORK || (iflags & BMV_IF_DELALLOC) || - ip->i_delayed_blks == 0); + ASSERT(ip->i_delayed_blks == 0); + } lock = xfs_ilock_map_shared(ip); @@ -5995,23 +5991,25 @@ xfs_getbmap( if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; - bmapi_flags = xfs_bmapi_aflag(whichfork) | - ((iflags & BMV_IF_PREALLOC) ? 0 : XFS_BMAPI_IGSTATE); + bmapi_flags = xfs_bmapi_aflag(whichfork); + if (!(iflags & BMV_IF_PREALLOC)) + bmapi_flags |= XFS_BMAPI_IGSTATE; /* * Allocate enough space to handle "subnex" maps at a time. */ + error = ENOMEM; subnex = 16; - map = kmem_alloc(subnex * sizeof(*map), KM_SLEEP); + map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL); + if (!map) + goto out_unlock_ilock; bmv->bmv_entries = 0; - if ((XFS_IFORK_NEXTENTS(ip, whichfork) == 0)) { - if (((iflags & BMV_IF_DELALLOC) == 0) || - whichfork == XFS_ATTR_FORK) { - error = 0; - goto unlock_and_return; - } + if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 && + (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) { + error = 0; + goto out_free_map; } nexleft = nex; @@ -6023,10 +6021,12 @@ xfs_getbmap( bmapi_flags, NULL, 0, map, &nmap, NULL, NULL); if (error) - goto unlock_and_return; + goto out_free_map; ASSERT(nmap <= subnex); for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { + int full = 0; /* user array is full */ + out.bmv_oflags = 0; if (map[i].br_state == XFS_EXT_UNWRITTEN) out.bmv_oflags |= BMV_OF_PREALLOC; @@ -6041,36 +6041,32 @@ xfs_getbmap( whichfork == XFS_ATTR_FORK) { /* came to the end of attribute fork */ out.bmv_oflags |= BMV_OF_LAST; - goto unlock_and_return; - } 
else { - int full = 0; /* user array is full */ - - if (!xfs_getbmapx_fix_eof_hole(ip, &out, - prealloced, bmvend, - map[i].br_startblock)) { - goto unlock_and_return; - } - - /* format results & advance arg */ - error = formatter(&arg, &out, &full); - if (error || full) - goto unlock_and_return; - nexleft--; - bmv->bmv_offset = - out.bmv_offset + out.bmv_length; - bmv->bmv_length = MAX((__int64_t)0, - (__int64_t)(bmvend - bmv->bmv_offset)); - bmv->bmv_entries++; + goto out_free_map; } + + if (!xfs_getbmapx_fix_eof_hole(ip, &out, prealloced, + bmvend, map[i].br_startblock)) + goto out_free_map; + + /* format results & advance arg */ + error = formatter(&arg, &out, &full); + if (error || full) + goto out_free_map; + nexleft--; + bmv->bmv_offset = + out.bmv_offset + out.bmv_length; + bmv->bmv_length = + max_t(__int64_t, 0, bmvend - bmv->bmv_offset); + bmv->bmv_entries++; } } while (nmap && nexleft && bmv->bmv_length); -unlock_and_return: + out_free_map: + kmem_free(map); + out_unlock_ilock: xfs_iunlock_map_shared(ip, lock); + out_unlock_iolock: xfs_iunlock(ip, XFS_IOLOCK_SHARED); - - kmem_free(map); - return error; } -- cgit v1.2.3 From 28e211700a81b0a934b6c7a4b8e7dda843634d2f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Feb 2009 08:39:02 -0500 Subject: xfs: fix getbmap vs mmap deadlock xfs_getbmap (or rather the formatters called by it) copy out the getbmap structures under the ilock, which can deadlock against mmap. This has been reported via bugzilla a while ago (#717) and has recently also shown up via lockdep. So allocate a temporary buffer to format the kernel getbmap structures into and then copy them out after dropping the locks. A little problem with this is that we limit the number of extents we can copy out by the maximum allocation size, but I see no real way around that. 
Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/xfs_bmap.c | 52 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index abe42448b1c..ca7c6005a48 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5890,12 +5890,13 @@ xfs_getbmap( int nexleft; /* # of user extents left */ int subnex; /* # of bmapi's can do */ int nmap; /* number of map entries */ - struct getbmapx out; /* output structure */ + struct getbmapx *out; /* output structure */ int whichfork; /* data or attr fork */ int prealloced; /* this is a file with * preallocated data space */ int iflags; /* interface flags */ int bmapi_flags; /* flags for xfs_bmapi */ + int cur_ext = 0; mp = ip->i_mount; iflags = bmv->bmv_iflags; @@ -5971,6 +5972,13 @@ xfs_getbmap( return XFS_ERROR(EINVAL); bmvend = bmv->bmv_offset + bmv->bmv_length; + + if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) + return XFS_ERROR(ENOMEM); + out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL); + if (!out) + return XFS_ERROR(ENOMEM); + xfs_ilock(ip, XFS_IOLOCK_SHARED); if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) { @@ -6025,39 +6033,39 @@ xfs_getbmap( ASSERT(nmap <= subnex); for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { - int full = 0; /* user array is full */ - - out.bmv_oflags = 0; + out[cur_ext].bmv_oflags = 0; if (map[i].br_state == XFS_EXT_UNWRITTEN) - out.bmv_oflags |= BMV_OF_PREALLOC; + out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; else if (map[i].br_startblock == DELAYSTARTBLOCK) - out.bmv_oflags |= BMV_OF_DELALLOC; - out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff); - out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount); - out.bmv_unused1 = out.bmv_unused2 = 0; + out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC; 
+ out[cur_ext].bmv_offset = + XFS_FSB_TO_BB(mp, map[i].br_startoff); + out[cur_ext].bmv_length = + XFS_FSB_TO_BB(mp, map[i].br_blockcount); + out[cur_ext].bmv_unused1 = 0; + out[cur_ext].bmv_unused2 = 0; ASSERT(((iflags & BMV_IF_DELALLOC) != 0) || (map[i].br_startblock != DELAYSTARTBLOCK)); if (map[i].br_startblock == HOLESTARTBLOCK && whichfork == XFS_ATTR_FORK) { /* came to the end of attribute fork */ - out.bmv_oflags |= BMV_OF_LAST; + out[cur_ext].bmv_oflags |= BMV_OF_LAST; goto out_free_map; } - if (!xfs_getbmapx_fix_eof_hole(ip, &out, prealloced, - bmvend, map[i].br_startblock)) + if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext], + prealloced, bmvend, + map[i].br_startblock)) goto out_free_map; - /* format results & advance arg */ - error = formatter(&arg, &out, &full); - if (error || full) - goto out_free_map; nexleft--; bmv->bmv_offset = - out.bmv_offset + out.bmv_length; + out[cur_ext].bmv_offset + + out[cur_ext].bmv_length; bmv->bmv_length = max_t(__int64_t, 0, bmvend - bmv->bmv_offset); bmv->bmv_entries++; + cur_ext++; } } while (nmap && nexleft && bmv->bmv_length); @@ -6067,6 +6075,16 @@ xfs_getbmap( xfs_iunlock_map_shared(ip, lock); out_unlock_iolock: xfs_iunlock(ip, XFS_IOLOCK_SHARED); + + for (i = 0; i < cur_ext; i++) { + int full = 0; /* user array is full */ + + /* format results & advance arg */ + error = formatter(&arg, &out[i], &full); + if (error || full) + break; + } + return error; } -- cgit v1.2.3 From 66345f50f070ae7412a28543ee197cb5eff73598 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Apr 2009 06:45:08 -0400 Subject: cifs: move #defines for mapchars into cifs_unicode.h Signed-off-by: Jeff Layton Acked-by: Suresh Jayaraman Signed-off-by: Steve French --- fs/cifs/cifs_unicode.h | 13 +++++++++++++ fs/cifs/misc.c | 11 ----------- 2 files changed, 13 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 14eb9a2395d..d6fe8ecd1ff 100644 --- a/fs/cifs/cifs_unicode.h +++ 
b/fs/cifs/cifs_unicode.h @@ -37,6 +37,19 @@ #define UNIUPR_NOLOWER /* Example to not expand lower case tables */ +/* + * Windows maps these to the user defined 16 bit Unicode range since they are + * reserved symbols (along with \ and /), otherwise illegal to store + * in filenames in NTFS + */ +#define UNI_ASTERIK (__u16) ('*' + 0xF000) +#define UNI_QUESTION (__u16) ('?' + 0xF000) +#define UNI_COLON (__u16) (':' + 0xF000) +#define UNI_GRTRTHAN (__u16) ('>' + 0xF000) +#define UNI_LESSTHAN (__u16) ('<' + 0xF000) +#define UNI_PIPE (__u16) ('|' + 0xF000) +#define UNI_SLASH (__u16) ('\\' + 0xF000) + /* Just define what we want from uniupr.h. We don't want to define the tables * in each source file. */ diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 4c89c572891..bb81c8af6a9 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -635,17 +635,6 @@ dump_smb(struct smb_hdr *smb_buf, int smb_buf_length) return; } -/* Windows maps these to the user defined 16 bit Unicode range since they are - reserved symbols (along with \ and /), otherwise illegal to store - in filenames in NTFS */ -#define UNI_ASTERIK (__u16) ('*' + 0xF000) -#define UNI_QUESTION (__u16) ('?' + 0xF000) -#define UNI_COLON (__u16) (':' + 0xF000) -#define UNI_GRTRTHAN (__u16) ('>' + 0xF000) -#define UNI_LESSTHAN (__u16) ('<' + 0xF000) -#define UNI_PIPE (__u16) ('|' + 0xF000) -#define UNI_SLASH (__u16) ('\\' + 0xF000) - /* Convert 16 bit Unicode pathname from wire format to string in current code page. Conversion may involve remapping up the seven characters that are only legal in POSIX-like OS (if they are present in the string). Path -- cgit v1.2.3 From 7fabf0c9479fef9fdb9528a5fbdb1cb744a744a4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Apr 2009 06:46:15 -0400 Subject: cifs: add replacement for cifs_strtoUCS_le called cifs_from_ucs2 Add a replacement function for cifs_strtoUCS_le. 
cifs_from_ucs2 takes args for the source and destination length so that we can ensure that the function is confined within the intended buffers. Signed-off-by: Jeff Layton Acked-by: Suresh Jayaraman Signed-off-by: Steve French --- fs/cifs/cifs_unicode.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/cifs_unicode.h | 2 + 2 files changed, 126 insertions(+) (limited to 'fs') diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 7d75272a6b3..8389f359b03 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -25,6 +25,130 @@ #include "cifsglob.h" #include "cifs_debug.h" +/* + * cifs_mapchar - convert a little-endian char to proper char in codepage + * @target - where converted character should be copied + * @src_char - 2 byte little-endian source character + * @cp - codepage to which character should be converted + * @mapchar - should character be mapped according to mapchars mount option? + * + * Convert a single character and return the number of bytes stored in + * @target; a lone '?' is stored when the codepage conversion fails. The + * caller must ensure @target can hold at least NLS_MAX_CHARSET_SIZE bytes. + */ +static int +cifs_mapchar(char *target, const __le16 src_char, const struct nls_table *cp, + bool mapchar) +{ + int len = 1; + + if (!mapchar) + goto cp_convert; + + /* + * BB: Cannot handle remapping UNI_SLASH until all the calls to + * build_path_from_dentry are modified, as they use slash as + * separator.
+ */ + switch (le16_to_cpu(src_char)) { + case UNI_COLON: + *target = ':'; + break; + case UNI_ASTERIK: + *target = '*'; + break; + case UNI_QUESTION: + *target = '?'; + break; + case UNI_PIPE: + *target = '|'; + break; + case UNI_GRTRTHAN: + *target = '>'; + break; + case UNI_LESSTHAN: + *target = '<'; + break; + default: + goto cp_convert; + } + +out: + return len; + +cp_convert: + len = cp->uni2char(le16_to_cpu(src_char), target, + NLS_MAX_CHARSET_SIZE); + if (len <= 0) { + *target = '?'; + len = 1; + } + goto out; +} + +/* + * cifs_from_ucs2 - convert utf16le string to local charset + * @to - destination buffer + * @from - source buffer + * @tolen - destination buffer size (in bytes) + * @fromlen - source buffer size (in bytes) + * @codepage - codepage to which characters should be converted + * @mapchar - should characters be remapped according to the mapchars option? + * + * Convert a little-endian ucs2le string (as sent by the server) to a string + * in the provided codepage. Conversion stops at the first null word, after + * fromlen bytes of input, or when the next character would not leave room + * for the terminator in the tolen-byte destination, whichever comes first. + * The destination string is always null terminated (nls_nullsize() bytes + * for the codepage). Returns the length of the destination string in bytes + * (including null terminator). + * + * Note that some windows versions actually send multiword UTF-16 characters + * instead of straight UCS-2. The linux nls routines however aren't able to + * deal with those characters properly. In the event that we get some of + * those characters, they won't be translated properly.
+ */ +int +cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, + const struct nls_table *codepage, bool mapchar) +{ + int i, charlen, safelen; + int outlen = 0; + int nullsize = nls_nullsize(codepage); + int fromwords = fromlen / 2; + char tmp[NLS_MAX_CHARSET_SIZE]; + + /* + * Because converted chars can be of varying widths, we need to take + * care not to overflow the destination buffer when we get close to + * the end of it. Below this offset even a maximum-width character + * plus the terminator still fits, so no overflow check is needed. + */ + safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); + + for (i = 0; i < fromwords && from[i]; i++) { + /* + * check to see if converting this character might make the + * conversion bleed into the null terminator + */ + if (outlen >= safelen) { + charlen = cifs_mapchar(tmp, from[i], codepage, mapchar); + if ((outlen + charlen) > (tolen - nullsize)) + break; + } + + /* put converted char into 'to' buffer */ + charlen = cifs_mapchar(&to[outlen], from[i], codepage, mapchar); + outlen += charlen; + } + + /* properly null-terminate string */ + for (i = 0; i < nullsize; i++) + to[outlen++] = 0; + + return outlen; +} + /* * NAME: cifs_strfromUCS() * diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index d6fe8ecd1ff..6aa6533e49f 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -72,6 +72,8 @@ extern struct UniCaseRange UniLowerRange[]; #endif /* UNIUPR_NOLOWER */ #ifdef __KERNEL__ +int cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, + const struct nls_table *codepage, bool mapchar); int cifs_strfromUCS_le(char *, const __le16 *, int, const struct nls_table *); int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *); #endif -- cgit v1.2.3 From 69f801fcaa03be83d58c564f00913b7c172808e4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Apr 2009 06:46:32 -0400 Subject: cifs: add new function to get unicode string length in bytes Working in units of words means we do a lot of
unnecessary conversion back and forth. Standardize on bytes instead since that's more useful for allocating buffers and such. Also, remove hostlen_fromUCS since the new function has a similar purpose. Signed-off-by: Jeff Layton Acked-by: Suresh Jayaraman Signed-off-by: Steve French --- fs/cifs/cifs_unicode.c | 31 +++++++++++++++++++++++++++++++ fs/cifs/cifs_unicode.h | 2 ++ fs/cifs/cifssmb.c | 25 ++----------------------- 3 files changed, 35 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 8389f359b03..614512573c6 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -25,6 +25,37 @@ #include "cifsglob.h" #include "cifs_debug.h" +/* + * cifs_ucs2_bytes - how long will a string be after conversion? + * @ucs - pointer to input string + * @maxbytes - don't go past this many bytes of input string + * @codepage - destination codepage + * + * Walk a ucs2le string and return the number of bytes that the string will + * be after being converted to the given charset, not including any null + * termination required. Don't walk past maxbytes in the source buffer. 
+ */ +int +cifs_ucs2_bytes(const __le16 *from, int maxbytes, + const struct nls_table *codepage) +{ + int i; + int charlen, outlen = 0; + int maxwords = maxbytes / 2; + char tmp[NLS_MAX_CHARSET_SIZE]; + + for (i = 0; i < maxwords && from[i]; i++) { + charlen = codepage->uni2char(le16_to_cpu(from[i]), tmp, + NLS_MAX_CHARSET_SIZE); + if (charlen > 0) + outlen += charlen; + else + outlen++; + } + + return outlen; +} + /* * cifs_mapchar - convert a little-endian char to proper char in codepage * @target - where converted character should be copied diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 6aa6533e49f..1857f5ff933 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -74,6 +74,8 @@ extern struct UniCaseRange UniLowerRange[]; #ifdef __KERNEL__ int cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, const struct nls_table *codepage, bool mapchar); +int cifs_ucs2_bytes(const __le16 *from, int maxbytes, + const struct nls_table *codepage); int cifs_strfromUCS_le(char *, const __le16 *, int, const struct nls_table *); int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *); #endif diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a02c43b3faf..cadacae46b8 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3928,27 +3928,6 @@ GetInodeNumOut: return rc; } -/* computes length of UCS string converted to host codepage - * @src: UCS string - * @maxlen: length of the input string in UCS characters - * (not in bytes) - * - * return: size of input string in host codepage - */ -static int hostlen_fromUCS(const __le16 *src, const int maxlen, - const struct nls_table *nls_codepage) { - int i; - int hostlen = 0; - char to[4]; - int charlen; - for (i = 0; (i < maxlen) && src[i]; ++i) { - charlen = nls_codepage->uni2char(le16_to_cpu(src[i]), - to, NLS_MAX_CHARSET_SIZE); - hostlen += charlen > 0 ?
charlen : 1; - } - return hostlen; -} - /* parses DFS refferal V3 structure * caller is responsible for freeing target_nodes * returns: @@ -4016,8 +3995,8 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, GFP_KERNEL); cifsConvertToUCS((__le16 *) tmp, searchName, PATH_MAX, nls_codepage, remap); - node->path_consumed = hostlen_fromUCS(tmp, - le16_to_cpu(pSMBr->PathConsumed)/2, + node->path_consumed = cifs_ucs2_bytes(tmp, + le16_to_cpu(pSMBr->PathConsumed), nls_codepage); kfree(tmp); } else -- cgit v1.2.3 From 066ce6899484d9026acd6ba3a8dbbedb33d7ae1b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Apr 2009 07:16:14 -0400 Subject: cifs: rename cifs_strlcpy_to_host and make it use new functions Rename cifs_strlcpy_to_host to cifs_strndup since that better describes what this function really does. Then, convert it to use the new string conversion and measurement functions that work in units of bytes rather than wide chars. Signed-off-by: Jeff Layton Acked-by: Suresh Jayaraman Signed-off-by: Steve French --- fs/cifs/cifs_unicode.c | 38 ++++++++++++++++++++++++++++++++++ fs/cifs/cifs_unicode.h | 2 ++ fs/cifs/cifssmb.c | 55 +++++++++++--------------------------------------- 3 files changed, 52 insertions(+), 43 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 614512573c6..2a879cff3a4 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -243,3 +243,41 @@ cifs_strtoUCS(__le16 *to, const char *from, int len, return i; } +/* + * cifs_strndup - copy a string from wire format to the local codepage + * @src - source string + * @maxlen - don't walk past this many bytes in the source string + * @is_unicode - is this a unicode string? + * @codepage - destination codepage + * + * Take a string given by the server, convert it to the local codepage and + * put it in a new buffer. Returns a pointer to the new string or NULL on + * error. 
+ */ +char * +cifs_strndup(const char *src, const int maxlen, const bool is_unicode, + const struct nls_table *codepage) +{ + int len; + char *dst; + + if (is_unicode) { + len = cifs_ucs2_bytes((__le16 *) src, maxlen, codepage); + len += nls_nullsize(codepage); + dst = kmalloc(len, GFP_KERNEL); + if (!dst) + return NULL; + cifs_from_ucs2(dst, (__le16 *) src, len, maxlen, codepage, + false); + } else { + len = strnlen(src, maxlen); + dst = kmalloc(len + 1, GFP_KERNEL); + if (!dst) + return NULL; + memcpy(dst, src, len); + dst[len] = 0; + } + + return dst; +} + diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 1857f5ff933..e620f0b4220 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -78,6 +78,8 @@ int cifs_ucs2_bytes(const __le16 *from, int maxbytes, const struct nls_table *codepage); int cifs_strfromUCS_le(char *, const __le16 *, int, const struct nls_table *); int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *); +char *cifs_strndup(const char *src, const int maxlen, const bool is_unicode, + const struct nls_table *codepage); #endif /* diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index cadacae46b8..f15848374cf 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -81,41 +81,6 @@ static struct { #endif /* CONFIG_CIFS_WEAK_PW_HASH */ #endif /* CIFS_POSIX */ -/* Allocates buffer into dst and copies smb string from src to it. - * caller is responsible for freeing dst if function returned 0.
- * returns: - * on success - 0 - * on failure - errno - */ -static int -cifs_strlcpy_to_host(char **dst, const char *src, const int maxlen, - const bool is_unicode, const struct nls_table *nls_codepage) -{ - int plen; - - if (is_unicode) { - plen = UniStrnlen((wchar_t *)src, maxlen); - *dst = kmalloc((4 * plen) + 2, GFP_KERNEL); - if (!*dst) - goto cifs_strlcpy_to_host_ErrExit; - cifs_strfromUCS_le(*dst, (__le16 *)src, plen, nls_codepage); - (*dst)[plen] = 0; - (*dst)[plen+1] = 0; /* needed for Unicode */ - } else { - plen = strnlen(src, maxlen); - *dst = kmalloc(plen + 2, GFP_KERNEL); - if (!*dst) - goto cifs_strlcpy_to_host_ErrExit; - strlcpy(*dst, src, plen); - } - return 0; - -cifs_strlcpy_to_host_ErrExit: - cERROR(1, ("Failed to allocate buffer for string\n")); - return -ENOMEM; -} - - /* Mark as invalid, all open files on tree connections since they were closed when session to server was lost */ static void mark_open_files_invalid(struct cifsTconInfo *pTcon) @@ -4008,20 +3973,24 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, /* copy DfsPath */ temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset); max_len = data_end - temp; - rc = cifs_strlcpy_to_host(&(node->path_name), temp, - max_len, is_unicode, nls_codepage); - if (rc) + node->path_name = cifs_strndup(temp, max_len, is_unicode, + nls_codepage); + if (!node->path_name) { + /* cifs_strndup returns NULL, not an ERR_PTR, on failure */ + rc = -ENOMEM; goto parse_DFS_referrals_exit; + } /* copy link target UNC */ temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset); max_len = data_end - temp; - rc = cifs_strlcpy_to_host(&(node->node_name), temp, - max_len, is_unicode, nls_codepage); - if (rc) + node->node_name = cifs_strndup(temp, max_len, is_unicode, + nls_codepage); + if (!node->node_name) { + /* cifs_strndup returns NULL, not an ERR_PTR, on failure */ + rc = -ENOMEM; goto parse_DFS_referrals_exit; - - ref += le16_to_cpu(ref->Size); + } } parse_DFS_referrals_exit: -- cgit v1.2.3 From
cc20c031bb067eb3280a1c4b5c42295093e24863 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Apr 2009 07:16:21 -0400 Subject: cifs: convert CIFSTCon to use new unicode helper functions Signed-off-by: Jeff Layton Acked-by: Suresh Jayaraman Signed-off-by: Steve French --- fs/cifs/connect.c | 49 ++++++++++++++----------------------------------- 1 file changed, 14 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index bacdef1546b..e94d6b29af6 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3638,7 +3638,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, TCONX_RSP *pSMBr; unsigned char *bcc_ptr; int rc = 0; - int length; + int length, bytes_left; __u16 count; if (ses == NULL) @@ -3726,14 +3726,15 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length, CIFS_STD_OP); - /* if (rc) rc = map_smb_to_linux_error(smb_buffer_response); */ /* above now done in SendReceive */ if ((rc == 0) && (tcon != NULL)) { tcon->tidStatus = CifsGood; tcon->need_reconnect = false; tcon->tid = smb_buffer_response->Tid; bcc_ptr = pByteArea(smb_buffer_response); - length = strnlen(bcc_ptr, BCC(smb_buffer_response) - 2); + bytes_left = BCC(smb_buffer_response); + length = strnlen(bcc_ptr, bytes_left - 2); + /* skip service field (NB: this field is always ASCII) */ if (length == 3) { if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') && @@ -3748,39 +3749,17 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, } } bcc_ptr += length + 1; + bytes_left -= (length + 1); strncpy(tcon->treeName, tree, MAX_TREE_SIZE); - if (smb_buffer->Flags2 & SMBFLG2_UNICODE) { - length = UniStrnlen((wchar_t *) bcc_ptr, 512); - if ((bcc_ptr + (2 * length)) - - pByteArea(smb_buffer_response) <= - BCC(smb_buffer_response)) { - kfree(tcon->nativeFileSystem); - tcon->nativeFileSystem = - kzalloc((4 * length) + 2, GFP_KERNEL); - if (tcon->nativeFileSystem) { - cifs_strfromUCS_le( - 
tcon->nativeFileSystem, - (__le16 *) bcc_ptr, - length, nls_codepage); - cFYI(1, ("nativeFileSystem=%s", - tcon->nativeFileSystem)); - } - } - /* else do not bother copying these information fields*/ - } else { - length = strnlen(bcc_ptr, 1024); - if ((bcc_ptr + length) - - pByteArea(smb_buffer_response) <= - BCC(smb_buffer_response)) { - kfree(tcon->nativeFileSystem); - tcon->nativeFileSystem = - kzalloc(length + 1, GFP_KERNEL); - if (tcon->nativeFileSystem) - strncpy(tcon->nativeFileSystem, bcc_ptr, - length); - } - /* else do not bother copying these information fields*/ - } + + /* mostly informational -- no need to fail on error here */ + kfree(tcon->nativeFileSystem); + tcon->nativeFileSystem = cifs_strndup(bcc_ptr, bytes_left, + smb_buffer->Flags2 & + SMBFLG2_UNICODE, + nls_codepage); + cFYI(1, ("nativeFileSystem=%s", tcon->nativeFileSystem)); + if ((smb_buffer_response->WordCount == 3) || (smb_buffer_response->WordCount == 7)) /* field is in same location */ -- cgit v1.2.3 From 59140797c5817363087b0ffb46e6bb81a11fe0dc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Apr 2009 07:16:21 -0400 Subject: cifs: fix session setup unicode string saving to use new unicode helpers ...and change decode_unicode_ssetup to be a void function. It never returns an actual error anyway.
Signed-off-by: Jeff Layton Acked-by: Suresh Jayaraman Signed-off-by: Steve French --- fs/cifs/sess.c | 80 ++++++++++++++++------------------------------------------ 1 file changed, 22 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index c652c73760d..93022dc9bab 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -277,12 +277,11 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses, *pbcc_area = bcc_ptr; } -static int decode_unicode_ssetup(char **pbcc_area, int bleft, - struct cifsSesInfo *ses, - const struct nls_table *nls_cp) +static void +decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses, + const struct nls_table *nls_cp) { - int rc = 0; - int words_left, len; + int len; char *data = *pbcc_area; cFYI(1, ("bleft %d", bleft)); @@ -300,63 +299,29 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft, ++bleft; } - words_left = bleft / 2; - - /* save off server operating system */ - len = UniStrnlen((wchar_t *) data, words_left); - - if (len >= words_left) - return rc; - kfree(ses->serverOS); - /* UTF-8 string will not grow more than four times as big as UCS-16 */ - ses->serverOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL); - if (ses->serverOS != NULL) { - cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, nls_cp); - cFYI(1, ("serverOS=%s", ses->serverOS)); - } - data += 2 * (len + 1); - words_left -= len + 1; - - /* save off server network operating system */ - len = UniStrnlen((wchar_t *) data, words_left); - - if (len >= words_left) - return rc; + ses->serverOS = cifs_strndup(data, bleft, true, nls_cp); + cFYI(1, ("serverOS=%s", ses->serverOS)); + len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2; + data += len; + bleft -= len; + if (bleft <= 0) + return; kfree(ses->serverNOS); - ses->serverNOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL); - if (ses->serverNOS != NULL) { - cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, 
len, - nls_cp); - cFYI(1, ("serverNOS=%s", ses->serverNOS)); - if (strncmp(ses->serverNOS, "NT LAN Manager 4", 16) == 0) { - cFYI(1, ("NT4 server")); - ses->flags |= CIFS_SES_NT4; - } - } - data += 2 * (len + 1); - words_left -= len + 1; - - /* save off server domain */ - len = UniStrnlen((wchar_t *) data, words_left); - - if (len > words_left) - return rc; + ses->serverNOS = cifs_strndup(data, bleft, true, nls_cp); + cFYI(1, ("serverNOS=%s", ses->serverNOS)); + len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2; + data += len; + bleft -= len; + if (bleft <= 0) + return; kfree(ses->serverDomain); - ses->serverDomain = kzalloc((4 * len) + 2, GFP_KERNEL); - if (ses->serverDomain != NULL) { - cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len, - nls_cp); - cFYI(1, ("serverDomain=%s", ses->serverDomain)); - } - data += 2 * (len + 1); - words_left -= len + 1; + ses->serverDomain = cifs_strndup(data, bleft, true, nls_cp); + cFYI(1, ("serverDomain=%s", ses->serverDomain)); - cFYI(1, ("words left: %d", words_left)); - - return rc; + return; } static int decode_ascii_ssetup(char **pbcc_area, int bleft, @@ -709,8 +674,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, ++bcc_ptr; --bytes_remaining; } - rc = decode_unicode_ssetup(&bcc_ptr, bytes_remaining, - ses, nls_cp); + decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); } else { rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); -- cgit v1.2.3 From 460b96960d1946914e50316ffeefe7b41dddce91 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Apr 2009 07:17:56 -0400 Subject: cifs: change CIFSSMBUnixQuerySymLink to use new helpers Change CIFSSMBUnixQuerySymLink to use the new unicode helper functions. Also change the calling conventions so that the allocation of the target name buffer is done in CIFSSMBUnixQuerySymLink rather than by the caller. 
Signed-off-by: Jeff Layton Acked-by: Suresh Jayaraman Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 3 +-- fs/cifs/cifssmb.c | 36 ++++++++++++++---------------------- fs/cifs/link.c | 22 +++++----------------- 3 files changed, 20 insertions(+), 41 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 4167716d32f..7d54a5a4dd5 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -260,8 +260,7 @@ extern int CIFSUnixCreateSymLink(const int xid, const struct nls_table *nls_codepage); extern int CIFSSMBUnixQuerySymLink(const int xid, struct cifsTconInfo *tcon, - const unsigned char *searchName, - char *syminfo, const int buflen, + const unsigned char *searchName, char **syminfo, const struct nls_table *nls_codepage); extern int CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index f15848374cf..dfb8e391d53 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2382,8 +2382,7 @@ winCreateHardLinkRetry: int CIFSSMBUnixQuerySymLink(const int xid, struct cifsTconInfo *tcon, - const unsigned char *searchName, - char *symlinkinfo, const int buflen, + const unsigned char *searchName, char **symlinkinfo, const struct nls_table *nls_codepage) { /* SMB_QUERY_FILE_UNIX_LINK */ @@ -2393,6 +2392,7 @@ CIFSSMBUnixQuerySymLink(const int xid, struct cifsTconInfo *tcon, int bytes_returned; int name_len; __u16 params, byte_count; + char *data_start; cFYI(1, ("In QPathSymLinkInfo (Unix) for path %s", searchName)); @@ -2447,30 +2447,22 @@ querySymLinkRetry: /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); - if (rc || (pSMBr->ByteCount < 2)) /* BB also check enough total bytes returned */ - rc = -EIO; /* bad smb */ + if (rc || (pSMBr->ByteCount < 2)) + rc = -EIO; else { - __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); - __u16 count = le16_to_cpu(pSMBr->t2.DataCount); + u16 count = le16_to_cpu(pSMBr->t2.DataCount); + + data_start = ((char 
*) &pSMBr->hdr.Protocol) + + le16_to_cpu(pSMBr->t2.DataOffset); - if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len = UniStrnlen((wchar_t *) ((char *) - &pSMBr->hdr.Protocol + data_offset), - min_t(const int, buflen, count) / 2); /* BB FIXME investigate remapping reserved chars here */ - cifs_strfromUCS_le(symlinkinfo, - (__le16 *) ((char *)&pSMBr->hdr.Protocol - + data_offset), - name_len, nls_codepage); - } else { - strncpy(symlinkinfo, - (char *) &pSMBr->hdr.Protocol + - data_offset, - min_t(const int, buflen, count)); - } - symlinkinfo[buflen] = 0; - /* just in case so calling code does not go off the end of buffer */ + *symlinkinfo = cifs_strndup(data_start, count, + pSMBr->hdr.Flags2 & + SMBFLG2_UNICODE, + nls_codepage); + if (!*symlinkinfo) + rc = -ENOMEM; } } cifs_buf_release(pSMB); diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 63f644000ce..e17a092f43e 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -119,16 +119,11 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) full_path = build_path_from_dentry(direntry); if (!full_path) - goto out_no_free; + goto out; cFYI(1, ("Full path: %s inode = 0x%p", full_path, inode)); cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; - target_path = kmalloc(PATH_MAX, GFP_KERNEL); - if (!target_path) { - target_path = ERR_PTR(-ENOMEM); - goto out; - } /* We could change this to: if (pTcon->unix_ext) @@ -138,8 +133,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) if (pTcon->ses->capabilities & CAP_UNIX) rc = CIFSSMBUnixQuerySymLink(xid, pTcon, full_path, - target_path, - PATH_MAX-1, + &target_path, cifs_sb->local_nls); else { /* BB add read reparse point symlink code here */ @@ -148,22 +142,16 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) /* BB Add MAC style xsymlink check here if enabled */ } - if (rc == 0) { - -/* BB Add special case check for Samba DFS symlinks */ - - target_path[PATH_MAX-1] = 0; - } else { + if (rc != 0) { kfree(target_path);
target_path = ERR_PTR(rc); } -out: kfree(full_path); -out_no_free: +out: FreeXid(xid); nd_set_link(nd, target_path); - return NULL; /* No cookie */ + return NULL; } int -- cgit v1.2.3 From f58841666bc22e827ca0dcef7b71c7bc2758ce82 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Apr 2009 07:18:00 -0400 Subject: cifs: change cifs_get_name_from_search_buf to use new unicode helper ...and remove cifs_convertUCSpath. There are no more callers. Also add a #define for the buffer used in the readdir path so that we don't have so many magic numbers floating around. Signed-off-by: Jeff Layton Acked-by: Suresh Jayaraman Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 2 -- fs/cifs/misc.c | 60 ----------------------------------------------------- fs/cifs/readdir.c | 26 +++++++++++------------ 3 files changed, 13 insertions(+), 75 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 7d54a5a4dd5..fae083930ee 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -306,8 +306,6 @@ extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon, const unsigned char *searchName, __u64 *inode_number, const struct nls_table *nls_codepage, int remap_special_chars); -extern int cifs_convertUCSpath(char *target, const __le16 *source, int maxlen, - const struct nls_table *codepage); extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen, const struct nls_table *cp, int mapChars); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index bb81c8af6a9..e079a9190ec 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -635,66 +635,6 @@ dump_smb(struct smb_hdr *smb_buf, int smb_buf_length) return; } -/* Convert 16 bit Unicode pathname from wire format to string in current code - page. Conversion may involve remapping up the seven characters that are - only legal in POSIX-like OS (if they are present in the string). 
Path - names are little endian 16 bit Unicode on the wire */ -int -cifs_convertUCSpath(char *target, const __le16 *source, int maxlen, - const struct nls_table *cp) -{ - int i, j, len; - __u16 src_char; - - for (i = 0, j = 0; i < maxlen; i++) { - src_char = le16_to_cpu(source[i]); - switch (src_char) { - case 0: - goto cUCS_out; /* BB check this BB */ - case UNI_COLON: - target[j] = ':'; - break; - case UNI_ASTERIK: - target[j] = '*'; - break; - case UNI_QUESTION: - target[j] = '?'; - break; - /* BB We can not handle remapping slash until - all the calls to build_path_from_dentry - are modified, as they use slash as separator BB */ - /* case UNI_SLASH: - target[j] = '\\'; - break;*/ - case UNI_PIPE: - target[j] = '|'; - break; - case UNI_GRTRTHAN: - target[j] = '>'; - break; - case UNI_LESSTHAN: - target[j] = '<'; - break; - default: - len = cp->uni2char(src_char, &target[j], - NLS_MAX_CHARSET_SIZE); - if (len > 0) { - j += len; - continue; - } else { - target[j] = '?'; - } - } - j++; - /* make sure we do not overrun callers allocated temp buffer */ - if (j >= (2 * NAME_MAX)) - break; - } -cUCS_out: - target[j] = 0; - return j; -} - /* Convert 16 bit Unicode pathname to wire format from string in current code page. Conversion may involve remapping up the seven characters that are only legal in POSIX-like OS (if they are present in the string). Path diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index ebd0da7ecb3..e1351fe18a1 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -31,6 +31,13 @@ #include "cifs_fs_sb.h" #include "cifsfs.h" +/* + * To be safe - for UCS to UTF-8 with strings loaded with the rare long + * characters alloc more to account for such multibyte target UTF-8 + * characters. 
+ */ +#define UNICODE_NAME_MAX ((4 * NAME_MAX) + 2) + #ifdef CONFIG_CIFS_DEBUG2 static void dump_cifs_file_struct(struct file *file, char *label) { @@ -881,14 +888,11 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst, } if (unicode) { - /* BB fixme - test with long names */ - /* Note converted filename can be longer than in unicode */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) - pqst->len = cifs_convertUCSpath((char *)pqst->name, - (__le16 *)filename, len/2, nlt); - else - pqst->len = cifs_strfromUCS_le((char *)pqst->name, - (__le16 *)filename, len/2, nlt); + pqst->len = cifs_from_ucs2((char *) pqst->name, + (__le16 *) filename, + UNICODE_NAME_MAX, max_len, nlt, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else { pqst->name = filename; pqst->len = len; @@ -1070,11 +1074,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) cifsFile->srch_inf.ntwrk_buf_start); end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; - /* To be safe - for UCS to UTF-8 with strings loaded - with the rare long characters alloc more to account for - such multibyte target UTF-8 characters. cifs_unicode.c, - which actually does the conversion, has the same limit */ - tmp_buf = kmalloc((4 * NAME_MAX) + 2, GFP_KERNEL); + tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL); for (i = 0; (i < num_to_fill) && (rc == 0); i++) { if (current_entry == NULL) { /* evaluate whether this case is an error */ -- cgit v1.2.3 From 20418acd6874792359b42c12d159f42f17593f34 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 30 Apr 2009 16:13:32 +0000 Subject: [CIFS] Remove older session setup implementation Two years ago, when the session setup code in cifs was rewritten and moved to fs/cifs/sess.c, we were asked to keep the old code for a release or so (which could be reenabled at runtime) since it was such a large change and because the asn (SPNEGO) and NTLMSSP code was not rewritten and needed to be. 
This was useful to avoid regressions, but is long overdue to be removed. Now that the Kerberos (asn/spnego) code is working in fs/cifs/sess.c, and the NTLMSSP code moved (NTLMSSP blob setup be rewritten with the next patch in this series) quite a bit of dead code from fs/cifs/connect.c now can be removed. This old code should have been removed last year, but the earlier krb5 patches did not move/remove the NTLMSSP code which we had asked to be done first. Since no one else volunteered, I am doing it now. It is extremely important that we continue to examine the documentation for this area, to make sure our code continues to be uptodate with changes since Windows 2003. Signed-off-by: Steve French --- fs/cifs/connect.c | 381 +----------------------------------------------------- 1 file changed, 7 insertions(+), 374 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e94d6b29af6..579a628d1e6 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2592,310 +2592,6 @@ out: return rc; } -static int -CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, - char session_key[CIFS_SESS_KEY_SIZE], - const struct nls_table *nls_codepage) -{ - struct smb_hdr *smb_buffer; - struct smb_hdr *smb_buffer_response; - SESSION_SETUP_ANDX *pSMB; - SESSION_SETUP_ANDX *pSMBr; - char *bcc_ptr; - char *user; - char *domain; - int rc = 0; - int remaining_words = 0; - int bytes_returned = 0; - int len; - __u32 capabilities; - __u16 count; - - cFYI(1, ("In sesssetup")); - if (ses == NULL) - return -EINVAL; - user = ses->userName; - domain = ses->domainName; - smb_buffer = cifs_buf_get(); - - if (smb_buffer == NULL) - return -ENOMEM; - - smb_buffer_response = smb_buffer; - pSMBr = pSMB = (SESSION_SETUP_ANDX *) smb_buffer; - - /* send SMBsessionSetup here */ - header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX, - NULL /* no tCon exists yet */ , 13 /* wct */ ); - - smb_buffer->Mid = GetNextMid(ses->server); - pSMB->req_no_secext.AndXCommand = 0xFF; - 
pSMB->req_no_secext.MaxBufferSize = cpu_to_le16(ses->server->maxBuf); - pSMB->req_no_secext.MaxMpxCount = cpu_to_le16(ses->server->maxReq); - - if (ses->server->secMode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) - smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; - - capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | - CAP_LARGE_WRITE_X | CAP_LARGE_READ_X; - if (ses->capabilities & CAP_UNICODE) { - smb_buffer->Flags2 |= SMBFLG2_UNICODE; - capabilities |= CAP_UNICODE; - } - if (ses->capabilities & CAP_STATUS32) { - smb_buffer->Flags2 |= SMBFLG2_ERR_STATUS; - capabilities |= CAP_STATUS32; - } - if (ses->capabilities & CAP_DFS) { - smb_buffer->Flags2 |= SMBFLG2_DFS; - capabilities |= CAP_DFS; - } - pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); - - pSMB->req_no_secext.CaseInsensitivePasswordLength = - cpu_to_le16(CIFS_SESS_KEY_SIZE); - - pSMB->req_no_secext.CaseSensitivePasswordLength = - cpu_to_le16(CIFS_SESS_KEY_SIZE); - bcc_ptr = pByteArea(smb_buffer); - memcpy(bcc_ptr, (char *) session_key, CIFS_SESS_KEY_SIZE); - bcc_ptr += CIFS_SESS_KEY_SIZE; - memcpy(bcc_ptr, (char *) session_key, CIFS_SESS_KEY_SIZE); - bcc_ptr += CIFS_SESS_KEY_SIZE; - - if (ses->capabilities & CAP_UNICODE) { - if ((long) bcc_ptr % 2) { /* must be word aligned for Unicode */ - *bcc_ptr = 0; - bcc_ptr++; - } - if (user == NULL) - bytes_returned = 0; /* skip null user */ - else - bytes_returned = - cifs_strtoUCS((__le16 *) bcc_ptr, user, 100, - nls_codepage); - /* convert number of 16 bit words to bytes */ - bcc_ptr += 2 * bytes_returned; - bcc_ptr += 2; /* trailing null */ - if (domain == NULL) - bytes_returned = - cifs_strtoUCS((__le16 *) bcc_ptr, - "CIFS_LINUX_DOM", 32, nls_codepage); - else - bytes_returned = - cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64, - nls_codepage); - bcc_ptr += 2 * bytes_returned; - bcc_ptr += 2; - bytes_returned = - cifs_strtoUCS((__le16 *) bcc_ptr, "Linux version ", - 32, nls_codepage); - bcc_ptr += 2 * bytes_returned; - 
bytes_returned = - cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, - 32, nls_codepage); - bcc_ptr += 2 * bytes_returned; - bcc_ptr += 2; - bytes_returned = - cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS, - 64, nls_codepage); - bcc_ptr += 2 * bytes_returned; - bcc_ptr += 2; - } else { - if (user != NULL) { - strncpy(bcc_ptr, user, 200); - bcc_ptr += strnlen(user, 200); - } - *bcc_ptr = 0; - bcc_ptr++; - if (domain == NULL) { - strcpy(bcc_ptr, "CIFS_LINUX_DOM"); - bcc_ptr += strlen("CIFS_LINUX_DOM") + 1; - } else { - strncpy(bcc_ptr, domain, 64); - bcc_ptr += strnlen(domain, 64); - *bcc_ptr = 0; - bcc_ptr++; - } - strcpy(bcc_ptr, "Linux version "); - bcc_ptr += strlen("Linux version "); - strcpy(bcc_ptr, utsname()->release); - bcc_ptr += strlen(utsname()->release) + 1; - strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); - bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; - } - count = (long) bcc_ptr - (long) pByteArea(smb_buffer); - smb_buffer->smb_buf_length += count; - pSMB->req_no_secext.ByteCount = cpu_to_le16(count); - - rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, - &bytes_returned, CIFS_LONG_OP); - if (rc) { -/* rc = map_smb_to_linux_error(smb_buffer_response); now done in SendReceive */ - } else if ((smb_buffer_response->WordCount == 3) - || (smb_buffer_response->WordCount == 4)) { - __u16 action = le16_to_cpu(pSMBr->resp.Action); - __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength); - if (action & GUEST_LOGIN) - cFYI(1, ("Guest login")); /* BB mark SesInfo struct? 
*/ - ses->Suid = smb_buffer_response->Uid; /* UID left in wire format - (little endian) */ - cFYI(1, ("UID = %d ", ses->Suid)); - /* response can have either 3 or 4 word count - Samba sends 3 */ - bcc_ptr = pByteArea(smb_buffer_response); - if ((pSMBr->resp.hdr.WordCount == 3) - || ((pSMBr->resp.hdr.WordCount == 4) - && (blob_len < pSMBr->resp.ByteCount))) { - if (pSMBr->resp.hdr.WordCount == 4) - bcc_ptr += blob_len; - - if (smb_buffer->Flags2 & SMBFLG2_UNICODE) { - if ((long) (bcc_ptr) % 2) { - remaining_words = - (BCC(smb_buffer_response) - 1) / 2; - /* Unicode strings must be word - aligned */ - bcc_ptr++; - } else { - remaining_words = - BCC(smb_buffer_response) / 2; - } - len = - UniStrnlen((wchar_t *) bcc_ptr, - remaining_words - 1); -/* We look for obvious messed up bcc or strings in response so we do not go off - the end since (at least) WIN2K and Windows XP have a major bug in not null - terminating last Unicode string in response */ - kfree(ses->serverOS); - ses->serverOS = kzalloc(2 * (len + 1), - GFP_KERNEL); - if (ses->serverOS == NULL) - goto sesssetup_nomem; - cifs_strfromUCS_le(ses->serverOS, - (__le16 *)bcc_ptr, - len, nls_codepage); - bcc_ptr += 2 * (len + 1); - remaining_words -= len + 1; - ses->serverOS[2 * len] = 0; - ses->serverOS[1 + (2 * len)] = 0; - if (remaining_words > 0) { - len = UniStrnlen((wchar_t *)bcc_ptr, - remaining_words-1); - kfree(ses->serverNOS); - ses->serverNOS = kzalloc(2 * (len + 1), - GFP_KERNEL); - if (ses->serverNOS == NULL) - goto sesssetup_nomem; - cifs_strfromUCS_le(ses->serverNOS, - (__le16 *)bcc_ptr, - len, nls_codepage); - bcc_ptr += 2 * (len + 1); - ses->serverNOS[2 * len] = 0; - ses->serverNOS[1 + (2 * len)] = 0; - if (strncmp(ses->serverNOS, - "NT LAN Manager 4", 16) == 0) { - cFYI(1, ("NT4 server")); - ses->flags |= CIFS_SES_NT4; - } - remaining_words -= len + 1; - if (remaining_words > 0) { - len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); - /* last string is not always null terminated - (for e.g. 
for Windows XP & 2000) */ - kfree(ses->serverDomain); - ses->serverDomain = - kzalloc(2*(len+1), - GFP_KERNEL); - if (ses->serverDomain == NULL) - goto sesssetup_nomem; - cifs_strfromUCS_le(ses->serverDomain, - (__le16 *)bcc_ptr, - len, nls_codepage); - bcc_ptr += 2 * (len + 1); - ses->serverDomain[2*len] = 0; - ses->serverDomain[1+(2*len)] = 0; - } else { /* else no more room so create - dummy domain string */ - kfree(ses->serverDomain); - ses->serverDomain = - kzalloc(2, GFP_KERNEL); - } - } else { /* no room so create dummy domain - and NOS string */ - - /* if these kcallocs fail not much we - can do, but better to not fail the - sesssetup itself */ - kfree(ses->serverDomain); - ses->serverDomain = - kzalloc(2, GFP_KERNEL); - kfree(ses->serverNOS); - ses->serverNOS = - kzalloc(2, GFP_KERNEL); - } - } else { /* ASCII */ - len = strnlen(bcc_ptr, 1024); - if (((long) bcc_ptr + len) - (long) - pByteArea(smb_buffer_response) - <= BCC(smb_buffer_response)) { - kfree(ses->serverOS); - ses->serverOS = kzalloc(len + 1, - GFP_KERNEL); - if (ses->serverOS == NULL) - goto sesssetup_nomem; - strncpy(ses->serverOS, bcc_ptr, len); - - bcc_ptr += len; - /* null terminate the string */ - bcc_ptr[0] = 0; - bcc_ptr++; - - len = strnlen(bcc_ptr, 1024); - kfree(ses->serverNOS); - ses->serverNOS = kzalloc(len + 1, - GFP_KERNEL); - if (ses->serverNOS == NULL) - goto sesssetup_nomem; - strncpy(ses->serverNOS, bcc_ptr, len); - bcc_ptr += len; - bcc_ptr[0] = 0; - bcc_ptr++; - - len = strnlen(bcc_ptr, 1024); - kfree(ses->serverDomain); - ses->serverDomain = kzalloc(len + 1, - GFP_KERNEL); - if (ses->serverDomain == NULL) - goto sesssetup_nomem; - strncpy(ses->serverDomain, bcc_ptr, - len); - bcc_ptr += len; - bcc_ptr[0] = 0; - bcc_ptr++; - } else - cFYI(1, - ("Variable field of length %d " - "extends beyond end of smb ", - len)); - } - } else { - cERROR(1, ("Security Blob Length extends beyond " - "end of SMB")); - } - } else { - cERROR(1, ("Invalid Word count %d: ", - 
smb_buffer_response->WordCount)); - rc = -EIO; - } -sesssetup_nomem: /* do not return an error on nomem for the info strings, - since that could make reconnection harder, and - reconnection might be needed to free memory */ - cifs_buf_release(smb_buffer); - - return rc; -} - static int CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, struct cifsSesInfo *ses, bool *pNTLMv2_flag, @@ -3229,6 +2925,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, return rc; } + static int CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, char *ntlm_session_key, bool ntlmv2_flag, @@ -3831,83 +3528,19 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, pSesInfo->capabilities = server->capabilities; if (linuxExtEnabled == 0) pSesInfo->capabilities &= (~CAP_UNIX); - /* pSesInfo->sequence_number = 0;*/ + cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", server->secMode, server->capabilities, server->timeAdj)); - if (experimEnabled < 2) - rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); - else if (extended_security - && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) - && (server->secType == NTLMSSP)) { - rc = -EOPNOTSUPP; - } else if (extended_security - && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) - && (server->secType == RawNTLMSSP)) { - cFYI(1, ("NTLMSSP sesssetup")); - rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag, - nls_info); - if (!rc) { - if (ntlmv2_flag) { - char *v2_response; - cFYI(1, ("more secure NTLM ver2 hash")); - if (CalcNTLMv2_partial_mac_key(pSesInfo, - nls_info)) { - rc = -ENOMEM; - goto ss_err_exit; - } else - v2_response = kmalloc(16 + 64 /* blob*/, - GFP_KERNEL); - if (v2_response) { - CalcNTLMv2_response(pSesInfo, - v2_response); - /* if (first_time) - cifs_calculate_ntlmv2_mac_key */ - kfree(v2_response); - /* BB Put dummy sig in SessSetup PDU? 
*/ - } else { - rc = -ENOMEM; - goto ss_err_exit; - } - - } else { - SMBNTencrypt(pSesInfo->password, - server->cryptKey, - ntlm_session_key); - - if (first_time) - cifs_calculate_mac_key( - &server->mac_signing_key, - ntlm_session_key, - pSesInfo->password); - } - /* for better security the weaker lanman hash not sent - in AuthSessSetup so we no longer calculate it */ - - rc = CIFSNTLMSSPAuthSessSetup(xid, pSesInfo, - ntlm_session_key, - ntlmv2_flag, - nls_info); - } - } else { /* old style NTLM 0.12 session setup */ - SMBNTencrypt(pSesInfo->password, server->cryptKey, - ntlm_session_key); - - if (first_time) - cifs_calculate_mac_key(&server->mac_signing_key, - ntlm_session_key, - pSesInfo->password); - - rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info); - } + rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); if (rc) { cERROR(1, ("Send error in SessSetup = %d", rc)); } else { cFYI(1, ("CIFS Session Established successfully")); - spin_lock(&GlobalMid_Lock); - pSesInfo->status = CifsGood; - pSesInfo->need_reconnect = false; - spin_unlock(&GlobalMid_Lock); + spin_lock(&GlobalMid_Lock); + pSesInfo->status = CifsGood; + pSesInfo->need_reconnect = false; + spin_unlock(&GlobalMid_Lock); } ss_err_exit: -- cgit v1.2.3 From 1af28ceb923d04357733642a3dbc4497da4db1c2 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 17 Mar 2009 19:00:30 +0300 Subject: Enable dfs submounts to handle remote referrals. Having remote dfs root support in cifs_mount, we can afford to pass into it UNC that is remote. 
Signed-off-by: Igor Mammedov Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 5fdbf8a1447..896b08fb1ec 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -341,27 +341,23 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) for (i = 0; i < num_referrals; i++) { dump_referral(referrals+i); - /* connect to a storage node */ - if (referrals[i].flags & DFSREF_STORAGE_SERVER) { - int len; - len = strlen(referrals[i].node_name); - if (len < 2) { - cERROR(1, ("%s: Net Address path too short: %s", + /* connect to a node */ + int len; + len = strlen(referrals[i].node_name); + if (len < 2) { + cERROR(1, ("%s: Net Address path too short: %s", __func__, referrals[i].node_name)); - rc = -EINVAL; - goto out_err; - } - mnt = cifs_dfs_do_refmount(nd->path.mnt, - nd->path.dentry, - referrals + i); - cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p", - __func__, + rc = -EINVAL; + goto out_err; + } + mnt = cifs_dfs_do_refmount(nd->path.mnt, + nd->path.dentry, referrals + i); + cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__, referrals[i].node_name, mnt)); - /* complete mount procedure if we accured submount */ - if (!IS_ERR(mnt)) - break; - } + /* complete mount procedure if we accured submount */ + if (!IS_ERR(mnt)) + break; } /* we need it cause for() above could exit without valid submount */ -- cgit v1.2.3 From 5c2503a8e339fbc82f49d5706c5a4ad650dd9711 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 21 Apr 2009 19:31:05 +0400 Subject: Added loop check when mounting DFS tree. Added loop check when mounting DFS tree. mount will fail with ELOOP if referral walks exceed MAX_NESTED_LINKS count. 
Signed-off-by: Igor Mammedov Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 579a628d1e6..7e5d4fda493 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -2278,6 +2279,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, #ifdef CONFIG_CIFS_DFS_UPCALL struct dfs_info3_param *referrals = NULL; unsigned int num_referrals = 0; + int referral_walks_count = 0; try_mount_again: #endif full_path = NULL; @@ -2525,6 +2527,16 @@ remote_path_check: /* get referral if needed */ if (rc == -EREMOTE) { #ifdef CONFIG_CIFS_DFS_UPCALL + if (referral_walks_count > MAX_NESTED_LINKS) { + /* + * BB: when we implement proper loop detection, + * we will remove this check. But now we need it + * to prevent an indefinite loop if 'DFS tree' is + * misconfigured (i.e. has loops). 
+ */ + rc = -ELOOP; + goto mount_fail_check; + } /* convert forward to back slashes in prepath here if needed */ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0) convert_delimiter(cifs_sb->prepath, @@ -2558,6 +2570,7 @@ remote_path_check: cleanup_volume_info(&volume_info); FreeXid(xid); kfree(full_path); + referral_walks_count++; goto try_mount_again; } #else /* No DFS support, return error on mount */ -- cgit v1.2.3 From d185cda7712fd1d9e349174639d76eadc66679be Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 30 Apr 2009 17:45:10 +0000 Subject: [CIFS] rename cifs_strndup to cifs_strndup_from_ucs In most cases, cifs_strndup is converting from Unicode (UCS2 / UTF-32) to the configured local code page for the Linux mount (usually UTF8), so Jeff suggested that to make it more clear that cifs_strndup is doing a conversion not just memory allocation and copy, rename the function to including "from_ucs" (ie Unicode) Signed-off-by: Steve French --- fs/cifs/CHANGES | 6 +++++- fs/cifs/cifs_unicode.c | 6 +++--- fs/cifs/cifs_unicode.h | 7 ++++--- fs/cifs/cifssmb.c | 12 ++++++------ fs/cifs/connect.c | 5 +++-- fs/cifs/sess.c | 8 ++++---- 6 files changed, 25 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 1bf81813627..1b0643c2eac 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -2,7 +2,11 @@ Version 1.58 ------------ Guard against buffer overruns in various UCS-2 to UTF-8 string conversions when the UTF-8 string is composed of unusually long (more than 4 byte) converted -characters. +characters. Add support for mounting root of a share which redirects immediately +to DFS target. Convert string conversion functions from Unicode to more +accurately mark string length before allocating memory (which may help the +rare cases where a UTF-8 string is much larger than the UCS2 string that +we converted from). 
Version 1.57 ------------ diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 2a879cff3a4..6382720acf7 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -1,7 +1,7 @@ /* * fs/cifs/cifs_unicode.c * - * Copyright (c) International Business Machines Corp., 2000,2005 + * Copyright (c) International Business Machines Corp., 2000,2009 * Modified by Steve French (sfrench@us.ibm.com) * * This program is free software; you can redistribute it and/or modify @@ -244,7 +244,7 @@ cifs_strtoUCS(__le16 *to, const char *from, int len, } /* - * cifs_strndup - copy a string from wire format to the local codepage + * cifs_strndup_from_ucs - copy a string from wire format to the local codepage * @src - source string * @maxlen - don't walk past this many bytes in the source string * @is_unicode - is this a unicode string? @@ -255,7 +255,7 @@ cifs_strtoUCS(__le16 *to, const char *from, int len, * error. */ char * -cifs_strndup(const char *src, const int maxlen, const bool is_unicode, +cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode, const struct nls_table *codepage) { int len; diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index e620f0b4220..1570a701bf3 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -5,7 +5,7 @@ * Convert a unicode character to upper or lower case using * compressed tables. 
* - * Copyright (c) International Business Machines Corp., 2000,2007 + * Copyright (c) International Business Machines Corp., 2000,2009 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -78,8 +78,9 @@ int cifs_ucs2_bytes(const __le16 *from, int maxbytes, const struct nls_table *codepage); int cifs_strfromUCS_le(char *, const __le16 *, int, const struct nls_table *); int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *); -char *cifs_strndup(const char *src, const int maxlen, const bool is_unicode, - const struct nls_table *codepage); +char *cifs_strndup_from_ucs(const char *src, const int maxlen, + const bool is_unicode, + const struct nls_table *codepage); #endif /* diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index dfb8e391d53..df5276e628b 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1,7 +1,7 @@ /* * fs/cifs/cifssmb.c * - * Copyright (C) International Business Machines Corp., 2002,2008 + * Copyright (C) International Business Machines Corp., 2002,2009 * Author(s): Steve French (sfrench@us.ibm.com) * * Contains the routines for constructing the SMB PDUs themselves @@ -2457,7 +2457,7 @@ querySymLinkRetry: le16_to_cpu(pSMBr->t2.DataOffset); /* BB FIXME investigate remapping reserved chars here */ - *symlinkinfo = cifs_strndup(data_start, count, + *symlinkinfo = cifs_strndup_from_ucs(data_start, count, pSMBr->hdr.Flags2 & SMBFLG2_UNICODE, nls_codepage); @@ -3965,8 +3965,8 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, /* copy DfsPath */ temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset); max_len = data_end - temp; - node->path_name = cifs_strndup(temp, max_len, is_unicode, - nls_codepage); + node->path_name = cifs_strndup_from_ucs(temp, max_len, + is_unicode, nls_codepage); if (IS_ERR(node->path_name)) { rc = PTR_ERR(node->path_name); node->path_name = NULL; @@ -3976,8 +3976,8 @@ 
parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, /* copy link target UNC */ temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset); max_len = data_end - temp; - node->node_name = cifs_strndup(temp, max_len, is_unicode, - nls_codepage); + node->node_name = cifs_strndup_from_ucs(temp, max_len, + is_unicode, nls_codepage); if (IS_ERR(node->node_name)) { rc = PTR_ERR(node->node_name); node->node_name = NULL; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7e5d4fda493..39f5362e2cb 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1,7 +1,7 @@ /* * fs/cifs/connect.c * - * Copyright (C) International Business Machines Corp., 2002,2008 + * Copyright (C) International Business Machines Corp., 2002,2009 * Author(s): Steve French (sfrench@us.ibm.com) * * This library is free software; you can redistribute it and/or modify @@ -3463,7 +3463,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, strncpy(tcon->treeName, tree, MAX_TREE_SIZE); /* mostly informational -- no need to fail on error here */ - tcon->nativeFileSystem = cifs_strndup(bcc_ptr, bytes_left, + tcon->nativeFileSystem = cifs_strndup_from_ucs(bcc_ptr, + bytes_left, smb_buffer->Flags2 & SMBFLG2_UNICODE, nls_codepage); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 93022dc9bab..2bcff17047a 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -3,7 +3,7 @@ * * SMB/CIFS session setup handling routines * - * Copyright (c) International Business Machines Corp., 2006, 2007 + * Copyright (c) International Business Machines Corp., 2006, 2009 * Author(s): Steve French (sfrench@us.ibm.com) * * This library is free software; you can redistribute it and/or modify @@ -300,7 +300,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses, } kfree(ses->serverOS); - ses->serverOS = cifs_strndup(data, bleft, true, nls_cp); + ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp); cFYI(1, ("serverOS=%s", ses->serverOS)); len = (UniStrnlen((wchar_t *) data, bleft / 2) 
* 2) + 2; data += len; @@ -309,7 +309,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses, return; kfree(ses->serverNOS); - ses->serverNOS = cifs_strndup(data, bleft, true, nls_cp); + ses->serverNOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp); cFYI(1, ("serverNOS=%s", ses->serverNOS)); len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2; data += len; @@ -318,7 +318,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses, return; kfree(ses->serverDomain); - ses->serverDomain = cifs_strndup(data, bleft, true, nls_cp); + ses->serverDomain = cifs_strndup_from_ucs(data, bleft, true, nls_cp); cFYI(1, ("serverDomain=%s", ses->serverDomain)); return; -- cgit v1.2.3 From dfa13f39b798fee68250abe1aed851395c8b51b5 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 29 Apr 2009 17:55:08 -0700 Subject: ocfs2: Fix a missing credit when deleting from indexed directories. The ocfs2 directory index updates two blocks when we remove an entry - the dx root and the dx leaf. OCFS2_DELETE_INODE_CREDITS was only accounting for the dx leaf. This shows up when ocfs2_delete_inode() runs out of credits in jbd2_journal_dirty_metadata() at "J_ASSERT_JH(jh, handle->h_buffer_credits > 0);". The test that caught this was running dirop_file_racer from the ocfs2-test suite with a 250-character filename PREFIX. Run on a 512B blocksize, it forces the orphan dir index to grow large enough to trigger. 
Signed-off-by: Joel Becker --- fs/ocfs2/journal.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 619dd7f6c05..eb7b76331eb 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -437,8 +437,9 @@ static inline int ocfs2_unlink_credits(struct super_block *sb) } /* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry + - * inode alloc group descriptor + orphan dir index leaf */ -#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3) + * inode alloc group descriptor + orphan dir index root + + * orphan dir index leaf */ +#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 4) /* dinode update, old dir dinode update, new dir dinode update, old * dir dir entry, new dir dir entry, dir entry update for renaming -- cgit v1.2.3 From 9e39b0ae8af46c83b85dae7ff5251911a80fce5a Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 30 Apr 2009 21:31:15 +0000 Subject: [CIFS] Remove unneeded QuerySymlink call and fix mapping for unmapped status Signed-off-by: Steve French --- fs/cifs/connect.c | 2 -- fs/cifs/link.c | 9 ++------- fs/cifs/netmisc.c | 2 ++ fs/cifs/nterr.h | 9 +++++++-- fs/cifs/smberr.h | 1 + 5 files changed, 12 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 39f5362e2cb..9d2ebab53db 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3509,8 +3509,6 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, struct nls_table *nls_info) { int rc = 0; - char ntlm_session_key[CIFS_SESS_KEY_SIZE]; - bool ntlmv2_flag = false; int first_time = 0; struct TCP_Server_Info *server = pSesInfo->server; diff --git a/fs/cifs/link.c b/fs/cifs/link.c index e17a092f43e..eb2fbbe865d 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -257,13 +257,8 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) /* BB add read reparse point symlink code and Unix 
extensions symlink code here BB */ -/* We could disable this based on pTcon->unix_ext flag instead ... but why? */ - if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) - rc = CIFSSMBUnixQuerySymLink(xid, pTcon, full_path, - tmpbuffer, - len - 1, - cifs_sb->local_nls); - else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { cERROR(1, ("SFU style symlinks not implemented yet")); /* add open and read as in fs/cifs/inode.c */ } else { diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 8703d68f5b2..e2fe998989a 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -79,6 +79,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = { {ErrQuota, -EDQUOT}, {ErrNotALink, -ENOLINK}, {ERRnetlogonNotStarted, -ENOPROTOOPT}, + {ERRsymlink, -EOPNOTSUPP}, {ErrTooManyLinks, -EMLINK}, {0, 0} }; @@ -714,6 +715,7 @@ static const struct { ERRDOS, ERRnoaccess, 0xc000028f}, { ERRDOS, ERRnoaccess, 0xc0000290}, { ERRDOS, ERRbadfunc, 0xc000029c}, { + ERRDOS, ERRsymlink, NT_STATUS_STOPPED_ON_SYMLINK}, { ERRDOS, ERRinvlevel, 0x007c0001}, }; /***************************************************************************** diff --git a/fs/cifs/nterr.h b/fs/cifs/nterr.h index 588abbb9d08..257267367d4 100644 --- a/fs/cifs/nterr.h +++ b/fs/cifs/nterr.h @@ -35,8 +35,6 @@ struct nt_err_code_struct { extern const struct nt_err_code_struct nt_errs[]; /* Win32 Status codes. 
*/ - -#define STATUS_BUFFER_OVERFLOW 0x80000005 #define STATUS_MORE_ENTRIES 0x0105 #define ERROR_INVALID_PARAMETER 0x0057 #define ERROR_INSUFFICIENT_BUFFER 0x007a @@ -50,6 +48,13 @@ extern const struct nt_err_code_struct nt_errs[]; #define STATUS_SOME_UNMAPPED 0x0107 #define STATUS_BUFFER_OVERFLOW 0x80000005 #define NT_STATUS_NO_MORE_ENTRIES 0x8000001a +#define NT_STATUS_MEDIA_CHANGED 0x8000001c +#define NT_STATUS_END_OF_MEDIA 0x8000001e +#define NT_STATUS_MEDIA_CHECK 0x80000020 +#define NT_STATUS_NO_DATA_DETECTED 0x8000001c +#define NT_STATUS_STOPPED_ON_SYMLINK 0x8000002d +#define NT_STATUS_DEVICE_REQUIRES_CLEANING 0x80000288 +#define NT_STATUS_DEVICE_DOOR_OPEN 0x80000288 #define NT_STATUS_UNSUCCESSFUL 0xC0000000 | 0x0001 #define NT_STATUS_NOT_IMPLEMENTED 0xC0000000 | 0x0002 #define NT_STATUS_INVALID_INFO_CLASS 0xC0000000 | 0x0003 diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h index 7f50e8577c1..c5084d27db7 100644 --- a/fs/cifs/smberr.h +++ b/fs/cifs/smberr.h @@ -110,6 +110,7 @@ /* Below errors are used internally (do not come over the wire) for passthrough from STATUS codes to POSIX only */ +#define ERRsymlink 0xFFFD #define ErrTooManyLinks 0xFFFE /* Following error codes may be generated with the ERRSRV error class.*/ -- cgit v1.2.3 From 18295796a30cada84e933d805072dc2248d54f98 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Apr 2009 20:45:45 -0400 Subject: cifs: fix length handling in cifs_get_name_from_search_buf The earlier patch to move this code to use the new unicode helpers assumed that the filename strings would be null terminated. That's not always the case. Instead of passing "max_len" to the string converter, pass "min(len, max_len)", which makes it do the right thing while still keeping the parser confined to the response. Also fix up the prototypes of this function and the callers so that max_len is unsigned (like len is). 
Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/readdir.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index e1351fe18a1..5bc9ab7586b 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -829,7 +829,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, /* inode num, inode type and filename returned */ static int cifs_get_name_from_search_buf(struct qstr *pqst, char *current_entry, __u16 level, unsigned int unicode, - struct cifs_sb_info *cifs_sb, int max_len, __u64 *pinum) + struct cifs_sb_info *cifs_sb, unsigned int max_len, __u64 *pinum) { int rc = 0; unsigned int len = 0; @@ -890,7 +890,8 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst, if (unicode) { pqst->len = cifs_from_ucs2((char *) pqst->name, (__le16 *) filename, - UNICODE_NAME_MAX, max_len, nlt, + UNICODE_NAME_MAX, + min(len, max_len), nlt, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } else { @@ -902,8 +903,8 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst, return rc; } -static int cifs_filldir(char *pfindEntry, struct file *file, - filldir_t filldir, void *direntry, char *scratch_buf, int max_len) +static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir, + void *direntry, char *scratch_buf, unsigned int max_len) { int rc = 0; struct qstr qstring; @@ -1000,7 +1001,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) int num_to_fill = 0; char *tmp_buf = NULL; char *end_of_smb; - int max_len; + unsigned int max_len; xid = GetXid(); -- cgit v1.2.3 From cf398e3a117b2b63da724c2365d53ce31bd7240a Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 1 May 2009 03:50:42 +0000 Subject: [CIFS] Fix build warning Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c 
index 896b08fb1ec..83d62759c7c 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -340,9 +340,9 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); for (i = 0; i < num_referrals; i++) { + int len; dump_referral(referrals+i); /* connect to a node */ - int len; len = strlen(referrals[i].node_name); if (len < 2) { cERROR(1, ("%s: Net Address path too short: %s", -- cgit v1.2.3 From e14b2fe1e64d3e4bd2f328ff9d1969f318f55954 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 1 May 2009 04:37:43 +0000 Subject: [CIFS] Add remaining ntlmssp flags and standardize field names Signed-off-by: Steve French --- fs/cifs/connect.c | 30 ++++++++++++------------ fs/cifs/ntlmssp.h | 68 ++++++++++++++++++++++++++++++++++--------------------- 2 files changed, 57 insertions(+), 41 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 9d2ebab53db..5bce2778163 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2686,13 +2686,13 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, /* setup pointers to domain name and workstation name */ bcc_ptr += SecurityBlobLength; - SecurityBlob->WorkstationName.Buffer = 0; + SecurityBlob->WorkstationName.BufferOffset = 0; SecurityBlob->WorkstationName.Length = 0; SecurityBlob->WorkstationName.MaximumLength = 0; /* Domain not sent on first Sesssetup in NTLMSSP, instead it is sent along with username on auth request (ie the response to challenge) */ - SecurityBlob->DomainName.Buffer = 0; + SecurityBlob->DomainName.BufferOffset = 0; SecurityBlob->DomainName.Length = 0; SecurityBlob->DomainName.MaximumLength = 0; if (ses->capabilities & CAP_UNICODE) { @@ -3020,30 +3020,30 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, /* setup pointers to domain name and workstation name */ - SecurityBlob->WorkstationName.Buffer = 0; + SecurityBlob->WorkstationName.BufferOffset = 0; 
SecurityBlob->WorkstationName.Length = 0; SecurityBlob->WorkstationName.MaximumLength = 0; SecurityBlob->SessionKey.Length = 0; SecurityBlob->SessionKey.MaximumLength = 0; - SecurityBlob->SessionKey.Buffer = 0; + SecurityBlob->SessionKey.BufferOffset = 0; SecurityBlob->LmChallengeResponse.Length = 0; SecurityBlob->LmChallengeResponse.MaximumLength = 0; - SecurityBlob->LmChallengeResponse.Buffer = 0; + SecurityBlob->LmChallengeResponse.BufferOffset = 0; SecurityBlob->NtChallengeResponse.Length = cpu_to_le16(CIFS_SESS_KEY_SIZE); SecurityBlob->NtChallengeResponse.MaximumLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); memcpy(bcc_ptr, ntlm_session_key, CIFS_SESS_KEY_SIZE); - SecurityBlob->NtChallengeResponse.Buffer = + SecurityBlob->NtChallengeResponse.BufferOffset = cpu_to_le32(SecurityBlobLength); SecurityBlobLength += CIFS_SESS_KEY_SIZE; bcc_ptr += CIFS_SESS_KEY_SIZE; if (ses->capabilities & CAP_UNICODE) { if (domain == NULL) { - SecurityBlob->DomainName.Buffer = 0; + SecurityBlob->DomainName.BufferOffset = 0; SecurityBlob->DomainName.Length = 0; SecurityBlob->DomainName.MaximumLength = 0; } else { @@ -3052,14 +3052,14 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, ln *= 2; SecurityBlob->DomainName.MaximumLength = cpu_to_le16(ln); - SecurityBlob->DomainName.Buffer = + SecurityBlob->DomainName.BufferOffset = cpu_to_le32(SecurityBlobLength); bcc_ptr += ln; SecurityBlobLength += ln; SecurityBlob->DomainName.Length = cpu_to_le16(ln); } if (user == NULL) { - SecurityBlob->UserName.Buffer = 0; + SecurityBlob->UserName.BufferOffset = 0; SecurityBlob->UserName.Length = 0; SecurityBlob->UserName.MaximumLength = 0; } else { @@ -3068,7 +3068,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, ln *= 2; SecurityBlob->UserName.MaximumLength = cpu_to_le16(ln); - SecurityBlob->UserName.Buffer = + SecurityBlob->UserName.BufferOffset = cpu_to_le32(SecurityBlobLength); bcc_ptr += ln; SecurityBlobLength += ln; @@ -3080,7 +3080,7 @@ 
CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, SecurityBlob->WorkstationName.Length *= 2; SecurityBlob->WorkstationName.MaximumLength = cpu_to_le16(SecurityBlob->WorkstationName.Length); - SecurityBlob->WorkstationName.Buffer = + SecurityBlob->WorkstationName.BufferOffset = cpu_to_le32(SecurityBlobLength); bcc_ptr += SecurityBlob->WorkstationName.Length; SecurityBlobLength += SecurityBlob->WorkstationName.Length; @@ -3112,7 +3112,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr += 2; /* null domain */ } else { /* ASCII */ if (domain == NULL) { - SecurityBlob->DomainName.Buffer = 0; + SecurityBlob->DomainName.BufferOffset = 0; SecurityBlob->DomainName.Length = 0; SecurityBlob->DomainName.MaximumLength = 0; } else { @@ -3122,14 +3122,14 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, ln = strnlen(domain, 64); SecurityBlob->DomainName.MaximumLength = cpu_to_le16(ln); - SecurityBlob->DomainName.Buffer = + SecurityBlob->DomainName.BufferOffset = cpu_to_le32(SecurityBlobLength); bcc_ptr += ln; SecurityBlobLength += ln; SecurityBlob->DomainName.Length = cpu_to_le16(ln); } if (user == NULL) { - SecurityBlob->UserName.Buffer = 0; + SecurityBlob->UserName.BufferOffset = 0; SecurityBlob->UserName.Length = 0; SecurityBlob->UserName.MaximumLength = 0; } else { @@ -3137,7 +3137,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, strncpy(bcc_ptr, user, 63); ln = strnlen(user, 64); SecurityBlob->UserName.MaximumLength = cpu_to_le16(ln); - SecurityBlob->UserName.Buffer = + SecurityBlob->UserName.BufferOffset = cpu_to_le32(SecurityBlobLength); bcc_ptr += ln; SecurityBlobLength += ln; diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index c377d8065d9..49c9a4e7531 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h @@ -27,29 +27,39 @@ #define UnknownMessage cpu_to_le32(8) /* Negotiate Flags */ -#define NTLMSSP_NEGOTIATE_UNICODE 0x01 /* Text strings are in unicode */ -#define 
NTLMSSP_NEGOTIATE_OEM 0x02 /* Text strings are in OEM */ -#define NTLMSSP_REQUEST_TARGET 0x04 /* Server return its auth realm */ -#define NTLMSSP_NEGOTIATE_SIGN 0x0010 /* Request signature capability */ -#define NTLMSSP_NEGOTIATE_SEAL 0x0020 /* Request confidentiality */ -#define NTLMSSP_NEGOTIATE_DGRAM 0x0040 -#define NTLMSSP_NEGOTIATE_LM_KEY 0x0080 /* Sign/seal use LM session key */ -#define NTLMSSP_NEGOTIATE_NTLM 0x0200 /* NTLM authentication */ -#define NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED 0x1000 +#define NTLMSSP_NEGOTIATE_UNICODE 0x01 /* Text strings are unicode */ +#define NTLMSSP_NEGOTIATE_OEM 0x02 /* Text strings are in OEM */ +#define NTLMSSP_REQUEST_TARGET 0x04 /* Srv returns its auth realm */ +/* define reserved9 0x08 */ +#define NTLMSSP_NEGOTIATE_SIGN 0x0010 /* Request signing capability */ +#define NTLMSSP_NEGOTIATE_SEAL 0x0020 /* Request confidentiality */ +#define NTLMSSP_NEGOTIATE_DGRAM 0x0040 +#define NTLMSSP_NEGOTIATE_LM_KEY 0x0080 /* Use LM session key */ +/* defined reserved 8 0x0100 */ +#define NTLMSSP_NEGOTIATE_NTLM 0x0200 /* NTLM authentication */ +#define NTLMSSP_NEGOTIATE_NT_ONLY 0x0400 /* Lanman not allowed */ +#define NTLMSSP_ANONYMOUS 0x0800 +#define NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED 0x1000 /* reserved6 */ #define NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED 0x2000 -#define NTLMSSP_NEGOTIATE_LOCAL_CALL 0x4000 /* client/server on same machine */ -#define NTLMSSP_NEGOTIATE_ALWAYS_SIGN 0x8000 /* Sign for all security levels */ -#define NTLMSSP_TARGET_TYPE_DOMAIN 0x10000 -#define NTLMSSP_TARGET_TYPE_SERVER 0x20000 -#define NTLMSSP_TARGET_TYPE_SHARE 0x40000 -#define NTLMSSP_NEGOTIATE_NTLMV2 0x80000 -#define NTLMSSP_REQUEST_INIT_RESP 0x100000 -#define NTLMSSP_REQUEST_ACCEPT_RESP 0x200000 -#define NTLMSSP_REQUEST_NOT_NT_KEY 0x400000 +#define NTLMSSP_NEGOTIATE_LOCAL_CALL 0x4000 /* client/server same machine */ +#define NTLMSSP_NEGOTIATE_ALWAYS_SIGN 0x8000 /* Sign. 
All security levels */ +#define NTLMSSP_TARGET_TYPE_DOMAIN 0x10000 +#define NTLMSSP_TARGET_TYPE_SERVER 0x20000 +#define NTLMSSP_TARGET_TYPE_SHARE 0x40000 +#define NTLMSSP_NEGOTIATE_EXTENDED_SEC 0x80000 /* NB:not related to NTLMv2 pwd*/ +/* #define NTLMSSP_REQUEST_INIT_RESP 0x100000 */ +#define NTLMSSP_NEGOTIATE_IDENTIFY 0x100000 +#define NTLMSSP_REQUEST_ACCEPT_RESP 0x200000 /* reserved5 */ +#define NTLMSSP_REQUEST_NON_NT_KEY 0x400000 #define NTLMSSP_NEGOTIATE_TARGET_INFO 0x800000 -#define NTLMSSP_NEGOTIATE_128 0x20000000 -#define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000 -#define NTLMSSP_NEGOTIATE_56 0x80000000 +/* #define reserved4 0x1000000 */ +#define NTLMSSP_NEGOTIATE_VERSION 0x2000000 /* we do not set */ +/* #define reserved3 0x4000000 */ +/* #define reserved2 0x8000000 */ +/* #define reserved1 0x10000000 */ +#define NTLMSSP_NEGOTIATE_128 0x20000000 +#define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000 +#define NTLMSSP_NEGOTIATE_56 0x80000000 /* Although typedefs are not commonly used for structure definitions */ /* in the Linux kernel, in this particular case they are useful */ @@ -60,32 +70,36 @@ typedef struct _SECURITY_BUFFER { __le16 Length; __le16 MaximumLength; - __le32 Buffer; /* offset to buffer */ + __le32 BufferOffset; /* offset to buffer */ } __attribute__((packed)) SECURITY_BUFFER; typedef struct _NEGOTIATE_MESSAGE { __u8 Signature[sizeof(NTLMSSP_SIGNATURE)]; - __le32 MessageType; /* 1 */ + __le32 MessageType; /* NtLmNegotiate = 1 */ __le32 NegotiateFlags; SECURITY_BUFFER DomainName; /* RFC 1001 style and ASCII */ SECURITY_BUFFER WorkstationName; /* RFC 1001 and ASCII */ + /* SECURITY_BUFFER for version info not present since we + do not set the version is present flag */ char DomainString[0]; /* followed by WorkstationString */ } __attribute__((packed)) NEGOTIATE_MESSAGE, *PNEGOTIATE_MESSAGE; typedef struct _CHALLENGE_MESSAGE { __u8 Signature[sizeof(NTLMSSP_SIGNATURE)]; - __le32 MessageType; /* 2 */ + __le32 MessageType; /* NtLmChallenge = 2 */ 
SECURITY_BUFFER TargetName; __le32 NegotiateFlags; __u8 Challenge[CIFS_CRYPTO_KEY_SIZE]; __u8 Reserved[8]; SECURITY_BUFFER TargetInfoArray; + /* SECURITY_BUFFER for version info not present since we + do not set the version is present flag */ } __attribute__((packed)) CHALLENGE_MESSAGE, *PCHALLENGE_MESSAGE; typedef struct _AUTHENTICATE_MESSAGE { - __u8 Signature[sizeof (NTLMSSP_SIGNATURE)]; - __le32 MessageType; /* 3 */ + __u8 Signature[sizeof(NTLMSSP_SIGNATURE)]; + __le32 MessageType; /* NtLmsAuthenticate = 3 */ SECURITY_BUFFER LmChallengeResponse; SECURITY_BUFFER NtChallengeResponse; SECURITY_BUFFER DomainName; @@ -93,5 +107,7 @@ typedef struct _AUTHENTICATE_MESSAGE { SECURITY_BUFFER WorkstationName; SECURITY_BUFFER SessionKey; __le32 NegotiateFlags; + /* SECURITY_BUFFER for version info not present since we + do not set the version is present flag */ char UserString[0]; } __attribute__((packed)) AUTHENTICATE_MESSAGE, *PAUTHENTICATE_MESSAGE; -- cgit v1.2.3 From 0e0d2cf32743c660aab20e40aeb2155c06a256db Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 1 May 2009 05:27:32 +0000 Subject: [CIFS] Remove sparse endian warnings Removes two sparse CHECK_ENDIAN warnings from Jeffs earlier patch, and removes the dead readlink code (after noting where in findfirst we will need to add something like that in the future to handle the newly discovered unexpected error on FindFirst of NTFS symlinks. 
Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 14 +++++---- fs/cifs/connect.c | 23 ++++++++------- fs/cifs/link.c | 87 ------------------------------------------------------- fs/cifs/readdir.c | 37 ++++++++++++++++++++++- 4 files changed, 58 insertions(+), 103 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index df5276e628b..b968e5bd7df 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2451,16 +2451,20 @@ querySymLinkRetry: if (rc || (pSMBr->ByteCount < 2)) rc = -EIO; else { + bool is_unicode; u16 count = le16_to_cpu(pSMBr->t2.DataCount); data_start = ((char *) &pSMBr->hdr.Protocol) + le16_to_cpu(pSMBr->t2.DataOffset); + if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) + is_unicode = true; + else + is_unicode = false; + /* BB FIXME investigate remapping reserved chars here */ *symlinkinfo = cifs_strndup_from_ucs(data_start, count, - pSMBr->hdr.Flags2 & - SMBFLG2_UNICODE, - nls_codepage); + is_unicode, nls_codepage); if (!symlinkinfo) rc = -ENOMEM; } @@ -3930,7 +3934,7 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, cFYI(1, ("num_referrals: %d dfs flags: 0x%x ... 
\n", *num_of_nodes, - le16_to_cpu(pSMBr->DFSFlags))); + le32_to_cpu(pSMBr->DFSFlags))); *target_nodes = kzalloc(sizeof(struct dfs_info3_param) * *num_of_nodes, GFP_KERNEL); @@ -3946,7 +3950,7 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, int max_len; struct dfs_info3_param *node = (*target_nodes)+i; - node->flags = le16_to_cpu(pSMBr->DFSFlags); + node->flags = le32_to_cpu(pSMBr->DFSFlags); if (is_unicode) { __le16 *tmp = kmalloc(strlen(searchName)*2 + 2, GFP_KERNEL); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5bce2778163..9dcdb0c707e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2681,8 +2681,6 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN | */ NTLMSSP_NEGOTIATE_128; if (sign_CIFS_PDUs) negotiate_flags |= NTLMSSP_NEGOTIATE_SIGN; -/* if (ntlmv2_support) - negotiate_flags |= NTLMSSP_NEGOTIATE_NTLMV2;*/ /* setup pointers to domain name and workstation name */ bcc_ptr += SecurityBlobLength; @@ -2780,9 +2778,10 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, memcpy(ses->server->cryptKey, SecurityBlob2->Challenge, CIFS_CRYPTO_KEY_SIZE); - if (SecurityBlob2->NegotiateFlags & +/* NTLMV2 flag is not for NTLMv2 password hash */ +/* if (SecurityBlob2->NegotiateFlags & cpu_to_le32(NTLMSSP_NEGOTIATE_NTLMV2)) - *pNTLMv2_flag = true; + *pNTLMv2_flag = true; */ /* BB wrong */ if ((SecurityBlob2->NegotiateFlags & cpu_to_le32(NTLMSSP_NEGOTIATE_ALWAYS_SIGN)) @@ -3012,11 +3011,10 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr += SecurityBlobLength; negotiate_flags = NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_TARGET_INFO | - 0x80000000 | NTLMSSP_NEGOTIATE_128; + NTLMSSP_NEGOTIATE_56 | NTLMSSP_NEGOTIATE_128 | + NTLMSSP_NEGOTIATE_EXTENDED_SEC; if (sign_CIFS_PDUs) negotiate_flags |= /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN |*/ NTLMSSP_NEGOTIATE_SIGN; - if (ntlmv2_flag) - negotiate_flags |= NTLMSSP_NEGOTIATE_NTLMV2; /* setup 
pointers to domain name and workstation name */ @@ -3438,12 +3436,19 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, /* above now done in SendReceive */ if ((rc == 0) && (tcon != NULL)) { + bool is_unicode; + tcon->tidStatus = CifsGood; tcon->need_reconnect = false; tcon->tid = smb_buffer_response->Tid; bcc_ptr = pByteArea(smb_buffer_response); bytes_left = BCC(smb_buffer_response); length = strnlen(bcc_ptr, bytes_left - 2); + if (smb_buffer->Flags2 & SMBFLG2_UNICODE) + is_unicode = true; + else + is_unicode = false; + /* skip service field (NB: this field is always ASCII) */ if (length == 3) { @@ -3464,9 +3469,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, /* mostly informational -- no need to fail on error here */ tcon->nativeFileSystem = cifs_strndup_from_ucs(bcc_ptr, - bytes_left, - smb_buffer->Flags2 & - SMBFLG2_UNICODE, + bytes_left, is_unicode, nls_codepage); cFYI(1, ("nativeFileSystem=%s", tcon->nativeFileSystem)); diff --git a/fs/cifs/link.c b/fs/cifs/link.c index eb2fbbe865d..ea9d11e3dcb 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -212,93 +212,6 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) return rc; } -int -cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) -{ - struct inode *inode = direntry->d_inode; - int rc = -EACCES; - int xid; - int oplock = 0; - struct cifs_sb_info *cifs_sb; - struct cifsTconInfo *pTcon; - char *full_path = NULL; - char *tmpbuffer; - int len; - __u16 fid; - - xid = GetXid(); - cifs_sb = CIFS_SB(inode->i_sb); - pTcon = cifs_sb->tcon; - -/* BB would it be safe against deadlock to grab this sem - even though rename itself grabs the sem and calls lookup? 
*/ -/* mutex_lock(&inode->i_sb->s_vfs_rename_mutex);*/ - full_path = build_path_from_dentry(direntry); -/* mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);*/ - - if (full_path == NULL) { - FreeXid(xid); - return -ENOMEM; - } - - cFYI(1, - ("Full path: %s inode = 0x%p pBuffer = 0x%p buflen = %d", - full_path, inode, pBuffer, buflen)); - if (buflen > PATH_MAX) - len = PATH_MAX; - else - len = buflen; - tmpbuffer = kmalloc(len, GFP_KERNEL); - if (tmpbuffer == NULL) { - kfree(full_path); - FreeXid(xid); - return -ENOMEM; - } - -/* BB add read reparse point symlink code and - Unix extensions symlink code here BB */ - - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { - cERROR(1, ("SFU style symlinks not implemented yet")); - /* add open and read as in fs/cifs/inode.c */ - } else { - rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, GENERIC_READ, - OPEN_REPARSE_POINT, &fid, &oplock, NULL, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (!rc) { - rc = CIFSSMBQueryReparseLinkInfo(xid, pTcon, full_path, - tmpbuffer, - len - 1, - fid, - cifs_sb->local_nls); - if (CIFSSMBClose(xid, pTcon, fid)) { - cFYI(1, ("Error closing junction point " - "(open for ioctl)")); - } - /* If it is a DFS junction earlier we would have gotten - PATH_NOT_COVERED returned from server so we do - not need to request the DFS info here */ - } - } - /* BB Anything else to do to handle recursive links? */ - /* BB Should we be using page ops here? */ - - /* BB null terminate returned string in pBuffer? 
BB */ - if (rc == 0) { - rc = vfs_readlink(direntry, pBuffer, len, tmpbuffer); - cFYI(1, - ("vfs_readlink called from cifs_readlink returned %d", - rc)); - } - - kfree(tmpbuffer); - kfree(full_path); - FreeXid(xid); - return rc; -} - void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie) { char *p = nd_get_link(nd); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 5bc9ab7586b..df003fe3710 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -445,6 +445,38 @@ static void unix_fill_in_inode(struct inode *tmp_inode, } } +/* BB eventually need to add the following helper function to + resolve NT_STATUS_STOPPED_ON_SYMLINK return code when + we try to do FindFirst on (NTFS) directory symlinks */ +/* +int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb, + int xid) +{ + __u16 fid; + int len; + int oplock = 0; + int rc; + struct cifsTconInfo *ptcon = cifs_sb->tcon; + char *tmpbuffer; + + rc = CIFSSMBOpen(xid, ptcon, full_path, FILE_OPEN, GENERIC_READ, + OPEN_REPARSE_POINT, &fid, &oplock, NULL, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (!rc) { + tmpbuffer = kmalloc(maxpath); + rc = CIFSSMBQueryReparseLinkInfo(xid, ptcon, full_path, + tmpbuffer, + maxpath -1, + fid, + cifs_sb->local_nls); + if (CIFSSMBClose(xid, ptcon, fid)) { + cFYI(1, ("Error closing temporary reparsepoint open)")); + } + } +} + */ + static int initiate_cifs_search(const int xid, struct file *file) { int rc = 0; @@ -500,7 +532,10 @@ ffirst_retry: CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); if (rc == 0) cifsFile->invalidHandle = false; - if ((rc == -EOPNOTSUPP) && + /* BB add following call to handle readdir on new NTFS symlink errors + else if STATUS_STOPPED_ON_SYMLINK + call get_symlink_reparse_path and retry with new path */ + else if ((rc == -EOPNOTSUPP) && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; goto ffirst_retry; -- cgit v1.2.3 
From e836f015b5c07da2f95a441274ef0a788ce17f80 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 1 May 2009 16:20:35 +0000 Subject: [CIFS] Remove trailing whitespace Signed-off-by: Steve French --- fs/cifs/CHANGES | 4 +++- fs/cifs/readdir.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 1b0643c2eac..43f2e0d061f 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -6,7 +6,9 @@ characters. Add support for mounting root of a share which redirects immediately to DFS target. Convert string conversion functions from Unicode to more accurately mark string length before allocating memory (which may help the rare cases where a UTF-8 string is much larger than the UCS2 string that -we converted from). +we converted from). Fix endianness of the vcnum field used during +session setup to distinguish multiple mounts to same server from different +userids. Version 1.57 ------------ diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index df003fe3710..964e097c820 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -532,7 +532,7 @@ ffirst_retry: CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); if (rc == 0) cifsFile->invalidHandle = false; - /* BB add following call to handle readdir on new NTFS symlink errors + /* BB add following call to handle readdir on new NTFS symlink errors else if STATUS_STOPPED_ON_SYMLINK call get_symlink_reparse_path and retry with new path */ else if ((rc == -EOPNOTSUPP) && -- cgit v1.2.3 From 051a2a0d3242b448281376bb63cfa9385e0b6c68 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 1 May 2009 16:21:04 +0000 Subject: [CIFS] Fix endian conversion of vcnum field When multiply mounting from the same client to the same server, with different userids, we create a vcnum which should be unique if possible (this is not the same as the smb uid, which is the handle to the security context). 
We were not endian converting additional (beyond the first which is zero) vcnum properly. CC: Stable Acked-by: Shirish Pargaonkar Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/sess.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 2bcff17047a..b2bdc2a3383 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -111,7 +111,7 @@ static __le16 get_next_vcnum(struct cifsSesInfo *ses) get_vc_num_exit: write_unlock(&cifs_tcp_ses_lock); - return le16_to_cpu(vcnum); + return cpu_to_le16(vcnum); } static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB) -- cgit v1.2.3 From 2edd6c5b0517b9131ede9e74cb121898ccd73042 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 2 May 2009 04:55:39 +0000 Subject: [CIFS] NTLMSSP support moving into new file, old dead code removed Remove dead NTLMSSP support from connect.c prior to addition of the new code to replace it. Signed-off-by: Steve French --- fs/cifs/connect.c | 730 ------------------------------------------------------ 1 file changed, 730 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 9dcdb0c707e..3a934dd8422 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2605,736 +2605,6 @@ out: return rc; } -static int -CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, - struct cifsSesInfo *ses, bool *pNTLMv2_flag, - const struct nls_table *nls_codepage) -{ - struct smb_hdr *smb_buffer; - struct smb_hdr *smb_buffer_response; - SESSION_SETUP_ANDX *pSMB; - SESSION_SETUP_ANDX *pSMBr; - char *bcc_ptr; - char *domain; - int rc = 0; - int remaining_words = 0; - int bytes_returned = 0; - int len; - int SecurityBlobLength = sizeof(NEGOTIATE_MESSAGE); - PNEGOTIATE_MESSAGE SecurityBlob; - PCHALLENGE_MESSAGE SecurityBlob2; - __u32 negotiate_flags, capabilities; - __u16 count; - - cFYI(1, ("In NTLMSSP sesssetup (negotiate)")); - if (ses == NULL) - return -EINVAL; - domain = ses->domainName; - 
*pNTLMv2_flag = false; - smb_buffer = cifs_buf_get(); - if (smb_buffer == NULL) { - return -ENOMEM; - } - smb_buffer_response = smb_buffer; - pSMB = (SESSION_SETUP_ANDX *) smb_buffer; - pSMBr = (SESSION_SETUP_ANDX *) smb_buffer_response; - - /* send SMBsessionSetup here */ - header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX, - NULL /* no tCon exists yet */ , 12 /* wct */ ); - - smb_buffer->Mid = GetNextMid(ses->server); - pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; - pSMB->req.hdr.Flags |= (SMBFLG_CASELESS | SMBFLG_CANONICAL_PATH_FORMAT); - - pSMB->req.AndXCommand = 0xFF; - pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf); - pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); - - if (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) - smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; - - capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | - CAP_EXTENDED_SECURITY; - if (ses->capabilities & CAP_UNICODE) { - smb_buffer->Flags2 |= SMBFLG2_UNICODE; - capabilities |= CAP_UNICODE; - } - if (ses->capabilities & CAP_STATUS32) { - smb_buffer->Flags2 |= SMBFLG2_ERR_STATUS; - capabilities |= CAP_STATUS32; - } - if (ses->capabilities & CAP_DFS) { - smb_buffer->Flags2 |= SMBFLG2_DFS; - capabilities |= CAP_DFS; - } - pSMB->req.Capabilities = cpu_to_le32(capabilities); - - bcc_ptr = (char *) &pSMB->req.SecurityBlob; - SecurityBlob = (PNEGOTIATE_MESSAGE) bcc_ptr; - strncpy(SecurityBlob->Signature, NTLMSSP_SIGNATURE, 8); - SecurityBlob->MessageType = NtLmNegotiate; - negotiate_flags = - NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_NEGOTIATE_OEM | - NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_NTLM | - NTLMSSP_NEGOTIATE_56 | - /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN | */ NTLMSSP_NEGOTIATE_128; - if (sign_CIFS_PDUs) - negotiate_flags |= NTLMSSP_NEGOTIATE_SIGN; - /* setup pointers to domain name and workstation name */ - bcc_ptr += SecurityBlobLength; - - SecurityBlob->WorkstationName.BufferOffset = 0; - SecurityBlob->WorkstationName.Length = 0; - 
SecurityBlob->WorkstationName.MaximumLength = 0; - - /* Domain not sent on first Sesssetup in NTLMSSP, instead it is sent - along with username on auth request (ie the response to challenge) */ - SecurityBlob->DomainName.BufferOffset = 0; - SecurityBlob->DomainName.Length = 0; - SecurityBlob->DomainName.MaximumLength = 0; - if (ses->capabilities & CAP_UNICODE) { - if ((long) bcc_ptr % 2) { - *bcc_ptr = 0; - bcc_ptr++; - } - - bytes_returned = - cifs_strtoUCS((__le16 *) bcc_ptr, "Linux version ", - 32, nls_codepage); - bcc_ptr += 2 * bytes_returned; - bytes_returned = - cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32, - nls_codepage); - bcc_ptr += 2 * bytes_returned; - bcc_ptr += 2; /* null terminate Linux version */ - bytes_returned = - cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS, - 64, nls_codepage); - bcc_ptr += 2 * bytes_returned; - *(bcc_ptr + 1) = 0; - *(bcc_ptr + 2) = 0; - bcc_ptr += 2; /* null terminate network opsys string */ - *(bcc_ptr + 1) = 0; - *(bcc_ptr + 2) = 0; - bcc_ptr += 2; /* null domain */ - } else { /* ASCII */ - strcpy(bcc_ptr, "Linux version "); - bcc_ptr += strlen("Linux version "); - strcpy(bcc_ptr, utsname()->release); - bcc_ptr += strlen(utsname()->release) + 1; - strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); - bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; - bcc_ptr++; /* empty domain field */ - *bcc_ptr = 0; - } - SecurityBlob->NegotiateFlags = cpu_to_le32(negotiate_flags); - pSMB->req.SecurityBlobLength = cpu_to_le16(SecurityBlobLength); - count = (long) bcc_ptr - (long) pByteArea(smb_buffer); - smb_buffer->smb_buf_length += count; - pSMB->req.ByteCount = cpu_to_le16(count); - - rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, - &bytes_returned, CIFS_LONG_OP); - - if (smb_buffer_response->Status.CifsError == - cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED)) - rc = 0; - - if (rc) { -/* rc = map_smb_to_linux_error(smb_buffer_response); *//* done in SendReceive now */ - } else if ((smb_buffer_response->WordCount == 3) - 
|| (smb_buffer_response->WordCount == 4)) { - __u16 action = le16_to_cpu(pSMBr->resp.Action); - __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength); - - if (action & GUEST_LOGIN) - cFYI(1, ("Guest login")); - /* Do we want to set anything in SesInfo struct when guest login? */ - - bcc_ptr = pByteArea(smb_buffer_response); - /* response can have either 3 or 4 word count - Samba sends 3 */ - - SecurityBlob2 = (PCHALLENGE_MESSAGE) bcc_ptr; - if (SecurityBlob2->MessageType != NtLmChallenge) { - cFYI(1, ("Unexpected NTLMSSP message type received %d", - SecurityBlob2->MessageType)); - } else if (ses) { - ses->Suid = smb_buffer_response->Uid; /* UID left in le format */ - cFYI(1, ("UID = %d", ses->Suid)); - if ((pSMBr->resp.hdr.WordCount == 3) - || ((pSMBr->resp.hdr.WordCount == 4) - && (blob_len < - pSMBr->resp.ByteCount))) { - - if (pSMBr->resp.hdr.WordCount == 4) { - bcc_ptr += blob_len; - cFYI(1, ("Security Blob Length %d", - blob_len)); - } - - cFYI(1, ("NTLMSSP Challenge rcvd")); - - memcpy(ses->server->cryptKey, - SecurityBlob2->Challenge, - CIFS_CRYPTO_KEY_SIZE); -/* NTLMV2 flag is not for NTLMv2 password hash */ -/* if (SecurityBlob2->NegotiateFlags & - cpu_to_le32(NTLMSSP_NEGOTIATE_NTLMV2)) - *pNTLMv2_flag = true; */ /* BB wrong */ - - if ((SecurityBlob2->NegotiateFlags & - cpu_to_le32(NTLMSSP_NEGOTIATE_ALWAYS_SIGN)) - || (sign_CIFS_PDUs > 1)) - ses->server->secMode |= - SECMODE_SIGN_REQUIRED; - if ((SecurityBlob2->NegotiateFlags & - cpu_to_le32(NTLMSSP_NEGOTIATE_SIGN)) && (sign_CIFS_PDUs)) - ses->server->secMode |= - SECMODE_SIGN_ENABLED; - - if (smb_buffer->Flags2 & SMBFLG2_UNICODE) { - if ((long) (bcc_ptr) % 2) { - remaining_words = - (BCC(smb_buffer_response) - - 1) / 2; - /* Must word align unicode strings */ - bcc_ptr++; - } else { - remaining_words = - BCC - (smb_buffer_response) / 2; - } - len = - UniStrnlen((wchar_t *) bcc_ptr, - remaining_words - 1); -/* We look for obvious messed up bcc or strings in response so we do not go off - the end 
since (at least) WIN2K and Windows XP have a major bug in not null - terminating last Unicode string in response */ - kfree(ses->serverOS); - ses->serverOS = - kzalloc(2 * (len + 1), GFP_KERNEL); - cifs_strfromUCS_le(ses->serverOS, - (__le16 *) - bcc_ptr, len, - nls_codepage); - bcc_ptr += 2 * (len + 1); - remaining_words -= len + 1; - ses->serverOS[2 * len] = 0; - ses->serverOS[1 + (2 * len)] = 0; - if (remaining_words > 0) { - len = UniStrnlen((wchar_t *) - bcc_ptr, - remaining_words - - 1); - kfree(ses->serverNOS); - ses->serverNOS = - kzalloc(2 * (len + 1), - GFP_KERNEL); - cifs_strfromUCS_le(ses-> - serverNOS, - (__le16 *) - bcc_ptr, - len, - nls_codepage); - bcc_ptr += 2 * (len + 1); - ses->serverNOS[2 * len] = 0; - ses->serverNOS[1 + - (2 * len)] = 0; - remaining_words -= len + 1; - if (remaining_words > 0) { - len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); - /* last string not always null terminated - (for e.g. for Windows XP & 2000) */ - kfree(ses->serverDomain); - ses->serverDomain = - kzalloc(2 * - (len + - 1), - GFP_KERNEL); - cifs_strfromUCS_le - (ses->serverDomain, - (__le16 *)bcc_ptr, - len, nls_codepage); - bcc_ptr += - 2 * (len + 1); - ses->serverDomain[2*len] - = 0; - ses->serverDomain - [1 + (2 * len)] - = 0; - } /* else no more room so create dummy domain string */ - else { - kfree(ses->serverDomain); - ses->serverDomain = - kzalloc(2, - GFP_KERNEL); - } - } else { /* no room so create dummy domain and NOS string */ - kfree(ses->serverDomain); - ses->serverDomain = - kzalloc(2, GFP_KERNEL); - kfree(ses->serverNOS); - ses->serverNOS = - kzalloc(2, GFP_KERNEL); - } - } else { /* ASCII */ - len = strnlen(bcc_ptr, 1024); - if (((long) bcc_ptr + len) - (long) - pByteArea(smb_buffer_response) - <= BCC(smb_buffer_response)) { - kfree(ses->serverOS); - ses->serverOS = - kzalloc(len + 1, - GFP_KERNEL); - strncpy(ses->serverOS, - bcc_ptr, len); - - bcc_ptr += len; - bcc_ptr[0] = 0; /* null terminate string */ - bcc_ptr++; - - len = 
strnlen(bcc_ptr, 1024); - kfree(ses->serverNOS); - ses->serverNOS = - kzalloc(len + 1, - GFP_KERNEL); - strncpy(ses->serverNOS, bcc_ptr, len); - bcc_ptr += len; - bcc_ptr[0] = 0; - bcc_ptr++; - - len = strnlen(bcc_ptr, 1024); - kfree(ses->serverDomain); - ses->serverDomain = - kzalloc(len + 1, - GFP_KERNEL); - strncpy(ses->serverDomain, - bcc_ptr, len); - bcc_ptr += len; - bcc_ptr[0] = 0; - bcc_ptr++; - } else - cFYI(1, - ("field of length %d " - "extends beyond end of smb", - len)); - } - } else { - cERROR(1, ("Security Blob Length extends beyond" - " end of SMB")); - } - } else { - cERROR(1, ("No session structure passed in.")); - } - } else { - cERROR(1, ("Invalid Word count %d:", - smb_buffer_response->WordCount)); - rc = -EIO; - } - - cifs_buf_release(smb_buffer); - - return rc; -} - -static int -CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, - char *ntlm_session_key, bool ntlmv2_flag, - const struct nls_table *nls_codepage) -{ - struct smb_hdr *smb_buffer; - struct smb_hdr *smb_buffer_response; - SESSION_SETUP_ANDX *pSMB; - SESSION_SETUP_ANDX *pSMBr; - char *bcc_ptr; - char *user; - char *domain; - int rc = 0; - int remaining_words = 0; - int bytes_returned = 0; - int len; - int SecurityBlobLength = sizeof(AUTHENTICATE_MESSAGE); - PAUTHENTICATE_MESSAGE SecurityBlob; - __u32 negotiate_flags, capabilities; - __u16 count; - - cFYI(1, ("In NTLMSSPSessSetup (Authenticate)")); - if (ses == NULL) - return -EINVAL; - user = ses->userName; - domain = ses->domainName; - smb_buffer = cifs_buf_get(); - if (smb_buffer == NULL) { - return -ENOMEM; - } - smb_buffer_response = smb_buffer; - pSMB = (SESSION_SETUP_ANDX *)smb_buffer; - pSMBr = (SESSION_SETUP_ANDX *)smb_buffer_response; - - /* send SMBsessionSetup here */ - header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX, - NULL /* no tCon exists yet */ , 12 /* wct */ ); - - smb_buffer->Mid = GetNextMid(ses->server); - pSMB->req.hdr.Flags |= (SMBFLG_CASELESS | SMBFLG_CANONICAL_PATH_FORMAT); - 
pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; - pSMB->req.AndXCommand = 0xFF; - pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf); - pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); - - pSMB->req.hdr.Uid = ses->Suid; - - if (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) - smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; - - capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | - CAP_EXTENDED_SECURITY; - if (ses->capabilities & CAP_UNICODE) { - smb_buffer->Flags2 |= SMBFLG2_UNICODE; - capabilities |= CAP_UNICODE; - } - if (ses->capabilities & CAP_STATUS32) { - smb_buffer->Flags2 |= SMBFLG2_ERR_STATUS; - capabilities |= CAP_STATUS32; - } - if (ses->capabilities & CAP_DFS) { - smb_buffer->Flags2 |= SMBFLG2_DFS; - capabilities |= CAP_DFS; - } - pSMB->req.Capabilities = cpu_to_le32(capabilities); - - bcc_ptr = (char *)&pSMB->req.SecurityBlob; - SecurityBlob = (PAUTHENTICATE_MESSAGE)bcc_ptr; - strncpy(SecurityBlob->Signature, NTLMSSP_SIGNATURE, 8); - SecurityBlob->MessageType = NtLmAuthenticate; - bcc_ptr += SecurityBlobLength; - negotiate_flags = NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_REQUEST_TARGET | - NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_TARGET_INFO | - NTLMSSP_NEGOTIATE_56 | NTLMSSP_NEGOTIATE_128 | - NTLMSSP_NEGOTIATE_EXTENDED_SEC; - if (sign_CIFS_PDUs) - negotiate_flags |= /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN |*/ NTLMSSP_NEGOTIATE_SIGN; - -/* setup pointers to domain name and workstation name */ - - SecurityBlob->WorkstationName.BufferOffset = 0; - SecurityBlob->WorkstationName.Length = 0; - SecurityBlob->WorkstationName.MaximumLength = 0; - SecurityBlob->SessionKey.Length = 0; - SecurityBlob->SessionKey.MaximumLength = 0; - SecurityBlob->SessionKey.BufferOffset = 0; - - SecurityBlob->LmChallengeResponse.Length = 0; - SecurityBlob->LmChallengeResponse.MaximumLength = 0; - SecurityBlob->LmChallengeResponse.BufferOffset = 0; - - SecurityBlob->NtChallengeResponse.Length = - cpu_to_le16(CIFS_SESS_KEY_SIZE); - 
SecurityBlob->NtChallengeResponse.MaximumLength = - cpu_to_le16(CIFS_SESS_KEY_SIZE); - memcpy(bcc_ptr, ntlm_session_key, CIFS_SESS_KEY_SIZE); - SecurityBlob->NtChallengeResponse.BufferOffset = - cpu_to_le32(SecurityBlobLength); - SecurityBlobLength += CIFS_SESS_KEY_SIZE; - bcc_ptr += CIFS_SESS_KEY_SIZE; - - if (ses->capabilities & CAP_UNICODE) { - if (domain == NULL) { - SecurityBlob->DomainName.BufferOffset = 0; - SecurityBlob->DomainName.Length = 0; - SecurityBlob->DomainName.MaximumLength = 0; - } else { - __u16 ln = cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64, - nls_codepage); - ln *= 2; - SecurityBlob->DomainName.MaximumLength = - cpu_to_le16(ln); - SecurityBlob->DomainName.BufferOffset = - cpu_to_le32(SecurityBlobLength); - bcc_ptr += ln; - SecurityBlobLength += ln; - SecurityBlob->DomainName.Length = cpu_to_le16(ln); - } - if (user == NULL) { - SecurityBlob->UserName.BufferOffset = 0; - SecurityBlob->UserName.Length = 0; - SecurityBlob->UserName.MaximumLength = 0; - } else { - __u16 ln = cifs_strtoUCS((__le16 *) bcc_ptr, user, 64, - nls_codepage); - ln *= 2; - SecurityBlob->UserName.MaximumLength = - cpu_to_le16(ln); - SecurityBlob->UserName.BufferOffset = - cpu_to_le32(SecurityBlobLength); - bcc_ptr += ln; - SecurityBlobLength += ln; - SecurityBlob->UserName.Length = cpu_to_le16(ln); - } - - /* SecurityBlob->WorkstationName.Length = - cifs_strtoUCS((__le16 *) bcc_ptr, "AMACHINE",64, nls_codepage); - SecurityBlob->WorkstationName.Length *= 2; - SecurityBlob->WorkstationName.MaximumLength = - cpu_to_le16(SecurityBlob->WorkstationName.Length); - SecurityBlob->WorkstationName.BufferOffset = - cpu_to_le32(SecurityBlobLength); - bcc_ptr += SecurityBlob->WorkstationName.Length; - SecurityBlobLength += SecurityBlob->WorkstationName.Length; - SecurityBlob->WorkstationName.Length = - cpu_to_le16(SecurityBlob->WorkstationName.Length); */ - - if ((long) bcc_ptr % 2) { - *bcc_ptr = 0; - bcc_ptr++; - } - bytes_returned = - cifs_strtoUCS((__le16 *) bcc_ptr, "Linux 
version ", - 32, nls_codepage); - bcc_ptr += 2 * bytes_returned; - bytes_returned = - cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32, - nls_codepage); - bcc_ptr += 2 * bytes_returned; - bcc_ptr += 2; /* null term version string */ - bytes_returned = - cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS, - 64, nls_codepage); - bcc_ptr += 2 * bytes_returned; - *(bcc_ptr + 1) = 0; - *(bcc_ptr + 2) = 0; - bcc_ptr += 2; /* null terminate network opsys string */ - *(bcc_ptr + 1) = 0; - *(bcc_ptr + 2) = 0; - bcc_ptr += 2; /* null domain */ - } else { /* ASCII */ - if (domain == NULL) { - SecurityBlob->DomainName.BufferOffset = 0; - SecurityBlob->DomainName.Length = 0; - SecurityBlob->DomainName.MaximumLength = 0; - } else { - __u16 ln; - negotiate_flags |= NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED; - strncpy(bcc_ptr, domain, 63); - ln = strnlen(domain, 64); - SecurityBlob->DomainName.MaximumLength = - cpu_to_le16(ln); - SecurityBlob->DomainName.BufferOffset = - cpu_to_le32(SecurityBlobLength); - bcc_ptr += ln; - SecurityBlobLength += ln; - SecurityBlob->DomainName.Length = cpu_to_le16(ln); - } - if (user == NULL) { - SecurityBlob->UserName.BufferOffset = 0; - SecurityBlob->UserName.Length = 0; - SecurityBlob->UserName.MaximumLength = 0; - } else { - __u16 ln; - strncpy(bcc_ptr, user, 63); - ln = strnlen(user, 64); - SecurityBlob->UserName.MaximumLength = cpu_to_le16(ln); - SecurityBlob->UserName.BufferOffset = - cpu_to_le32(SecurityBlobLength); - bcc_ptr += ln; - SecurityBlobLength += ln; - SecurityBlob->UserName.Length = cpu_to_le16(ln); - } - /* BB fill in our workstation name if known BB */ - - strcpy(bcc_ptr, "Linux version "); - bcc_ptr += strlen("Linux version "); - strcpy(bcc_ptr, utsname()->release); - bcc_ptr += strlen(utsname()->release) + 1; - strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); - bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; - bcc_ptr++; /* null domain */ - *bcc_ptr = 0; - } - SecurityBlob->NegotiateFlags = cpu_to_le32(negotiate_flags); - 
pSMB->req.SecurityBlobLength = cpu_to_le16(SecurityBlobLength); - count = (long) bcc_ptr - (long) pByteArea(smb_buffer); - smb_buffer->smb_buf_length += count; - pSMB->req.ByteCount = cpu_to_le16(count); - - rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, - &bytes_returned, CIFS_LONG_OP); - if (rc) { -/* rc = map_smb_to_linux_error(smb_buffer_response) done in SendReceive now */ - } else if ((smb_buffer_response->WordCount == 3) || - (smb_buffer_response->WordCount == 4)) { - __u16 action = le16_to_cpu(pSMBr->resp.Action); - __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength); - if (action & GUEST_LOGIN) - cFYI(1, ("Guest login")); /* BB Should we set anything - in SesInfo struct ? */ -/* if (SecurityBlob2->MessageType != NtLm??) { - cFYI("Unexpected message type on auth response is %d")); - } */ - - if (ses) { - cFYI(1, - ("Check challenge UID %d vs auth response UID %d", - ses->Suid, smb_buffer_response->Uid)); - /* UID left in wire format */ - ses->Suid = smb_buffer_response->Uid; - bcc_ptr = pByteArea(smb_buffer_response); - /* response can have either 3 or 4 word count - Samba sends 3 */ - if ((pSMBr->resp.hdr.WordCount == 3) - || ((pSMBr->resp.hdr.WordCount == 4) - && (blob_len < - pSMBr->resp.ByteCount))) { - if (pSMBr->resp.hdr.WordCount == 4) { - bcc_ptr += - blob_len; - cFYI(1, - ("Security Blob Length %d ", - blob_len)); - } - - cFYI(1, - ("NTLMSSP response to Authenticate ")); - - if (smb_buffer->Flags2 & SMBFLG2_UNICODE) { - if ((long) (bcc_ptr) % 2) { - remaining_words = - (BCC(smb_buffer_response) - - 1) / 2; - bcc_ptr++; /* Unicode strings must be word aligned */ - } else { - remaining_words = BCC(smb_buffer_response) / 2; - } - len = UniStrnlen((wchar_t *) bcc_ptr, - remaining_words - 1); -/* We look for obvious messed up bcc or strings in response so we do not go off - the end since (at least) WIN2K and Windows XP have a major bug in not null - terminating last Unicode string in response */ - kfree(ses->serverOS); - 
ses->serverOS = - kzalloc(2 * (len + 1), GFP_KERNEL); - cifs_strfromUCS_le(ses->serverOS, - (__le16 *) - bcc_ptr, len, - nls_codepage); - bcc_ptr += 2 * (len + 1); - remaining_words -= len + 1; - ses->serverOS[2 * len] = 0; - ses->serverOS[1 + (2 * len)] = 0; - if (remaining_words > 0) { - len = UniStrnlen((wchar_t *) - bcc_ptr, - remaining_words - - 1); - kfree(ses->serverNOS); - ses->serverNOS = - kzalloc(2 * (len + 1), - GFP_KERNEL); - cifs_strfromUCS_le(ses-> - serverNOS, - (__le16 *) - bcc_ptr, - len, - nls_codepage); - bcc_ptr += 2 * (len + 1); - ses->serverNOS[2 * len] = 0; - ses->serverNOS[1+(2*len)] = 0; - remaining_words -= len + 1; - if (remaining_words > 0) { - len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); - /* last string not always null terminated (e.g. for Windows XP & 2000) */ - kfree(ses->serverDomain); - ses->serverDomain = - kzalloc(2 * - (len + - 1), - GFP_KERNEL); - cifs_strfromUCS_le - (ses-> - serverDomain, - (__le16 *) - bcc_ptr, len, - nls_codepage); - bcc_ptr += - 2 * (len + 1); - ses-> - serverDomain[2 - * len] - = 0; - ses-> - serverDomain[1 - + - (2 - * - len)] - = 0; - } /* else no more room so create dummy domain string */ - else { - kfree(ses->serverDomain); - ses->serverDomain = kzalloc(2,GFP_KERNEL); - } - } else { /* no room so create dummy domain and NOS string */ - kfree(ses->serverDomain); - ses->serverDomain = kzalloc(2, GFP_KERNEL); - kfree(ses->serverNOS); - ses->serverNOS = kzalloc(2, GFP_KERNEL); - } - } else { /* ASCII */ - len = strnlen(bcc_ptr, 1024); - if (((long) bcc_ptr + len) - - (long) pByteArea(smb_buffer_response) - <= BCC(smb_buffer_response)) { - kfree(ses->serverOS); - ses->serverOS = kzalloc(len + 1, GFP_KERNEL); - strncpy(ses->serverOS,bcc_ptr, len); - - bcc_ptr += len; - bcc_ptr[0] = 0; /* null terminate the string */ - bcc_ptr++; - - len = strnlen(bcc_ptr, 1024); - kfree(ses->serverNOS); - ses->serverNOS = kzalloc(len+1, - GFP_KERNEL); - strncpy(ses->serverNOS, - bcc_ptr, len); - bcc_ptr += len; 
- bcc_ptr[0] = 0; - bcc_ptr++; - - len = strnlen(bcc_ptr, 1024); - kfree(ses->serverDomain); - ses->serverDomain = - kzalloc(len+1, - GFP_KERNEL); - strncpy(ses->serverDomain, - bcc_ptr, len); - bcc_ptr += len; - bcc_ptr[0] = 0; - bcc_ptr++; - } else - cFYI(1, ("field of length %d " - "extends beyond end of smb ", - len)); - } - } else { - cERROR(1, ("Security Blob extends beyond end " - "of SMB")); - } - } else { - cERROR(1, ("No session structure passed in.")); - } - } else { - cERROR(1, ("Invalid Word count %d: ", - smb_buffer_response->WordCount)); - rc = -EIO; - } - - cifs_buf_release(smb_buffer); - - return rc; -} - int CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, const char *tree, struct cifsTconInfo *tcon, -- cgit v1.2.3 From 341060273232a2df0d1a7fa53abc661fcf22747c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 2 May 2009 04:59:34 +0000 Subject: [CIFS] remove cifs_strfromUCS_le Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifs_unicode.c | 29 ----------------------------- fs/cifs/cifs_unicode.h | 1 - 2 files changed, 30 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 6382720acf7..60e3c4253de 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -180,35 +180,6 @@ cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, return outlen; } -/* - * NAME: cifs_strfromUCS() - * - * FUNCTION: Convert little-endian unicode string to character string - * - */ -int -cifs_strfromUCS_le(char *to, const __le16 *from, - int len, const struct nls_table *codepage) -{ - int i; - int outlen = 0; - - for (i = 0; (i < len) && from[i]; i++) { - int charlen; - /* 2.4.0 kernel or greater */ - charlen = - codepage->uni2char(le16_to_cpu(from[i]), &to[outlen], - NLS_MAX_CHARSET_SIZE); - if (charlen > 0) { - outlen += charlen; - } else { - to[outlen++] = '?'; - } - } - to[outlen] = 0; - return outlen; -} - /* * NAME: cifs_strtoUCS() * diff --git a/fs/cifs/cifs_unicode.h 
b/fs/cifs/cifs_unicode.h index 1570a701bf3..650638275a6 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -76,7 +76,6 @@ int cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, const struct nls_table *codepage, bool mapchar); int cifs_ucs2_bytes(const __le16 *from, int maxbytes, const struct nls_table *codepage); -int cifs_strfromUCS_le(char *, const __le16 *, int, const struct nls_table *); int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *); char *cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode, -- cgit v1.2.3 From afe48c31ea5c74eaac58621ce1c85ae8187c4383 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 2 May 2009 05:25:46 +0000 Subject: [CIFS] Fix final user of old string conversion code Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 80 ++++++++++++++++++++++++++----------------------------- 1 file changed, 38 insertions(+), 42 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index b968e5bd7df..0ac32bd336d 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2564,7 +2564,6 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata, *pparmlen = parm_count; return 0; } -#endif /* CIFS_EXPERIMENTAL */ int CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, @@ -2611,59 +2610,55 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, } else { /* decode response */ __u32 data_offset = le32_to_cpu(pSMBr->DataOffset); __u32 data_count = le32_to_cpu(pSMBr->DataCount); - if ((pSMBr->ByteCount < 2) || (data_offset > 512)) + if ((pSMBr->ByteCount < 2) || (data_offset > 512)) { /* BB also check enough total bytes returned */ rc = -EIO; /* bad smb */ - else { - if (data_count && (data_count < 2048)) { - char *end_of_smb = 2 /* sizeof byte count */ + - pSMBr->ByteCount + - (char *)&pSMBr->ByteCount; + goto qreparse_out; + } + if (data_count && (data_count < 2048)) { + char *end_of_smb = 2 /* sizeof byte 
count */ + + pSMBr->ByteCount + (char *)&pSMBr->ByteCount; - struct reparse_data *reparse_buf = + struct reparse_data *reparse_buf = (struct reparse_data *) ((char *)&pSMBr->hdr.Protocol + data_offset); - if ((char *)reparse_buf >= end_of_smb) { - rc = -EIO; - goto qreparse_out; - } - if ((reparse_buf->LinkNamesBuf + - reparse_buf->TargetNameOffset + - reparse_buf->TargetNameLen) > - end_of_smb) { - cFYI(1, ("reparse buf beyond SMB")); - rc = -EIO; - goto qreparse_out; - } + if ((char *)reparse_buf >= end_of_smb) { + rc = -EIO; + goto qreparse_out; + } + if ((reparse_buf->LinkNamesBuf + + reparse_buf->TargetNameOffset + + reparse_buf->TargetNameLen) > end_of_smb) { + cFYI(1, ("reparse buf beyond SMB")); + rc = -EIO; + goto qreparse_out; + } - if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len = UniStrnlen((wchar_t *) + if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { + cifs_from_ucs2(symlinkinfo, (__le16 *) (reparse_buf->LinkNamesBuf + reparse_buf->TargetNameOffset), - min(buflen/2, - reparse_buf->TargetNameLen / 2)); - cifs_strfromUCS_le(symlinkinfo, - (__le16 *) (reparse_buf->LinkNamesBuf + - reparse_buf->TargetNameOffset), - name_len, nls_codepage); - } else { /* ASCII names */ - strncpy(symlinkinfo, - reparse_buf->LinkNamesBuf + - reparse_buf->TargetNameOffset, - min_t(const int, buflen, - reparse_buf->TargetNameLen)); - } - } else { - rc = -EIO; - cFYI(1, ("Invalid return data count on " - "get reparse info ioctl")); + buflen, + reparse_buf->TargetNameLen, + nls_codepage, 0); + } else { /* ASCII names */ + strncpy(symlinkinfo, + reparse_buf->LinkNamesBuf + + reparse_buf->TargetNameOffset, + min_t(const int, buflen, + reparse_buf->TargetNameLen)); } - symlinkinfo[buflen] = 0; /* just in case so the caller - does not go off the end of the buffer */ - cFYI(1, ("readlink result - %s", symlinkinfo)); + } else { + rc = -EIO; + cFYI(1, ("Invalid return data count on " + "get reparse info ioctl")); } + symlinkinfo[buflen] = 0; /* just in case so the caller + does not go 
off the end of the buffer */ + cFYI(1, ("readlink result - %s", symlinkinfo)); } + qreparse_out: cifs_buf_release(pSMB); @@ -2672,6 +2667,7 @@ qreparse_out: return rc; } +#endif /* CIFS_EXPERIMENTAL */ #ifdef CONFIG_CIFS_POSIX -- cgit v1.2.3 From 989c7e512f4dc976b10803ab0c449bdaaf3eaabd Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 2 May 2009 05:32:20 +0000 Subject: [CIFS] remove checkpatch warning Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 0ac32bd336d..259f633ca59 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2658,7 +2658,7 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, does not go off the end of the buffer */ cFYI(1, ("readlink result - %s", symlinkinfo)); } - + qreparse_out: cifs_buf_release(pSMB); -- cgit v1.2.3 From 24d35add2bd09a427cacb7a39e14f3e47ed4d766 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 2 May 2009 05:40:39 +0000 Subject: [CIFS] Remove sparse warning Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 259f633ca59..4e10efd2432 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2573,7 +2573,6 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, { int rc = 0; int bytes_returned; - int name_len; struct smb_com_transaction_ioctl_req *pSMB; struct smb_com_transaction_ioctl_rsp *pSMBr; -- cgit v1.2.3 From a8985f3ac503b51c5abf8883fc4fb912e13b955c Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Thu, 30 Apr 2009 15:08:09 -0700 Subject: autofs4: fix incorrect return in autofs4_mount_busy() Fix an obvious incorrect return status in autofs4_mount_busy(). 
Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/expire.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 75f7ddacf7d..3077d8f1652 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -70,8 +70,10 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) * Otherwise it's an offset mount and we need to check * if we can umount its mount, if there is one. */ - if (!d_mountpoint(dentry)) + if (!d_mountpoint(dentry)) { + status = 0; goto done; + } } /* Update the expiry counter if fs is busy */ -- cgit v1.2.3 From b827e496c893de0c0f142abfaeb8730a2fd6b37f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 30 Apr 2009 15:08:16 -0700 Subject: mm: close page_mkwrite races Change page_mkwrite to allow implementations to return with the page locked, and also change its callers (in page fault paths) to hold the lock until the page is marked dirty. This allows the filesystem to have full control of page dirtying events coming from the VM. Rather than simply hold the page locked over the page_mkwrite call, we call page_mkwrite with the page unlocked and allow callers to return with it locked, so filesystems can avoid LOR conditions with page lock. The problem with the current scheme is this: a filesystem that wants to associate some metadata with a page as long as the page is dirty, will perform this manipulation in its ->page_mkwrite. It currently then must return with the page unlocked and may not hold any other locks (according to existing page_mkwrite convention). In this window, the VM could write out the page, clearing page-dirty. The filesystem has no good way to detect that a dirty pte is about to be attached, so it will happily write out the page, at which point, the filesystem may manipulate the metadata to reflect that the page is no longer dirty. 
It is not always possible to perform the required metadata manipulation in ->set_page_dirty, because that function cannot block or fail. The filesystem may need to allocate some data structure, for example. And the VM cannot mark the pte dirty before page_mkwrite, because page_mkwrite is allowed to fail, so we must not allow any window where the page could be written to if page_mkwrite does fail. This solution of holding the page locked over the 3 critical operations (page_mkwrite, setting the pte dirty, and finally setting the page dirty) closes out races nicely, preventing page cleaning for writeout being initiated in that window. This provides the filesystem with a strong synchronisation against the VM here. - Sage needs this race closed for ceph filesystem. - Trond for NFS (http://bugzilla.kernel.org/show_bug.cgi?id=12913). - I need it for fsblock. - I suspect other filesystems may need it too (eg. btrfs). - I have converted buffer.c to the new locking. Even simple block allocation under dirty pages might be susceptible to i_size changing under partial page at the end of file (we also have a buffer.c-side problem here, but it cannot be fixed properly without this patch). - Other filesystems (eg. NFS, maybe btrfs) will need to change their page_mkwrite functions themselves. [ This also moves page_mkwrite another step closer to fault, which should eventually allow page_mkwrite to be moved into ->fault, and thus avoiding a filesystem calldown and page lock/unlock cycle in __do_fault. 
] [akpm@linux-foundation.org: fix derefs of NULL ->mapping] Cc: Sage Weil Cc: Trond Myklebust Signed-off-by: Nick Piggin Cc: Valdis Kletnieks Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index b3e5be7514f..aed297739eb 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2397,7 +2397,8 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, if ((page->mapping != inode->i_mapping) || (page_offset(page) > size)) { /* page got truncated out from underneath us */ - goto out_unlock; + unlock_page(page); + goto out; } /* page is wholly or partially inside EOF */ @@ -2411,14 +2412,15 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, ret = block_commit_write(page, 0, end); if (unlikely(ret)) { + unlock_page(page); if (ret == -ENOMEM) ret = VM_FAULT_OOM; else /* -ENOSPC, -EIO, etc */ ret = VM_FAULT_SIGBUS; - } + } else + ret = VM_FAULT_LOCKED; -out_unlock: - unlock_page(page); +out: return ret; } -- cgit v1.2.3 From 0816178638c15ce5472d39d771a96860dff4141a Mon Sep 17 00:00:00 2001 From: Vitaly Mayatskikh Date: Thu, 30 Apr 2009 15:08:18 -0700 Subject: pagemap: require aligned-length, non-null reads of /proc/pid/pagemap The intention of commit aae8679b0ebcaa92f99c1c3cb0cd651594a43915 ("pagemap: fix bug in add_to_pagemap, require aligned-length reads of /proc/pid/pagemap") was to force reads of /proc/pid/pagemap to be a multiple of 8 bytes, but now it allows to read 0 bytes, which actually puts some data to user's buffer. According to POSIX, if count is zero, read() should return zero and has no other results. 
Signed-off-by: Vitaly Mayatskikh Cc: Thomas Tuttle Acked-by: Matt Mackall Cc: Alexey Dobriyan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 39e4ad4f59f..6f61b7cc32e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -665,6 +665,10 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, goto out_task; ret = 0; + + if (!count) + goto out_task; + mm = get_task_mm(task); if (!mm) goto out_task; -- cgit v1.2.3 From 74641f584da8eccf30becfbb5507ab457187db22 Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Thu, 30 Apr 2009 15:08:49 -0700 Subject: alpha: binfmt_aout fix This fixes the problem introduced by commit 3bfacef412 (get rid of special-casing the /sbin/loader on alpha): osf/1 ecoff binary segfaults when binfmt_aout built as module. That happens because aout binary handler gets on the top of the binfmt list due to late registration, and kernel attempts to execute the binary without preparatory work that must be done by binfmt_loader. Fixed by changing the registration order of the default binfmt handlers using list_add_tail() and introducing insert_binfmt() function which places new handler on the top of the binfmt list. This might be generally useful for installing arch-specific frontends for default handlers or just for overriding them. 
Signed-off-by: Ivan Kokshaysky Cc: Al Viro Cc: Richard Henderson Signed-off-by: Linus Torvalds --- fs/exec.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index a3a8ce83940..639177b0eea 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -69,17 +69,18 @@ int suid_dumpable = 0; static LIST_HEAD(formats); static DEFINE_RWLOCK(binfmt_lock); -int register_binfmt(struct linux_binfmt * fmt) +int __register_binfmt(struct linux_binfmt * fmt, int insert) { if (!fmt) return -EINVAL; write_lock(&binfmt_lock); - list_add(&fmt->lh, &formats); + insert ? list_add(&fmt->lh, &formats) : + list_add_tail(&fmt->lh, &formats); write_unlock(&binfmt_lock); return 0; } -EXPORT_SYMBOL(register_binfmt); +EXPORT_SYMBOL(__register_binfmt); void unregister_binfmt(struct linux_binfmt * fmt) { -- cgit v1.2.3 From 00a62ce91e554198ef28234c91c36f850f5a3bc9 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 30 Apr 2009 15:08:51 -0700 Subject: mm: fix Committed_AS underflow on large NR_CPUS environment The Committed_AS field can underflow in certain situations: > # while true; do cat /proc/meminfo | grep _AS; sleep 1; done | uniq -c > 1 Committed_AS: 18446744073709323392 kB > 11 Committed_AS: 18446744073709455488 kB > 6 Committed_AS: 35136 kB > 5 Committed_AS: 18446744073709454400 kB > 7 Committed_AS: 35904 kB > 3 Committed_AS: 18446744073709453248 kB > 2 Committed_AS: 34752 kB > 9 Committed_AS: 18446744073709453248 kB > 8 Committed_AS: 34752 kB > 3 Committed_AS: 18446744073709320960 kB > 7 Committed_AS: 18446744073709454080 kB > 3 Committed_AS: 18446744073709320960 kB > 5 Committed_AS: 18446744073709454080 kB > 6 Committed_AS: 18446744073709320960 kB This happens because NR_CPUS can be greater than 1000 and meminfo_proc_show() does not check for underflow. But a calculation proportional to NR_CPUS isn't a good one. In general, the possibility of lock contention is proportional to the number of online cpus, not the theoretical maximum number of cpus (NR_CPUS). 
The current kernel has generic percpu-counter stuff. Using it is the right way: it simplifies the code, and percpu_counter_read_positive() doesn't have the underflow issue. Reported-by: Dave Hansen Signed-off-by: KOSAKI Motohiro Cc: Eric B Munson Cc: Mel Gorman Cc: Christoph Lameter Cc: [All kernel versions] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/meminfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 74ea974f5ca..c6b0302af4c 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -35,7 +35,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #define K(x) ((x) << (PAGE_SHIFT - 10)) si_meminfo(&i); si_swapinfo(&i); - committed = atomic_long_read(&vm_committed_space); + committed = percpu_counter_read_positive(&vm_committed_as); allowed = ((totalram_pages - hugetlb_total_pages()) * sysctl_overcommit_ratio / 100) + total_swap_pages; -- cgit v1.2.3 From 0ae05fb254a5f2617fc8fcfff7be959b54a5e963 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 30 Apr 2009 15:08:54 -0700 Subject: ptrace: s/parent/real_parent/ in binfmt_elf_fdpic.c ->real_parent is the parent. ->parent may be the tracer. 
Signed-off-by: Oleg Nesterov Acked-by: David Howells Acked-by: Roland McGrath Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf_fdpic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 70cfc4b84ae..fdb66faa24f 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1388,7 +1388,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus, prstatus->pr_sigpend = p->pending.signal.sig[0]; prstatus->pr_sighold = p->blocked.sig[0]; prstatus->pr_pid = task_pid_vnr(p); - prstatus->pr_ppid = task_pid_vnr(p->parent); + prstatus->pr_ppid = task_pid_vnr(p->real_parent); prstatus->pr_pgrp = task_pgrp_vnr(p); prstatus->pr_sid = task_session_vnr(p); if (thread_group_leader(p)) { @@ -1433,7 +1433,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_psargs[len] = 0; psinfo->pr_pid = task_pid_vnr(p); - psinfo->pr_ppid = task_pid_vnr(p->parent); + psinfo->pr_ppid = task_pid_vnr(p->real_parent); psinfo->pr_pgrp = task_pgrp_vnr(p); psinfo->pr_sid = task_session_vnr(p); -- cgit v1.2.3 From 7fdf523067666b0eaff330f362401ee50ce187c4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 24 Apr 2009 17:32:22 -0400 Subject: NFS: Close page_mkwrite() races Follow up to Nick Piggin's patches to ensure that nfs_vm_page_mkwrite returns with the page lock held, and sets the VM_FAULT_LOCKED flag. 
See http://bugzilla.kernel.org/show_bug.cgi?id=12913 Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5a97bcfe03e..ec7e27d00bc 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -517,10 +517,10 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = nfs_updatepage(filp, page, 0, pagelen); out_unlock: + if (!ret) + return VM_FAULT_LOCKED; unlock_page(page); - if (ret) - ret = VM_FAULT_SIGBUS; - return ret; + return VM_FAULT_SIGBUS; } static struct vm_operations_struct nfs_file_vm_ops = { -- cgit v1.2.3 From ccecee1e5e42981f5eb37f4411e8552b9db04d30 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 1 May 2009 11:33:53 -0400 Subject: nfsd41: slots are freed with session The session and slots are allocated all in one piece. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c65a27b76a9..3b711f5147a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -580,7 +580,6 @@ free_session(struct kref *kref) struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; nfsd4_release_respages(e->ce_respages, e->ce_resused); } - kfree(ses->se_slots); kfree(ses); } -- cgit v1.2.3 From 0b3cc858003b79b6c66ad79415ead907cbe4074e Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 4 May 2009 08:37:12 +0000 Subject: [CIFS] NTLMSSP reenabled after move from connect.c to sess.c The NTLMSSP code was removed from fs/cifs/connect.c and merged (75% smaller, cleaner) into fs/cifs/sess.c As with the old code it requires that cifs be built with CONFIG_CIFS_EXPERIMENTAL, the /proc/fs/cifs/Experimental flag must be set to 2, and mount must turn on extended security (e.g. with sec=krb5). 
Although NTLMSSP encapsulated in SPNEGO is not enabled yet, "raw" ntlmssp is common and useful in some cases since it offers more complete security negotiation, and is the default way of negotiating security for many Windows systems. SPNEGO encapsulated NTLMSSP will be able to reuse the same code. Signed-off-by: Steve French --- fs/cifs/CHANGES | 3 +- fs/cifs/sess.c | 252 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 251 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 43f2e0d061f..f20c4069c22 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -8,7 +8,8 @@ accurately mark string length before allocating memory (which may help the rare cases where a UTF-8 string is much larger than the UCS2 string that we converted from). Fix endianness of the vcnum field used during session setup to distinguish multiple mounts to same server from different -userids. +userids. Raw NTLMSSP fixed (it requires /proc/fs/cifs/experimental +flag to be set to 2, and mount must enable krb5 to turn on extended security). 
Version 1.57 ------------ diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index b2bdc2a3383..e68744e169b 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -378,6 +378,186 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft, return rc; } +static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, + struct cifsSesInfo *ses) +{ + CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr; + + if (blob_len < sizeof(CHALLENGE_MESSAGE)) { + cERROR(1, ("challenge blob len %d too small", blob_len)); + return -EINVAL; + } + + if (memcmp(pblob->Signature, "NTLMSSP", 8)) { + cERROR(1, ("blob signature incorrect %s", pblob->Signature)); + return -EINVAL; + } + if (pblob->MessageType != NtLmChallenge) { + cERROR(1, ("Incorrect message type %d", pblob->MessageType)); + return -EINVAL; + } + + memcpy(ses->server->cryptKey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE); + /* BB we could decode pblob->NegotiateFlags; some may be useful */ + /* In particular we can examine sign flags */ + /* BB spec says that if AvId field of MsvAvTimestamp is populated then + we must set the MIC field of the AUTHENTICATE_MESSAGE */ + + return 0; +} + +#ifdef CONFIG_CIFS_EXPERIMENTAL +/* BB Move to ntlmssp.c eventually */ + +/* We do not malloc the blob, it is passed in pbuffer, because + it is fixed size, and small, making this approach cleaner */ +static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, + struct cifsSesInfo *ses) +{ + NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer; + __u32 flags; + + memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); + sec_blob->MessageType = NtLmNegotiate; + + /* BB is NTLMV2 session security format easier to use here? 
*/ + flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | + NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | + NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM; + if (ses->server->secMode & + (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + flags |= NTLMSSP_NEGOTIATE_SIGN; + if (ses->server->secMode & SECMODE_SIGN_REQUIRED) + flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN; + + sec_blob->NegotiateFlags |= cpu_to_le32(flags); + + sec_blob->WorkstationName.BufferOffset = 0; + sec_blob->WorkstationName.Length = 0; + sec_blob->WorkstationName.MaximumLength = 0; + + /* Domain name is sent on the Challenge not Negotiate NTLMSSP request */ + sec_blob->DomainName.BufferOffset = 0; + sec_blob->DomainName.Length = 0; + sec_blob->DomainName.MaximumLength = 0; +} + +/* We do not malloc the blob, it is passed in pbuffer, because its + maximum possible size is fixed and small, making this approach cleaner. + This function returns the length of the data in the blob */ +static int build_ntlmssp_auth_blob(unsigned char *pbuffer, + struct cifsSesInfo *ses, + const struct nls_table *nls_cp, int first) +{ + AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; + __u32 flags; + unsigned char *tmp; + char ntlm_session_key[CIFS_SESS_KEY_SIZE]; + + memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); + sec_blob->MessageType = NtLmAuthenticate; + + flags = NTLMSSP_NEGOTIATE_56 | + NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | + NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | + NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM; + if (ses->server->secMode & + (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + flags |= NTLMSSP_NEGOTIATE_SIGN; + if (ses->server->secMode & SECMODE_SIGN_REQUIRED) + flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN; + + tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE); + sec_blob->NegotiateFlags |= cpu_to_le32(flags); + + sec_blob->LmChallengeResponse.BufferOffset = + cpu_to_le32(sizeof(AUTHENTICATE_MESSAGE)); + sec_blob->LmChallengeResponse.Length = 0; 
+ sec_blob->LmChallengeResponse.MaximumLength = 0; + + /* calculate session key, BB what about adding similar ntlmv2 path? */ + SMBNTencrypt(ses->password, ses->server->cryptKey, ntlm_session_key); + if (first) + cifs_calculate_mac_key(&ses->server->mac_signing_key, + ntlm_session_key, ses->password); + + memcpy(tmp, ntlm_session_key, CIFS_SESS_KEY_SIZE); + sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->NtChallengeResponse.Length = cpu_to_le16(CIFS_SESS_KEY_SIZE); + sec_blob->NtChallengeResponse.MaximumLength = + cpu_to_le16(CIFS_SESS_KEY_SIZE); + + tmp += CIFS_SESS_KEY_SIZE; + + if (ses->domainName == NULL) { + sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->DomainName.Length = 0; + sec_blob->DomainName.MaximumLength = 0; + tmp += 2; + } else { + int len; + len = cifs_strtoUCS((__le16 *)tmp, ses->domainName, + MAX_USERNAME_SIZE, nls_cp); + len *= 2; /* unicode is 2 bytes each */ + len += 2; /* trailing null */ + sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->DomainName.Length = cpu_to_le16(len); + sec_blob->DomainName.MaximumLength = cpu_to_le16(len); + tmp += len; + } + + if (ses->userName == NULL) { + sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->UserName.Length = 0; + sec_blob->UserName.MaximumLength = 0; + tmp += 2; + } else { + int len; + len = cifs_strtoUCS((__le16 *)tmp, ses->userName, + MAX_USERNAME_SIZE, nls_cp); + len *= 2; /* unicode is 2 bytes each */ + len += 2; /* trailing null */ + sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->UserName.Length = cpu_to_le16(len); + sec_blob->UserName.MaximumLength = cpu_to_le16(len); + tmp += len; + } + + sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->WorkstationName.Length = 0; + sec_blob->WorkstationName.MaximumLength = 0; + tmp += 2; + + sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->SessionKey.Length = 
0; + sec_blob->SessionKey.MaximumLength = 0; + return tmp - pbuffer; +} + + +static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB, + struct cifsSesInfo *ses) +{ + build_ntlmssp_negotiate_blob(&pSMB->req.SecurityBlob[0], ses); + pSMB->req.SecurityBlobLength = cpu_to_le16(sizeof(NEGOTIATE_MESSAGE)); + + return; +} + +static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB, + struct cifsSesInfo *ses, + const struct nls_table *nls, int first_time) +{ + int bloblen; + + bloblen = build_ntlmssp_auth_blob(&pSMB->req.SecurityBlob[0], ses, nls, + first_time); + pSMB->req.SecurityBlobLength = cpu_to_le16(bloblen); + + return bloblen; +} +#endif + int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, const struct nls_table *nls_cp) @@ -396,6 +576,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, __u16 action; int bytes_remaining; struct key *spnego_key = NULL; + __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ if (ses == NULL) return -EINVAL; @@ -403,6 +584,10 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, type = ses->server->secType; cFYI(1, ("sess setup type %d", type)); +ssetup_ntlmssp_authenticate: + if (phase == NtLmChallenge) + phase = NtLmAuthenticate; /* if ntlmssp, now final phase */ + if (type == LANMAN) { #ifndef CONFIG_CIFS_WEAK_PW_HASH /* LANMAN and plaintext are less secure and off by default. 
@@ -616,9 +801,49 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, goto ssetup_exit; #endif /* CONFIG_CIFS_UPCALL */ } else { +#ifdef CONFIG_CIFS_EXPERIMENTAL + if ((experimEnabled > 1) && (type == RawNTLMSSP)) { + if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { + cERROR(1, ("NTLMSSP requires Unicode support")); + rc = -ENOSYS; + goto ssetup_exit; + } + + cFYI(1, ("ntlmssp session setup phase %d", phase)); + pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; + capabilities |= CAP_EXTENDED_SECURITY; + pSMB->req.Capabilities |= cpu_to_le32(capabilities); + if (phase == NtLmNegotiate) { + setup_ntlmssp_neg_req(pSMB, ses); + iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE); + } else if (phase == NtLmAuthenticate) { + int blob_len; + blob_len = setup_ntlmssp_auth_req(pSMB, ses, + nls_cp, + first_time); + iov[1].iov_len = blob_len; + } else { + cERROR(1, ("invalid phase %d", phase)); + rc = -ENOSYS; + goto ssetup_exit; + } + iov[1].iov_base = &pSMB->req.SecurityBlob[0]; + /* unicode strings must be word aligned */ + if ((iov[0].iov_len + iov[1].iov_len) % 2) { + *bcc_ptr = 0; + bcc_ptr++; + } + unicode_oslm_strings(&bcc_ptr, nls_cp); + } else { + cERROR(1, ("secType %d not supported!", type)); + rc = -ENOSYS; + goto ssetup_exit; + } +#else cERROR(1, ("secType %d not supported!", type)); rc = -ENOSYS; goto ssetup_exit; +#endif } iov[2].iov_base = str_area; @@ -634,12 +859,23 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, /* SMB request buf freed in SendReceive2 */ cFYI(1, ("ssetup rc from sendrecv2 is %d", rc)); - if (rc) - goto ssetup_exit; pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; smb_buf = (struct smb_hdr *)iov[0].iov_base; + if ((type == RawNTLMSSP) && (smb_buf->Status.CifsError == + cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) { + if (phase != NtLmNegotiate) { + cERROR(1, ("Unexpected more processing error")); + goto ssetup_exit; + } + /* NTLMSSP Negotiate sent now processing challenge (response) */ + phase = 
NtLmChallenge; /* process ntlmssp challenge */ + rc = 0; /* MORE_PROC rc is not an error here, but expected */ + } + if (rc) + goto ssetup_exit; + if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) { rc = -EIO; cERROR(1, ("bad word count %d", smb_buf->WordCount)); @@ -658,12 +894,18 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, if (smb_buf->WordCount == 4) { __u16 blob_len; blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); - bcc_ptr += blob_len; if (blob_len > bytes_remaining) { cERROR(1, ("bad security blob length %d", blob_len)); rc = -EINVAL; goto ssetup_exit; } + if (phase == NtLmChallenge) { + rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses); + /* now goto beginning for ntlmssp authenticate phase */ + if (rc) + goto ssetup_exit; + } + bcc_ptr += blob_len; bytes_remaining -= blob_len; } @@ -692,5 +934,9 @@ ssetup_exit: } else if (resp_buf_type == CIFS_LARGE_BUFFER) cifs_buf_release(iov[0].iov_base); + /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */ + if ((phase == NtLmChallenge) && (rc == 0)) + goto ssetup_ntlmssp_authenticate; + return rc; } -- cgit v1.2.3 From f83ce3e6b02d5e48b3a43b001390e2b58820389d Mon Sep 17 00:00:00 2001 From: Jake Edge Date: Mon, 4 May 2009 12:51:14 -0600 Subject: proc: avoid information leaks to non-privileged processes By using the same test as is used for /proc/pid/maps and /proc/pid/smaps, only allow processes that can ptrace() a given process to see information that might be used to bypass address space layout randomization (ASLR). These include eip, esp, wchan, and start_stack in /proc/pid/stat as well as the non-symbolic output from /proc/pid/wchan. ASLR can be bypassed by sampling eip as shown by the proof-of-concept code at http://code.google.com/p/fuzzyaslr/ As part of a presentation (http://www.cr0.org/paper/to-jt-linux-alsr-leak.pdf) esp and wchan were also noted as possibly usable information leaks as well. 
The start_stack address also leaks potentially useful information. Cc: Stable Team Signed-off-by: Jake Edge Acked-by: Arjan van de Ven Acked-by: "Eric W. Biederman" Signed-off-by: Linus Torvalds --- fs/proc/array.c | 13 +++++++++---- fs/proc/base.c | 5 ++++- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index 7e4877d9dcb..725a650bbbb 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include @@ -352,6 +353,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, char state; pid_t ppid = 0, pgid = -1, sid = -1; int num_threads = 0; + int permitted; struct mm_struct *mm; unsigned long long start_time; unsigned long cmin_flt = 0, cmaj_flt = 0; @@ -364,11 +366,14 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, state = *get_task_state(task); vsize = eip = esp = 0; + permitted = ptrace_may_access(task, PTRACE_MODE_READ); mm = get_task_mm(task); if (mm) { vsize = task_vsize(mm); - eip = KSTK_EIP(task); - esp = KSTK_ESP(task); + if (permitted) { + eip = KSTK_EIP(task); + esp = KSTK_ESP(task); + } } get_task_comm(tcomm, task); @@ -424,7 +429,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, unlock_task_sighand(task, &flags); } - if (!whole || num_threads < 2) + if (permitted && (!whole || num_threads < 2)) wchan = get_wchan(task); if (!whole) { min_flt = task->min_flt; @@ -476,7 +481,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, rsslim, mm ? mm->start_code : 0, mm ? mm->end_code : 0, - mm ? mm->start_stack : 0, + (permitted && mm) ? mm->start_stack : 0, esp, eip, /* The signal information here is obsolete. 
diff --git a/fs/proc/base.c b/fs/proc/base.c index aa763ab0077..fb45615943c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -322,7 +322,10 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) wchan = get_wchan(task); if (lookup_symbol_name(wchan, symname) < 0) - return sprintf(buffer, "%lu", wchan); + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + return 0; + else + return sprintf(buffer, "%lu", wchan); else return sprintf(buffer, "%s", symname); } -- cgit v1.2.3 From 844823cb822932d2c599abf38692e3d6a5b5a320 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 6 May 2009 00:48:30 +0000 Subject: [CIFS] Fix SMB uid in NTLMSSP authenticate request We were not setting the SMB uid in NTLMSSP authenticate request which could lead to INVALID_PARAMETER error on 2nd session setup. Signed-off-by: Steve French --- fs/cifs/sess.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index e68744e169b..897a052270f 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -822,6 +822,10 @@ ssetup_ntlmssp_authenticate: nls_cp, first_time); iov[1].iov_len = blob_len; + /* Make sure that we tell the server that we + are using the uid that it just gave us back + on the response (challenge) */ + smb_buf->Uid = ses->Suid; } else { cERROR(1, ("invalid phase %d", phase)); rc = -ENOSYS; -- cgit v1.2.3 From ac68392460ffefed13020967bae04edc4d3add06 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 6 May 2009 04:16:04 +0000 Subject: [CIFS] Allow raw ntlmssp code to be enabled with sec=ntlmssp On mount, "sec=ntlmssp" can now be specified to allow "rawntlmssp" security to be enabled during CIFS session establishment/authentication (ntlmssp used to require specifying krb5 which was counterintuitive). 
Signed-off-by: Steve French --- fs/cifs/README | 10 +++++++++- fs/cifs/cifsglob.h | 16 +++++++++------- fs/cifs/cifssmb.c | 10 ++++++++++ fs/cifs/connect.c | 7 +++++++ 4 files changed, 35 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/cifs/README b/fs/cifs/README index 07434181623..db208ddb989 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -651,7 +651,15 @@ Experimental When set to 1 used to enable certain experimental signing turned on in case buffer was modified just before it was sent, also this flag will be used to use the new experimental directory change - notification code). + notification code). When set to 2 enables + an additional experimental feature, "raw ntlmssp" + session establishment support (which allows + specifying "sec=ntlmssp" on mount). The Linux cifs + module will use ntlmv2 authentication encapsulated + in "raw ntlmssp" (not using SPNEGO) when + "sec=ntlmssp" is specified on mount. + This support also requires building cifs with + the CONFIG_CIFS_EXPERIMENTAL configuration flag. 
These experimental features and tracing can be enabled by changing flags in /proc/fs/cifs (after the cifs module has been installed or built into the diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index df40ab64cd9..a61ab772c6f 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -82,8 +82,8 @@ enum securityEnum { LANMAN, /* Legacy LANMAN auth */ NTLM, /* Legacy NTLM012 auth with NTLM hash */ NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ - RawNTLMSSP, /* NTLMSSP without SPNEGO */ - NTLMSSP, /* NTLMSSP via SPNEGO */ + RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ + NTLMSSP, /* NTLMSSP via SPNEGO, NTLMv2 hash */ Kerberos, /* Kerberos via SPNEGO */ MSKerberos, /* MS Kerberos via SPNEGO */ }; @@ -531,6 +531,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, #define CIFSSEC_MAY_PLNTXT 0 #endif /* weak passwords */ #define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */ +#define CIFSSEC_MAY_NTLMSSP 0x00080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_MUST_SIGN 0x01001 /* note that only one of the following can be set so the @@ -543,22 +544,23 @@ require use of the stronger protocol */ #define CIFSSEC_MUST_LANMAN 0x10010 #define CIFSSEC_MUST_PLNTXT 0x20020 #ifdef CONFIG_CIFS_UPCALL -#define CIFSSEC_MASK 0x3F03F /* allows weak security but also krb5 */ +#define CIFSSEC_MASK 0xAF0AF /* allows weak security but also krb5 */ #else -#define CIFSSEC_MASK 0x37037 /* current flags supported if weak */ +#define CIFSSEC_MASK 0xA70A7 /* current flags supported if weak */ #endif /* UPCALL */ #else /* do not allow weak pw hash */ #ifdef CONFIG_CIFS_UPCALL -#define CIFSSEC_MASK 0x0F00F /* flags supported if no weak allowed */ +#define CIFSSEC_MASK 0x8F08F /* flags supported if no weak allowed */ #else -#define CIFSSEC_MASK 0x07007 /* flags supported if no weak allowed */ +#define CIFSSEC_MASK 0x87087 /* flags supported if no weak allowed */ #endif /* UPCALL */ #endif /* WEAK_PW_HASH */ #define CIFSSEC_MUST_SEAL 0x40040 /* not supported 
yet */ +#define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2) #define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) -#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5) +#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) /* ***************************************************************** * All constants go here diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 4e10efd2432..75e6623a863 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -449,6 +449,14 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) cFYI(1, ("Kerberos only mechanism, enable extended security")); pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; } +#ifdef CONFIG_CIFS_EXPERIMENTAL + else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP) + pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; + else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) { + cFYI(1, ("NTLMSSP only mechanism, enable extended security")); + pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; + } +#endif count = 0; for (i = 0; i < CIFS_NUM_PROT; i++) { @@ -585,6 +593,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) server->secType = NTLMv2; else if (secFlags & CIFSSEC_MAY_KRB5) server->secType = Kerberos; + else if (secFlags & CIFSSEC_MAY_NTLMSSP) + server->secType = NTLMSSP; else if (secFlags & CIFSSEC_MAY_LANMAN) server->secType = LANMAN; /* #ifdef CONFIG_CIFS_EXPERIMENTAL diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 3a934dd8422..4aa81a507b7 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -979,6 +979,13 @@ cifs_parse_mount_options(char *options, const char *devname, return 1; } else if (strnicmp(value, "krb5", 4) == 0) { vol->secFlg |= CIFSSEC_MAY_KRB5; +#ifdef CONFIG_CIFS_EXPERIMENTAL + } else if (strnicmp(value, 
"ntlmsspi", 8) == 0) { + vol->secFlg |= CIFSSEC_MAY_NTLMSSP | + CIFSSEC_MUST_SIGN; + } else if (strnicmp(value, "ntlmssp", 7) == 0) { + vol->secFlg |= CIFSSEC_MAY_NTLMSSP; +#endif } else if (strnicmp(value, "ntlmv2i", 7) == 0) { vol->secFlg |= CIFSSEC_MAY_NTLMV2 | CIFSSEC_MUST_SIGN; -- cgit v1.2.3 From b2c0cea6b1cb210e962f07047df602875564069e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 5 May 2009 19:04:29 -0400 Subject: nfsd4: check for negative dentry before use in nfsv4 readdir After 2f9092e1020246168b1309b35e085ecd7ff9ff72 "Fix i_mutex vs. readdir handling in nfsd" (and 14f7dd63 "Copy XFS readdir hack into nfsd code"), an entry may be removed between the first mutex_unlock and the second mutex_lock. In this case, lookup_one_len() will return a negative dentry. Check for this case to avoid a NULL dereference. Signed-off-by: J. Bruce Fields Reviewed-by: J. R. Okajima Cc: stable@kernel.org --- fs/nfsd/nfs4xdr.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b820c311931..b73549d293b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2214,6 +2214,15 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); if (IS_ERR(dentry)) return nfserrno(PTR_ERR(dentry)); + if (!dentry->d_inode) { + /* + * nfsd_buffered_readdir drops the i_mutex between + * readdir and calling this callback, leaving a window + * where this directory entry could have gone away. + */ + dput(dentry); + return nfserr_noent; + } exp_get(exp); /* @@ -2276,6 +2285,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); int buflen; __be32 *p = cd->buffer; + __be32 *cookiep; __be32 nfserr = nfserr_toosmall; /* In nfsv4, "." and ".." never make it onto the wire.. 
*/ @@ -2292,7 +2302,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, goto fail; *p++ = xdr_one; /* mark entry present */ - cd->offset = p; /* remember pointer */ + cookiep = p; p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ @@ -2306,6 +2316,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, goto fail; case nfserr_dropit: goto fail; + case nfserr_noent: + goto skip_entry; default: /* * If the client requested the RDATTR_ERROR attribute, @@ -2324,6 +2336,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, } cd->buflen -= (p - cd->buffer); cd->buffer = p; + cd->offset = cookiep; +skip_entry: cd->common.err = nfs_ok; return 0; fail: -- cgit v1.2.3 From 89996df4b5b1a09c279f50b3fd03aa9df735f5cb Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 6 May 2009 16:32:54 -0400 Subject: lockd: fix list corruption on lockd restart If lockd is signalled soon enough after restart then locks_start_grace() will try to re-add an entry to a list and trigger a lock corruption warning. Thanks to Wang Chen for the problem report and diagnosis. WARNING: at lib/list_debug.c:26 __list_add+0x27/0x5c() ... list_add corruption. next->prev should be prev (ef8fe958), but was ef8ff128. (next=ef8ff128). ... Pid: 23062, comm: lockd Tainted: G W 2.6.30-rc2 #3 Call Trace: [] warn_slowpath+0x71/0xa0 [] ? update_curr+0x11d/0x125 [] ? trace_hardirqs_on_caller+0x18/0x150 [] ? trace_hardirqs_on+0xb/0xd [] ? _raw_spin_lock+0x53/0xfa [] __list_add+0x27/0x5c [] locks_start_grace+0x22/0x30 [lockd] [] set_grace_period+0x39/0x53 [lockd] [] ? lock_kernel+0x1c/0x28 [] lockd+0x64/0x164 [lockd] [] ? trace_hardirqs_on_caller+0x18/0x150 [] ? complete+0x34/0x3e [] ? lockd+0x0/0x164 [lockd] [] ? lockd+0x0/0x164 [lockd] [] kthread+0x45/0x6b [] ? kthread+0x0/0x6b [] kernel_thread_helper+0x7/0x10 Reported-by: Wang Chen Signed-off-by: J. 
Bruce Fields Cc: stable@kernel.org --- fs/lockd/svc.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index abf83881f68..1a54ae14a19 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -104,6 +104,16 @@ static void set_grace_period(void) schedule_delayed_work(&grace_period_end, grace_period); } +static void restart_grace(void) +{ + if (nlmsvc_ops) { + cancel_delayed_work_sync(&grace_period_end); + locks_end_grace(&lockd_manager); + nlmsvc_invalidate_all(); + set_grace_period(); + } +} + /* * This is the lockd kernel thread */ @@ -149,10 +159,7 @@ lockd(void *vrqstp) if (signalled()) { flush_signals(current); - if (nlmsvc_ops) { - nlmsvc_invalidate_all(); - set_grace_period(); - } + restart_grace(); continue; } -- cgit v1.2.3 From 381a80e6df396eaabef2c00f85974a4579ac1c70 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 6 May 2009 16:02:50 -0700 Subject: inotify: use GFP_NOFS in kernel_event() to work around a lockdep false-positive There is what we believe to be a false positive reported by lockdep. inotify_inode_queue_event() => take inotify_mutex => kernel_event() => kmalloc() => SLOB => alloc_pages_node() => page reclaim => slab reclaim => dcache reclaim => inotify_inode_is_dead => take inotify_mutex => deadlock The plan is to fix this via lockdep annotation, but that is proving to be quite involved. The patch flips the allocation over to GFP_NOFS to shut the warning up, for the 2.6.30 release. Hopefully we will fix this for real in 2.6.31. I'll queue a patch in -mm to switch it back to GFP_KERNEL so we don't forget. ================================= [ INFO: inconsistent lock state ] 2.6.30-rc2-next-20090417 #203 --------------------------------- inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage.
kswapd0/380 [HC0[0]:SC0[0]:HE1:SE1] takes: (&inode->inotify_mutex){+.+.?.}, at: [] inotify_inode_is_dead+0x35/0xb0 {RECLAIM_FS-ON-W} state was registered at: [] mark_held_locks+0x68/0x90 [] lockdep_trace_alloc+0xf5/0x100 [] __kmalloc_node+0x31/0x1e0 [] kernel_event+0xe2/0x190 [] inotify_dev_queue_event+0x126/0x230 [] inotify_inode_queue_event+0xc6/0x110 [] vfs_create+0xcd/0x140 [] do_filp_open+0x88d/0xa20 [] do_sys_open+0x98/0x140 [] sys_open+0x20/0x30 [] system_call_fastpath+0x16/0x1b [] 0xffffffffffffffff irq event stamp: 690455 hardirqs last enabled at (690455): [] _spin_unlock_irqrestore+0x44/0x80 hardirqs last disabled at (690454): [] _spin_lock_irqsave+0x32/0xa0 softirqs last enabled at (690178): [] __do_softirq+0x202/0x220 softirqs last disabled at (690157): [] call_softirq+0x1c/0x50 other info that might help us debug this: 2 locks held by kswapd0/380: #0: (shrinker_rwsem){++++..}, at: [] shrink_slab+0x37/0x180 #1: (&type->s_umount_key#17){++++..}, at: [] shrink_dcache_memory+0x11f/0x1e0 stack backtrace: Pid: 380, comm: kswapd0 Not tainted 2.6.30-rc2-next-20090417 #203 Call Trace: [] print_usage_bug+0x19f/0x200 [] ? save_stack_trace+0x2f/0x50 [] mark_lock+0x4bb/0x6d0 [] ? check_usage_forwards+0x0/0xc0 [] __lock_acquire+0xc62/0x1ae0 [] ? slob_free+0x10c/0x370 [] lock_acquire+0xe1/0x120 [] ? inotify_inode_is_dead+0x35/0xb0 [] mutex_lock_nested+0x63/0x420 [] ? inotify_inode_is_dead+0x35/0xb0 [] ? inotify_inode_is_dead+0x35/0xb0 [] ? sched_clock+0x9/0x10 [] ? lock_release_holdtime+0x35/0x1c0 [] inotify_inode_is_dead+0x35/0xb0 [] dentry_iput+0xbc/0xe0 [] d_kill+0x33/0x60 [] __shrink_dcache_sb+0x2d3/0x350 [] shrink_dcache_memory+0x15a/0x1e0 [] shrink_slab+0x125/0x180 [] kswapd+0x560/0x7a0 [] ? isolate_pages_global+0x0/0x2c0 [] ? autoremove_wake_function+0x0/0x40 [] ? trace_hardirqs_on+0xd/0x10 [] ? kswapd+0x0/0x7a0 [] kthread+0x5b/0xa0 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? kthread+0x0/0xa0 [] ? 
child_rip+0x0/0x20 [eparis@redhat.com: fix audit too] Cc: Al Viro Cc: Matt Mackall Cc: Christoph Lameter Signed-off-by: Wu Fengguang Signed-off-by: Eric Paris Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/notify/inotify/inotify_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index bed766e435b..1634319e240 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -220,7 +220,7 @@ static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie, rem = 0; } - kevent->name = kmalloc(len + rem, GFP_KERNEL); + kevent->name = kmalloc(len + rem, GFP_NOFS); if (unlikely(!kevent->name)) { kmem_cache_free(event_cachep, kevent); return NULL; -- cgit v1.2.3 From df3935ffd6166fdd00702cf548fb5bb55737758b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 6 May 2009 16:02:53 -0700 Subject: fiemap: fix problem with setting FIEMAP_EXTENT_LAST Fix a problem where the generic block based fiemap stuff would not properly set FIEMAP_EXTENT_LAST on the last extent. I've reworked things to keep track if we go past the EOF, and mark the last extent properly. The problem was reported by and tested by Eric Sandeen. 
Tested-by: Eric Sandeen Signed-off-by: Josef Bacik Cc: Cc: Cc: Cc: Steven Whitehouse Cc: Mark Fasheh Cc: Joel Becker Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ioctl.c | 75 +++++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/ioctl.c b/fs/ioctl.c index ac2d47e4392..82d9c42b8ba 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -258,7 +258,7 @@ int __generic_block_fiemap(struct inode *inode, long long length = 0, map_len = 0; u64 logical = 0, phys = 0, size = 0; u32 flags = FIEMAP_EXTENT_MERGED; - int ret = 0; + int ret = 0, past_eof = 0, whole_file = 0; if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) return ret; @@ -266,6 +266,9 @@ int __generic_block_fiemap(struct inode *inode, start_blk = logical_to_blk(inode, start); length = (long long)min_t(u64, len, i_size_read(inode)); + if (length < len) + whole_file = 1; + map_len = length; do { @@ -282,11 +285,26 @@ int __generic_block_fiemap(struct inode *inode, /* HOLE */ if (!buffer_mapped(&tmp)) { + length -= blk_to_logical(inode, 1); + start_blk++; + + /* + * we want to handle the case where there is an + * allocated block at the front of the file, and then + * nothing but holes up to the end of the file properly, + * to make sure that extent at the front gets properly + * marked with FIEMAP_EXTENT_LAST + */ + if (!past_eof && + blk_to_logical(inode, start_blk) >= + blk_to_logical(inode, 0)+i_size_read(inode)) + past_eof = 1; + /* * first hole after going past the EOF, this is our * last extent */ - if (length <= 0) { + if (past_eof && size) { flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST; ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, @@ -294,15 +312,37 @@ int __generic_block_fiemap(struct inode *inode, break; } - length -= blk_to_logical(inode, 1); - /* if we have holes up to/past EOF then we're done */ - if (length <= 0) + if (length <= 0 || past_eof) break; - - start_blk++; } 
else { - if (length <= 0 && size) { + /* + * we have gone over the length of what we wanted to + * map, and it wasn't the entire file, so add the extent + * we got last time and exit. + * + * This is for the case where say we want to map all the + * way up to the second to the last block in a file, but + * the last block is a hole, making the second to last + * block FIEMAP_EXTENT_LAST. In this case we want to + * see if there is a hole after the second to last block + * so we can mark it properly. If we found data after + * we exceeded the length we were requesting, then we + * are good to go, just add the extent to the fieinfo + * and break + */ + if (length <= 0 && !whole_file) { + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, + flags); + break; + } + + /* + * if size != 0 then we know we already have an extent + * to add, so add it. + */ + if (size) { ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); @@ -319,19 +359,14 @@ int __generic_block_fiemap(struct inode *inode, start_blk += logical_to_blk(inode, size); /* - * if we are past the EOF we need to loop again to see - * if there is a hole so we can mark this extent as the - * last one, and if not keep mapping things until we - * find a hole, or we run out of slots in the extent - * array + * If we are past the EOF, then we need to make sure as + * soon as we find a hole that the last extent we found + * is marked with FIEMAP_EXTENT_LAST */ - if (length <= 0) - continue; - - ret = fiemap_fill_next_extent(fieinfo, logical, phys, - size, flags); - if (ret) - break; + if (!past_eof && + logical+size >= + blk_to_logical(inode, 0)+i_size_read(inode)) + past_eof = 1; } cond_resched(); } while (1); -- cgit v1.2.3 From 90e4ee5d311d4e0729daa676b1d7f754265b5874 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 8 May 2009 03:04:30 +0000 Subject: [CIFS] Fix double list addition in cifs posix open code Remove adding open file entry twice to lists in the file Do not fill file info 
twice in case of posix opens and creates Signed-off-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/dir.c | 15 +++++++++------ fs/cifs/file.c | 14 -------------- 2 files changed, 9 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 461750e0136..11431ed72a7 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -281,6 +281,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, int create_options = CREATE_NOT_DIR; int oplock = 0; int oflags; + bool posix_create = false; /* * BB below access is probably too much for mknod to request * but we have to do query and setpathinfo so requesting @@ -328,11 +329,13 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, negotation. EREMOTE indicates DFS junction, which is not handled in posix open */ - if ((rc == 0) && (newinode == NULL)) - goto cifs_create_get_file_info; /* query inode info */ - else if (rc == 0) /* success, no need to query */ - goto cifs_create_set_dentry; - else if ((rc != -EIO) && (rc != -EREMOTE) && + if (rc == 0) { + posix_create = true; + if (newinode == NULL) /* query inode info */ + goto cifs_create_get_file_info; + else /* success, no need to query */ + goto cifs_create_set_dentry; + } else if ((rc != -EIO) && (rc != -EREMOTE) && (rc != -EOPNOTSUPP)) /* path not found or net err */ goto cifs_create_out; /* else fallthrough to retry, using older open call, this is @@ -464,7 +467,7 @@ cifs_create_set_dentry: if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) { /* mknod case - do not leave file open */ CIFSSMBClose(xid, tcon, fileHandle); - } else if (newinode) { + } else if (!(posix_create) && (newinode)) { cifs_fill_fileinfo(newinode, fileHandle, cifs_sb->tcon, write_only); } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 50ca088d886..38c06f82657 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -129,15 +129,12 @@ static inline int cifs_posix_open_inode_helper(struct inode *inode, struct file *file, struct 
cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile, int oplock, u16 netfid) { - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); -/* struct timespec temp; */ /* BB REMOVEME BB */ file->private_data = kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); if (file->private_data == NULL) return -ENOMEM; pCifsFile = cifs_init_private(file->private_data, inode, file, netfid); write_lock(&GlobalSMBSeslock); - list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList); pCifsInode = CIFS_I(file->f_path.dentry->d_inode); if (pCifsInode == NULL) { @@ -145,17 +142,6 @@ static inline int cifs_posix_open_inode_helper(struct inode *inode, return -EINVAL; } - /* want handles we can use to read with first - in the list so we do not have to walk the - list to search for one in write_begin */ - if ((file->f_flags & O_ACCMODE) == O_WRONLY) { - list_add_tail(&pCifsFile->flist, - &pCifsInode->openFileList); - } else { - list_add(&pCifsFile->flist, - &pCifsInode->openFileList); - } - if (pCifsInode->clientCanCacheRead) { /* we have the inode open somewhere else no need to discard cache data */ -- cgit v1.2.3 From 85c2a74fabadfc561b75fbd7decc6bcbfe873d57 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 28 Apr 2009 23:38:46 +0900 Subject: nilfs2: fix possible recovery failure due to block creation without writer Some function calls in nilfs_prepare_segment_for_recovery() may fail because they can create blocks on meta data files without configuring a writable FS-instance. Concretely, nilfs_mdt_create_block() routine of meta data files will fail in that case. This fixes the problem by temporarily attaching a writable FS-instance while the function is called.
Signed-off-by: Ryusuke Konishi --- fs/nilfs2/recovery.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 4fc081e47d7..57afa9d2406 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -407,6 +407,7 @@ void nilfs_dispose_segment_list(struct list_head *head) } static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, + struct nilfs_sb_info *sbi, struct nilfs_recovery_info *ri) { struct list_head *head = &ri->ri_used_segments; @@ -421,6 +422,7 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, segnum[2] = ri->ri_segnum; segnum[3] = ri->ri_nextnum; + nilfs_attach_writer(nilfs, sbi); /* * Releasing the next segment of the latest super root. * The next segment is invalidated by this recovery. @@ -459,10 +461,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, nilfs->ns_pseg_offset = 0; nilfs->ns_seg_seq = ri->ri_seq + 2; nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0]; - return 0; failed: /* No need to recover sufile because it will be destroyed on error */ + nilfs_detach_writer(nilfs, sbi); return err; } @@ -728,7 +730,7 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs, goto failed; if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) { - err = nilfs_prepare_segment_for_recovery(nilfs, ri); + err = nilfs_prepare_segment_for_recovery(nilfs, sbi, ri); if (unlikely(err)) { printk(KERN_ERR "NILFS: Error preparing segments for " "recovery.\n"); -- cgit v1.2.3 From 201913ed746c7724a40d33ee5a0b6a1fd2ef3193 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 28 Apr 2009 21:04:59 +0900 Subject: nilfs2: fix circular locking dependency of writer mutex This fixes the following circular locking dependency problem: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.30-rc3 #5 ------------------------------------------------------- 
segctord/3895 is trying to acquire lock: (&nilfs->ns_writer_mutex){+.+...}, at: [] nilfs_mdt_get_block+0x89/0x20f [nilfs2] but task is already holding lock: (&bmap->b_sem){++++..}, at: [] nilfs_bmap_propagate+0x14/0x2e [nilfs2] which lock already depends on the new lock. The bugfix is done by replacing call sites of nilfs_get_writer() which are never called from read-only context with direct dereferencing of pointer to a writable FS-instance. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 8 +++++--- fs/nilfs2/mdt.c | 13 +++++++------ 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 108d281ebca..be387c6b2d4 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -516,14 +516,16 @@ static ssize_t nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - struct nilfs_sb_info *sbi = nilfs_get_writer(nilfs); + struct nilfs_sb_info *sbi = nilfs->ns_writer; int ret; - if (unlikely(!sbi)) + if (unlikely(!sbi)) { + /* never happens because called for a writable mount */ + WARN_ON(1); return -EROFS; + } ret = nilfs_segctor_add_segments_to_be_freed( NILFS_SC(sbi), buf, nmembs); - nilfs_put_writer(nilfs); return (ret < 0) ? ret : nmembs; } diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 47dd815433f..e1c6777931b 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -77,19 +77,22 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, void *)) { struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; - struct nilfs_sb_info *writer = NULL; struct super_block *sb = inode->i_sb; struct nilfs_transaction_info ti; struct buffer_head *bh; int err; if (!sb) { - writer = nilfs_get_writer(nilfs); - if (!writer) { + /* + * Make sure this function is not called from any + * read-only context. 
+ */ + if (!nilfs->ns_writer) { + WARN_ON(1); err = -EROFS; goto out; } - sb = writer->s_super; + sb = nilfs->ns_writer->s_super; } nilfs_transaction_begin(sb, &ti, 0); @@ -127,8 +130,6 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, err = nilfs_transaction_commit(sb); else nilfs_transaction_abort(sb); - if (writer) - nilfs_put_writer(nilfs); out: return err; } -- cgit v1.2.3 From 0c7a531a200480c7bc447260376973d830da9069 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 30 Apr 2009 14:52:58 +0100 Subject: GFS2: Fix glock ref counting bug Depending on the ordering of events as we go around the glock shrinker loop, it is possible to drop the ref count of a glock incorrectly. It doesn't happen very often. This patch corrects the got_ref variable, fixing the problem. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1afd9f26bcb..ff498109048 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1304,6 +1304,7 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) nr--; if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); + got_ref = 0; } spin_lock(&lru_lock); if (may_demote) -- cgit v1.2.3 From 67e55205ec55cc7899f1d783f217961596798419 Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Fri, 24 Apr 2009 09:06:53 +0200 Subject: vfs: umount_begin BKL pushdown Push BKL down into ->umount_begin() Signed-off-by: Alessio Igor Bogani Signed-off-by: Al Viro --- fs/9p/vfs_super.c | 6 +++++- fs/cifs/cifsfs.c | 3 +++ fs/fuse/inode.c | 3 +++ fs/namespace.c | 2 -- fs/nfs/super.c | 7 ++++++- 5 files changed, 17 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 5f8ab8adb5f..7d23214e559 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -230,9 +231,12 @@ static 
int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt) static void v9fs_umount_begin(struct super_block *sb) { - struct v9fs_session_info *v9ses = sb->s_fs_info; + struct v9fs_session_info *v9ses; + lock_kernel(); + v9ses = sb->s_fs_info; v9fs_session_cancel(v9ses); + unlock_kernel(); } static const struct super_operations v9fs_super_ops = { diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 0d6d8b57365..355e0efec0c 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "cifsfs.h" #include "cifspdu.h" #define DECLARE_GLOBALS_HERE @@ -530,6 +531,7 @@ static void cifs_umount_begin(struct super_block *sb) if (tcon == NULL) return; + lock_kernel(); read_lock(&cifs_tcp_ses_lock); if (tcon->tc_count == 1) tcon->tidStatus = CifsExiting; @@ -548,6 +550,7 @@ static void cifs_umount_begin(struct super_block *sb) } /* BB FIXME - finish add checks for tidStatus BB */ + unlock_kernel(); return; } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 459b73dd45e..d1bc4d33ccb 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -19,6 +19,7 @@ #include #include #include +#include MODULE_AUTHOR("Miklos Szeredi "); MODULE_DESCRIPTION("Filesystem in Userspace"); @@ -259,7 +260,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, static void fuse_umount_begin(struct super_block *sb) { + lock_kernel(); fuse_abort_conn(get_fuse_conn_super(sb)); + unlock_kernel(); } static void fuse_send_destroy(struct fuse_conn *fc) diff --git a/fs/namespace.c b/fs/namespace.c index 41196209a90..0d2003fb437 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1073,9 +1073,7 @@ static int do_umount(struct vfsmount *mnt, int flags) */ if (flags & MNT_FORCE && sb->s_op->umount_begin) { - lock_kernel(); sb->s_op->umount_begin(sb); - unlock_kernel(); } /* diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 6717200923f..1679a164c8c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -683,9 +683,12 @@ static int 
nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) */ static void nfs_umount_begin(struct super_block *sb) { - struct nfs_server *server = NFS_SB(sb); + struct nfs_server *server; struct rpc_clnt *rpc; + lock_kernel(); + + server = NFS_SB(sb); /* -EIO all pending I/O */ rpc = server->client_acl; if (!IS_ERR(rpc)) @@ -693,6 +696,8 @@ static void nfs_umount_begin(struct super_block *sb) rpc = server->client; if (!IS_ERR(rpc)) rpc_killall_tasks(rpc); + + unlock_kernel(); } /* -- cgit v1.2.3 From 5a6059c358d3b3004cb6b8562e3b049b1f8564f9 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 1 May 2009 12:11:12 -0400 Subject: reiserfs: Expand i_mutex to enclose lookup_one_len 2.6.30-rc3 introduced some sanity checks in the VFS code to avoid NFS bugs by ensuring that lookup_one_len is always called under i_mutex. This patch expands the i_mutex locking to enclose lookup_one_len. This was always required, but not enforced in the reiserfs code since it does locking around the xattr interactions with the xattr_sem. This is obvious enough, and it survived an overnight 50 thread ACL test. 
Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- fs/reiserfs/xattr.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index f83f52bae39..31a3dbb120e 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -120,25 +120,20 @@ static struct dentry *lookup_or_create_dir(struct dentry *parent, struct dentry *dentry; BUG_ON(!parent); + mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_XATTR); dentry = lookup_one_len(name, parent, strlen(name)); - if (IS_ERR(dentry)) - return dentry; - else if (!dentry->d_inode) { + if (!IS_ERR(dentry) && !dentry->d_inode) { int err = -ENODATA; - if (xattr_may_create(flags)) { - mutex_lock_nested(&parent->d_inode->i_mutex, - I_MUTEX_XATTR); + if (xattr_may_create(flags)) err = xattr_mkdir(parent->d_inode, dentry, 0700); - mutex_unlock(&parent->d_inode->i_mutex); - } if (err) { dput(dentry); dentry = ERR_PTR(err); } } - + mutex_unlock(&parent->d_inode->i_mutex); return dentry; } @@ -184,6 +179,7 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset, { struct reiserfs_dentry_buf *dbuf = buf; struct dentry *dentry; + WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex)); if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) return -ENOSPC; @@ -349,6 +345,7 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name, if (IS_ERR(xadir)) return ERR_CAST(xadir); + mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR); xafile = lookup_one_len(name, xadir, strlen(name)); if (IS_ERR(xafile)) { err = PTR_ERR(xafile); @@ -360,18 +357,15 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name, if (!xafile->d_inode) { err = -ENODATA; - if (xattr_may_create(flags)) { - mutex_lock_nested(&xadir->d_inode->i_mutex, - I_MUTEX_XATTR); + if (xattr_may_create(flags)) err = xattr_create(xadir->d_inode, xafile, 0700|S_IFREG); - mutex_unlock(&xadir->d_inode->i_mutex); - } 
} if (err) dput(xafile); out: + mutex_unlock(&xadir->d_inode->i_mutex); dput(xadir); if (err) return ERR_PTR(err); @@ -435,6 +429,7 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name) if (IS_ERR(xadir)) return PTR_ERR(xadir); + mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR); dentry = lookup_one_len(name, xadir, strlen(name)); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); @@ -442,14 +437,13 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name) } if (dentry->d_inode) { - mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR); err = xattr_unlink(xadir->d_inode, dentry); - mutex_unlock(&xadir->d_inode->i_mutex); update_ctime(inode); } dput(dentry); out_dput: + mutex_unlock(&xadir->d_inode->i_mutex); dput(xadir); return err; } @@ -906,9 +900,9 @@ static int create_privroot(struct dentry *dentry) { int err; struct inode *inode = dentry->d_parent->d_inode; - mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR); + WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex)); + err = xattr_mkdir(inode, dentry, 0700); - mutex_unlock(&inode->i_mutex); if (err) { dput(dentry); dentry = NULL; @@ -980,6 +974,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) /* If we don't have the privroot located yet - go find it */ if (!REISERFS_SB(s)->priv_root) { struct dentry *dentry; + mutex_lock_nested(&s->s_root->d_inode->i_mutex, I_MUTEX_CHILD); dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, strlen(PRIVROOT_NAME)); if (!IS_ERR(dentry)) { @@ -993,6 +988,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) } } else err = PTR_ERR(dentry); + mutex_unlock(&s->s_root->d_inode->i_mutex); if (!err && dentry) { s->s_root->d_op = &xattr_lookup_poison_ops; -- cgit v1.2.3 From edcc37a0478836b4a51eafb1bcec6a52708f681d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 May 2009 06:00:05 -0400 Subject: Always lookup priv_root on reiserfs mount and keep it ... even if it's a negative dentry. 
That way we can set ->d_op on root before anyone could race with us. Simplify d_compare(), while we are at it. Signed-off-by: Al Viro --- fs/reiserfs/super.c | 6 ++-- fs/reiserfs/xattr.c | 86 ++++++++++++++++++++++------------------------------- 2 files changed, 40 insertions(+), 52 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 0ae6486d904..d444fe0013a 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1842,7 +1842,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) goto error; } - if ((errval = reiserfs_xattr_init(s, s->s_flags))) { + if ((errval = reiserfs_lookup_privroot(s)) || + (errval = reiserfs_xattr_init(s, s->s_flags))) { dput(s->s_root); s->s_root = NULL; goto error; @@ -1855,7 +1856,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) reiserfs_info(s, "using 3.5.x disk format\n"); } - if ((errval = reiserfs_xattr_init(s, s->s_flags))) { + if ((errval = reiserfs_lookup_privroot(s)) || + (errval = reiserfs_xattr_init(s, s->s_flags))) { dput(s->s_root); s->s_root = NULL; goto error; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 31a3dbb120e..2891f789f54 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -903,16 +903,19 @@ static int create_privroot(struct dentry *dentry) WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex)); err = xattr_mkdir(inode, dentry, 0700); - if (err) { - dput(dentry); - dentry = NULL; + if (err || !dentry->d_inode) { + reiserfs_warning(dentry->d_sb, "jdm-20006", + "xattrs/ACLs enabled and couldn't " + "find/create .reiserfs_priv. 
" + "Failing mount."); + return -EOPNOTSUPP; } - if (dentry && dentry->d_inode) - reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr " - "storage.\n", PRIVROOT_NAME); + dentry->d_inode->i_flags |= S_PRIVATE; + reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr " + "storage.\n", PRIVROOT_NAME); - return err; + return 0; } static int xattr_mount_check(struct super_block *s) @@ -944,11 +947,9 @@ static int xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) { struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root; - if (name->len == priv_root->d_name.len && - name->hash == priv_root->d_name.hash && - !memcmp(name->name, priv_root->d_name.name, name->len)) { + if (container_of(q1, struct dentry, d_name) == priv_root) return -ENOENT; - } else if (q1->len == name->len && + if (q1->len == name->len && !memcmp(q1->name, name->name, name->len)) return 0; return 1; @@ -958,6 +959,27 @@ static const struct dentry_operations xattr_lookup_poison_ops = { .d_compare = xattr_lookup_poison, }; +int reiserfs_lookup_privroot(struct super_block *s) +{ + struct dentry *dentry; + int err = 0; + + /* If we don't have the privroot located yet - go find it */ + mutex_lock(&s->s_root->d_inode->i_mutex); + dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, + strlen(PRIVROOT_NAME)); + if (!IS_ERR(dentry)) { + REISERFS_SB(s)->priv_root = dentry; + s->s_root->d_op = &xattr_lookup_poison_ops; + if (dentry->d_inode) + dentry->d_inode->i_flags |= S_PRIVATE; + } else + err = PTR_ERR(dentry); + mutex_unlock(&s->s_root->d_inode->i_mutex); + + return err; +} + /* We need to take a copy of the mount flags since things like * MS_RDONLY don't get set until *after* we're called. 
* mount_flags != mount_options */ @@ -969,48 +991,12 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) err = xattr_mount_check(s); if (err) goto error; -#endif - /* If we don't have the privroot located yet - go find it */ - if (!REISERFS_SB(s)->priv_root) { - struct dentry *dentry; - mutex_lock_nested(&s->s_root->d_inode->i_mutex, I_MUTEX_CHILD); - dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, - strlen(PRIVROOT_NAME)); - if (!IS_ERR(dentry)) { -#ifdef CONFIG_REISERFS_FS_XATTR - if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) - err = create_privroot(dentry); -#endif - if (!dentry->d_inode) { - dput(dentry); - dentry = NULL; - } - } else - err = PTR_ERR(dentry); + if (!REISERFS_SB(s)->priv_root->d_inode && !(mount_flags & MS_RDONLY)) { + mutex_lock(&s->s_root->d_inode->i_mutex); + err = create_privroot(REISERFS_SB(s)->priv_root); mutex_unlock(&s->s_root->d_inode->i_mutex); - - if (!err && dentry) { - s->s_root->d_op = &xattr_lookup_poison_ops; - dentry->d_inode->i_flags |= S_PRIVATE; - REISERFS_SB(s)->priv_root = dentry; -#ifdef CONFIG_REISERFS_FS_XATTR - /* xattrs are unavailable */ - } else if (!(mount_flags & MS_RDONLY)) { - /* If we're read-only it just means that the dir - * hasn't been created. Not an error -- just no - * xattrs on the fs. We'll check again if we - * go read-write */ - reiserfs_warning(s, "jdm-20006", - "xattrs/ACLs enabled and couldn't " - "find/create .reiserfs_priv. " - "Failing mount."); - err = -EOPNOTSUPP; -#endif - } } - -#ifdef CONFIG_REISERFS_FS_XATTR if (!err) s->s_xattr = reiserfs_xattr_handlers; -- cgit v1.2.3 From ab17c4f02156c4f75d7fa43a5aa2a7f942d47201 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 May 2009 15:30:15 -0400 Subject: reiserfs: fixup xattr_root caching The xattr_root caching was broken from my previous patch set. It wouldn't cause corruption, but could cause decreased performance due to allocating a larger chunk of the journal (~ 27 blocks) than it would actually use. 
This patch loads the xattr root dentry at xattr initialization and creates it on-demand. Since we're using the cached dentry, there's no point in keeping lookup_or_create_dir around, so that's removed. Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- fs/reiserfs/xattr.c | 73 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 2891f789f54..c77984473db 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -113,36 +113,28 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry) #define xattr_may_create(flags) (!flags || flags & XATTR_CREATE) -/* Returns and possibly creates the xattr dir. */ -static struct dentry *lookup_or_create_dir(struct dentry *parent, - const char *name, int flags) +static struct dentry *open_xa_root(struct super_block *sb, int flags) { - struct dentry *dentry; - BUG_ON(!parent); + struct dentry *privroot = REISERFS_SB(sb)->priv_root; + struct dentry *xaroot; + if (!privroot->d_inode) + return ERR_PTR(-ENODATA); - mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_XATTR); - dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(dentry) && !dentry->d_inode) { - int err = -ENODATA; + mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR); + xaroot = dget(REISERFS_SB(sb)->xattr_root); + if (!xaroot->d_inode) { + int err = -ENODATA; if (xattr_may_create(flags)) - err = xattr_mkdir(parent->d_inode, dentry, 0700); - + err = xattr_mkdir(privroot->d_inode, xaroot, 0700); if (err) { - dput(dentry); - dentry = ERR_PTR(err); + dput(xaroot); + xaroot = ERR_PTR(err); } } - mutex_unlock(&parent->d_inode->i_mutex); - return dentry; -} -static struct dentry *open_xa_root(struct super_block *sb, int flags) -{ - struct dentry *privroot = REISERFS_SB(sb)->priv_root; - if (!privroot) - return ERR_PTR(-ENODATA); - return lookup_or_create_dir(privroot, XAROOT_NAME, flags); + 
mutex_unlock(&privroot->d_inode->i_mutex); + return xaroot; } static struct dentry *open_xa_dir(const struct inode *inode, int flags) @@ -158,10 +150,22 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags) le32_to_cpu(INODE_PKEY(inode)->k_objectid), inode->i_generation); - xadir = lookup_or_create_dir(xaroot, namebuf, flags); + mutex_lock_nested(&xaroot->d_inode->i_mutex, I_MUTEX_XATTR); + + xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf)); + if (!IS_ERR(xadir) && !xadir->d_inode) { + int err = -ENODATA; + if (xattr_may_create(flags)) + err = xattr_mkdir(xaroot->d_inode, xadir, 0700); + if (err) { + dput(xadir); + xadir = ERR_PTR(err); + } + } + + mutex_unlock(&xaroot->d_inode->i_mutex); dput(xaroot); return xadir; - } /* The following are side effects of other operations that aren't explicitly @@ -986,19 +990,33 @@ int reiserfs_lookup_privroot(struct super_block *s) int reiserfs_xattr_init(struct super_block *s, int mount_flags) { int err = 0; + struct dentry *privroot = REISERFS_SB(s)->priv_root; #ifdef CONFIG_REISERFS_FS_XATTR err = xattr_mount_check(s); if (err) goto error; - if (!REISERFS_SB(s)->priv_root->d_inode && !(mount_flags & MS_RDONLY)) { + if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) { mutex_lock(&s->s_root->d_inode->i_mutex); err = create_privroot(REISERFS_SB(s)->priv_root); mutex_unlock(&s->s_root->d_inode->i_mutex); } - if (!err) + + if (privroot->d_inode) { s->s_xattr = reiserfs_xattr_handlers; + mutex_lock(&privroot->d_inode->i_mutex); + if (!REISERFS_SB(s)->xattr_root) { + struct dentry *dentry; + dentry = lookup_one_len(XAROOT_NAME, privroot, + strlen(XAROOT_NAME)); + if (!IS_ERR(dentry)) + REISERFS_SB(s)->xattr_root = dentry; + else + err = PTR_ERR(dentry); + } + mutex_unlock(&privroot->d_inode->i_mutex); + } error: if (err) { @@ -1008,11 +1026,12 @@ error: #endif /* The super_block MS_POSIXACL must mirror the (no)acl mount option. 
*/ - s->s_flags = s->s_flags & ~MS_POSIXACL; #ifdef CONFIG_REISERFS_FS_POSIX_ACL if (reiserfs_posixacl(s)) s->s_flags |= MS_POSIXACL; + else #endif + s->s_flags &= ~MS_POSIXACL; return err; } -- cgit v1.2.3 From b82bb72ba7df473461c5e2368a4e7497c8ce76e9 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 May 2009 15:30:16 -0400 Subject: reiserfs: dont associate security.* with xattr files The security.* xattrs are ignored for xattr files, so don't create them. Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- fs/reiserfs/xattr_security.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index 4d3c20e787c..a92c8792c0f 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -55,8 +55,16 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode, struct reiserfs_security_handle *sec) { int blocks = 0; - int error = security_inode_init_security(inode, dir, &sec->name, - &sec->value, &sec->length); + int error; + + sec->name = NULL; + + /* Don't add selinux attributes on xattrs - they'll never get used */ + if (IS_PRIVATE(dir)) + return 0; + + error = security_inode_init_security(inode, dir, &sec->name, + &sec->value, &sec->length); if (error) { if (error == -EOPNOTSUPP) error = 0; -- cgit v1.2.3 From 677c9b2e393a0cd203bd54e9c18b012b2c73305a Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 May 2009 15:30:17 -0400 Subject: reiserfs: remove privroot hiding in lookup With Al Viro's patch to move privroot lookup to fs mount, there's no need to have special code to hide the privroot in reiserfs_lookup. I've also cleaned up the privroot hiding in reiserfs_readdir_dentry and removed the last user of reiserfs_xattrs(). 
Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- fs/reiserfs/dir.c | 24 +++++++++++++----------- fs/reiserfs/namei.c | 17 ++--------------- fs/reiserfs/xattr.c | 2 +- 3 files changed, 16 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 67a80d7e59e..45ee3d357c7 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -41,6 +41,18 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, #define store_ih(where,what) copy_item_head (where, what) +static inline bool is_privroot_deh(struct dentry *dir, + struct reiserfs_de_head *deh) +{ + int ret = 0; +#ifdef CONFIG_REISERFS_FS_XATTR + struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; + ret = (dir == dir->d_parent && privroot->d_inode && + deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); +#endif + return ret; +} + int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, filldir_t filldir, loff_t *pos) { @@ -138,18 +150,8 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, } /* Ignore the .reiserfs_priv entry */ - if (reiserfs_xattrs(inode->i_sb) && - !old_format_only(inode->i_sb) && - dentry == inode->i_sb->s_root && - REISERFS_SB(inode->i_sb)->priv_root && - REISERFS_SB(inode->i_sb)->priv_root->d_inode - && deh_objectid(deh) == - le32_to_cpu(INODE_PKEY - (REISERFS_SB(inode->i_sb)-> - priv_root->d_inode)-> - k_objectid)) { + if (is_privroot_deh(dentry, deh)) continue; - } d_off = deh_offset(deh); *pos = d_off; diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index efd4d720718..27157912863 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -338,21 +338,8 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, &path_to_entry, &de); pathrelse(&path_to_entry); if (retval == NAME_FOUND) { - /* Hide the .reiserfs_priv directory */ - if (reiserfs_xattrs(dir->i_sb) && - !old_format_only(dir->i_sb) && - REISERFS_SB(dir->i_sb)->priv_root && - 
REISERFS_SB(dir->i_sb)->priv_root->d_inode && - de.de_objectid == - le32_to_cpu(INODE_PKEY - (REISERFS_SB(dir->i_sb)->priv_root->d_inode)-> - k_objectid)) { - reiserfs_write_unlock(dir->i_sb); - return ERR_PTR(-EACCES); - } - - inode = - reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); + inode = reiserfs_iget(dir->i_sb, + (struct cpu_key *)&(de.de_dir_id)); if (!inode || IS_ERR(inode)) { reiserfs_write_unlock(dir->i_sb); return ERR_PTR(-EACCES); diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index c77984473db..2237e10c7c7 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -841,7 +841,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) if (!dentry->d_inode) return -EINVAL; - if (!reiserfs_xattrs(dentry->d_sb) || + if (!dentry->d_sb->s_xattr || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) return -EOPNOTSUPP; -- cgit v1.2.3 From 74dbbdd7fdc11763f4698d2f3e684cf4446951e6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 May 2009 01:07:50 -0400 Subject: New helper: deactivate_locked_super() Does equivalent of up_write(&s->s_umount); deactivate_super(s); However, it does not unlock it until it's all over. As the result, it's safe to use to dispose of new superblock on ->get_sb() failure exits - nobody will see the sucker until it's all over. Equivalent using up_write/deactivate_super is safe for that purpose if superblock is either safe to use or has NULL ->s_root when we unlock. Normally filesystems take the required precautions, but a) we do have bugs in that area in some of them. b) up_write/deactivate_super sequence is extremely common, so the helper makes sense anyway. 
Signed-off-by: Al Viro --- fs/super.c | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index 786fe7d7279..a9dc4c33ef4 100644 --- a/fs/super.c +++ b/fs/super.c @@ -207,6 +207,34 @@ void deactivate_super(struct super_block *s) EXPORT_SYMBOL(deactivate_super); +/** + * deactivate_locked_super - drop an active reference to superblock + * @s: superblock to deactivate + * + * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that + * it does not unlock it until it's all over. As the result, it's safe to + * use to dispose of new superblock on ->get_sb() failure exits - nobody + * will see the sucker until it's all over. Equivalent using up_write + + * deactivate_super is safe for that purpose only if superblock is either + * safe to use or has NULL ->s_root when we unlock. + */ +void deactivate_locked_super(struct super_block *s) +{ + struct file_system_type *fs = s->s_type; + if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { + s->s_count -= S_BIAS-1; + spin_unlock(&sb_lock); + vfs_dq_off(s, 0); + fs->kill_sb(s); + put_filesystem(fs); + put_super(s); + } else { + up_write(&s->s_umount); + } +} + +EXPORT_SYMBOL(deactivate_locked_super); + /** * grab_super - acquire an active reference * @s: reference we are trying to make active @@ -797,8 +825,7 @@ int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, sb->s_flags = flags; err = fill_super(sb, data, flags & MS_SILENT ? 
1 : 0); if (err) { - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return err; } @@ -854,8 +881,7 @@ int get_sb_bdev(struct file_system_type *fs_type, if (s->s_root) { if ((flags ^ s->s_flags) & MS_RDONLY) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); error = -EBUSY; goto error_bdev; } @@ -870,8 +896,7 @@ int get_sb_bdev(struct file_system_type *fs_type, sb_set_blocksize(s, block_size(bdev)); error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); goto error; } @@ -921,8 +946,7 @@ int get_sb_nodev(struct file_system_type *fs_type, error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return error; } s->s_flags |= MS_ACTIVE; @@ -952,8 +976,7 @@ int get_sb_single(struct file_system_type *fs_type, s->s_flags = flags; error = fill_super(s, data, flags & MS_SILENT ? 
1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return error; } s->s_flags |= MS_ACTIVE; @@ -1006,8 +1029,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void return mnt; out_sb: dput(mnt->mnt_root); - up_write(&mnt->mnt_sb->s_umount); - deactivate_super(mnt->mnt_sb); + deactivate_locked_super(mnt->mnt_sb); out_free_secdata: free_secdata(secdata); out_mnt: -- cgit v1.2.3 From 6f5bbff9a1b7d6864a495763448a363bbfa96324 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 May 2009 01:34:22 -0400 Subject: Convert obvious places to deactivate_locked_super() Signed-off-by: Al Viro --- fs/9p/vfs_super.c | 5 +---- fs/afs/super.c | 3 +-- fs/btrfs/super.c | 12 ++++-------- fs/cifs/cifsfs.c | 3 +-- fs/devpts/inode.c | 5 ++--- fs/ecryptfs/main.c | 5 ++--- fs/libfs.c | 3 +-- fs/nfs/super.c | 15 +++++---------- fs/proc/root.c | 3 +-- fs/ubifs/super.c | 3 +-- 10 files changed, 19 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 7d23214e559..0d29a57c63e 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -174,10 +174,7 @@ P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); return 0; release_sb: - if (sb) { - up_write(&sb->s_umount); - deactivate_super(sb); - } + deactivate_locked_super(sb); free_stat: kfree(st); diff --git a/fs/afs/super.c b/fs/afs/super.c index aee239a048c..2753f16dd31 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -405,8 +405,7 @@ static int afs_get_sb(struct file_system_type *fs_type, sb->s_flags = flags; ret = afs_fill_super(sb, &params); if (ret < 0) { - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); goto error; } sb->s_options = new_opts; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3536bdb2d7c..6dfae5b28f5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -502,8 +502,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, if 
(s->s_root) { if 
((flags ^ s->s_flags) & MS_RDONLY) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); error = -EBUSY; goto error_close_devices; } @@ -517,8 +516,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); goto error_free_subvol_name; } @@ -535,15 +533,13 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, mutex_unlock(&s->s_root->d_inode->i_mutex); if (IS_ERR(root)) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); error = PTR_ERR(root); goto error_free_subvol_name; } if (!root->d_inode) { dput(root); - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); error = -ENXIO; goto error_free_subvol_name; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 355e0efec0c..5e6d35804d7 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -602,8 +602,7 @@ cifs_get_sb(struct file_system_type *fs_type, rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 
1 : 0); if (rc) { - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return rc; } sb->s_flags |= MS_ACTIVE; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 63a4a59e414..21165cf934f 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -389,11 +389,10 @@ static int devpts_get_sb(struct file_system_type *fs_type, return 0; out_dput: - dput(s->s_root); + dput(s->s_root); /* undo dget() in simple_set_mnt() */ out_undo_sget: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return error; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index ccabd5faa04..9f0aa9883c2 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -614,9 +614,8 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, } goto out; out_abort: - dput(sb->s_root); - up_write(&sb->s_umount); - deactivate_super(sb); + dput(sb->s_root); /* aka mnt->mnt_root, as set by get_sb_nodev() */ + deactivate_locked_super(sb); out: return rc; } diff --git a/fs/libfs.c b/fs/libfs.c index cd223190c4e..80046ddf506 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -246,8 +246,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, return 0; Enomem: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return -ENOMEM; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 1679a164c8c..d2d67781c57 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2111,8 +2111,7 @@ out_err_nosb: error_splat_root: dput(mntroot); error_splat_super: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); goto out; } @@ -2208,8 +2207,7 @@ out_err_noserver: return error; error_splat_super: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error); return error; } @@ -2469,8 +2467,7 @@ out_free: error_splat_root: dput(mntroot); error_splat_super: - up_write(&s->s_umount); - deactivate_super(s); + 
deactivate_locked_super(s); goto out; } @@ -2564,8 +2561,7 @@ out_err_noserver: return error; error_splat_super: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error); return error; } @@ -2649,8 +2645,7 @@ out_err_noserver: return error; error_splat_super: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); return error; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 1e15a2b176e..b080b791d9e 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -67,8 +67,7 @@ static int proc_get_sb(struct file_system_type *fs_type, sb->s_flags = flags; err = proc_fill_super(sb); if (err) { - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return err; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index faa44f90608..e9f7a754c4f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2055,8 +2055,7 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags, return 0; out_deact: - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); out_close: ubi_close_volume(ubi); return err; -- cgit v1.2.3 From c96f58573778ddf96ff67108a635f3f642ea63d3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 May 2009 01:35:04 -0400 Subject: Fix a leak in failure exit in 9p ->get_sb() Signed-off-by: Al Viro --- fs/9p/vfs_super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 0d29a57c63e..ab5547ff29a 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -156,6 +156,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, root = d_alloc_root(inode); if (!root) { + iput(inode); retval = -ENOMEM; goto release_sb; } -- cgit v1.2.3 From a731d12d6ddd1e703770cacb5dfecb155b03ee06 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Mon, 6 Apr 2009 16:43:42 -0700 Subject: ocfs2: Use 
nd_set_link(). ocfs2 was hand-calling vfs_follow_link(), but there's no point to that. Let's use page_follow_link_light() and nd_set_link(). Signed-off-by: Joel Becker Signed-off-by: Al Viro --- fs/ocfs2/symlink.c | 77 ++++++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index ed0a0cfd68d..579dd1b1110 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -39,6 +39,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_NAMEI #include @@ -54,26 +55,6 @@ #include "buffer_head_io.h" -static char *ocfs2_page_getlink(struct dentry * dentry, - struct page **ppage); -static char *ocfs2_fast_symlink_getlink(struct inode *inode, - struct buffer_head **bh); - -/* get the link contents into pagecache */ -static char *ocfs2_page_getlink(struct dentry * dentry, - struct page **ppage) -{ - struct page * page; - struct address_space *mapping = dentry->d_inode->i_mapping; - page = read_mapping_page(mapping, 0, NULL); - if (IS_ERR(page)) - goto sync_fail; - *ppage = page; - return kmap(page); - -sync_fail: - return (char*)page; -} static char *ocfs2_fast_symlink_getlink(struct inode *inode, struct buffer_head **bh) @@ -128,40 +109,55 @@ out: return ret; } -static void *ocfs2_follow_link(struct dentry *dentry, - struct nameidata *nd) +static void *ocfs2_fast_follow_link(struct dentry *dentry, + struct nameidata *nd) { - int status; - char *link; + int status = 0; + int len; + char *target, *link = ERR_PTR(-ENOMEM); struct inode *inode = dentry->d_inode; - struct page *page = NULL; struct buffer_head *bh = NULL; - - if (ocfs2_inode_is_fast_symlink(inode)) - link = ocfs2_fast_symlink_getlink(inode, &bh); - else - link = ocfs2_page_getlink(dentry, &page); - if (IS_ERR(link)) { - status = PTR_ERR(link); + + mlog_entry_void(); + + BUG_ON(!ocfs2_inode_is_fast_symlink(inode)); + target = ocfs2_fast_symlink_getlink(inode, &bh); + if (IS_ERR(target)) 
{ + status = PTR_ERR(target); mlog_errno(status); goto bail; } - status = vfs_follow_link(nd, link); + /* Fast symlinks can't be large */ + len = strlen(target); + link = kzalloc(len + 1, GFP_NOFS); + if (!link) { + status = -ENOMEM; + mlog_errno(status); + goto bail; + } + + memcpy(link, target, len); + nd_set_link(nd, link); bail: - if (page) { - kunmap(page); - page_cache_release(page); - } brelse(bh); - return ERR_PTR(status); + mlog_exit(status); + return status ? ERR_PTR(status) : link; +} + +static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) +{ + char *link = cookie; + + kfree(link); } const struct inode_operations ocfs2_symlink_inode_operations = { .readlink = page_readlink, - .follow_link = ocfs2_follow_link, + .follow_link = page_follow_link_light, + .put_link = page_put_link, .getattr = ocfs2_getattr, .setattr = ocfs2_setattr, .setxattr = generic_setxattr, @@ -171,7 +167,8 @@ const struct inode_operations ocfs2_symlink_inode_operations = { }; const struct inode_operations ocfs2_fast_symlink_inode_operations = { .readlink = ocfs2_readlink, - .follow_link = ocfs2_follow_link, + .follow_link = ocfs2_fast_follow_link, + .put_link = ocfs2_fast_put_link, .getattr = ocfs2_getattr, .setattr = ocfs2_setattr, .setxattr = generic_setxattr, -- cgit v1.2.3 From c490d79bb70c549e096099576b1df40a8810b0d8 Mon Sep 17 00:00:00 2001 From: "npiggin@suse.de" Date: Sun, 26 Apr 2009 20:25:53 +1000 Subject: fs: dcache fix LRU ordering Fix ordering of LRU when moving referenced dentries to the head of the list (they should go to the head of the list in the same order as they were found from the tail, rather than reverse order). 
Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 1fcffebfb44..75659a6fd1f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -481,7 +481,7 @@ restart: if ((flags & DCACHE_REFERENCED) && (dentry->d_flags & DCACHE_REFERENCED)) { dentry->d_flags &= ~DCACHE_REFERENCED; - list_move_tail(&dentry->d_lru, &referenced); + list_move(&dentry->d_lru, &referenced); spin_unlock(&dentry->d_lock); } else { list_move_tail(&dentry->d_lru, &tmp); -- cgit v1.2.3 From 774e33e70b2bffa8c602d22a5d27c0061a0039cc Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sun, 26 Apr 2009 14:51:17 +0200 Subject: ROMFS: romfs_dev_read() error ignored romfs_dev_read() may return -EIO, but ret is unsigned, so the errorpath isn't taken. Signed-off-by: Roel Kluin Signed-off-by: Al Viro --- fs/romfs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/romfs/super.c b/fs/romfs/super.c index c53b5ef8a02..4ab3c03d8f9 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -298,7 +298,8 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos) struct romfs_inode ri; struct inode *i; unsigned long nlen; - unsigned nextfh, ret; + unsigned nextfh; + int ret; umode_t mode; /* we might have to traverse a chain of "hard link" file entries to get -- cgit v1.2.3 From ddbaaf3024d764ced700efb2d818709b90ea6fdd Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Wed, 29 Apr 2009 20:14:57 -0400 Subject: NULL noise in fs/super.c:kill_bdev_super() Signed-off-by: H Hartley Sweeten Cc: Subrata Modak Signed-off-by: Al Viro --- fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index a9dc4c33ef4..1943fdf655f 100644 --- a/fs/super.c +++ b/fs/super.c @@ -922,7 +922,7 @@ void kill_block_super(struct super_block *sb) struct block_device *bdev = sb->s_bdev; fmode_t mode = 
sb->s_mode; - bdev->bd_super = 0; + bdev->bd_super = NULL; generic_shutdown_super(sb); sync_blockdev(bdev); close_bdev_exclusive(bdev, mode); -- cgit v1.2.3 From 6b3304b531704711286c3359b06922b83fdba015 Mon Sep 17 00:00:00 2001 From: Manish Katiyar Date: Tue, 31 Mar 2009 19:35:54 +0530 Subject: Make checkpatch.pl shut up on fs/inode.c Code Quality According To Mingo(tm) has been vastly improved, no code has been damaged^Wchanged^Wdamaged. [commit message rewritten -- AV] Signed-off-by: Manish Katiyar Signed-off-by: Al Viro --- fs/inode.c | 81 +++++++++++++++++++++++++++----------------------------------- 1 file changed, 35 insertions(+), 46 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index 6ad14a1cd8c..0571983755d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -99,7 +99,7 @@ static DEFINE_MUTEX(iprune_mutex); */ struct inodes_stat_t inodes_stat; -static struct kmem_cache * inode_cachep __read_mostly; +static struct kmem_cache *inode_cachep __read_mostly; static void wake_up_inode(struct inode *inode) { @@ -124,7 +124,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) static struct inode_operations empty_iops; static const struct file_operations empty_fops; - struct address_space * const mapping = &inode->i_data; + struct address_space *const mapping = &inode->i_data; inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; @@ -216,7 +216,7 @@ static struct inode *alloc_inode(struct super_block *sb) return NULL; } -void destroy_inode(struct inode *inode) +void destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); security_inode_free(inode); @@ -252,12 +252,11 @@ void inode_init_once(struct inode *inode) mutex_init(&inode->inotify_mutex); #endif } - EXPORT_SYMBOL(inode_init_once); static void init_once(void *foo) { - struct inode * inode = (struct inode *) foo; + struct inode *inode = (struct inode *) foo; inode_init_once(inode); } @@ -265,7 +264,7 @@ static void init_once(void *foo) /* * 
inode_lock must be held */ -void __iget(struct inode * inode) +void __iget(struct inode *inode) { if (atomic_read(&inode->i_count)) { atomic_inc(&inode->i_count); @@ -289,7 +288,7 @@ void clear_inode(struct inode *inode) { might_sleep(); invalidate_inode_buffers(inode); - + BUG_ON(inode->i_data.nrpages); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); @@ -303,7 +302,6 @@ void clear_inode(struct inode *inode) cd_forget(inode); inode->i_state = I_CLEAR; } - EXPORT_SYMBOL(clear_inode); /* @@ -351,8 +349,8 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) next = head->next; for (;;) { - struct list_head * tmp = next; - struct inode * inode; + struct list_head *tmp = next; + struct inode *inode; /* * We can reschedule here without worrying about the list's @@ -391,7 +389,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) * fails because there are busy inodes then a non zero value is returned. * If the discard is successful all the inodes have been discarded. */ -int invalidate_inodes(struct super_block * sb) +int invalidate_inodes(struct super_block *sb) { int busy; LIST_HEAD(throw_away); @@ -407,7 +405,6 @@ int invalidate_inodes(struct super_block * sb) return busy; } - EXPORT_SYMBOL(invalidate_inodes); static int can_unuse(struct inode *inode) @@ -504,7 +501,7 @@ static int shrink_icache_memory(int nr, gfp_t gfp_mask) * Nasty deadlock avoidance. We may hold various FS locks, * and we don't want to recurse into the FS that called us * in clear_inode() and friends.. - */ + */ if (!(gfp_mask & __GFP_FS)) return -1; prune_icache(nr); @@ -524,10 +521,13 @@ static void __wait_on_freeing_inode(struct inode *inode); * by hand after calling find_inode now! This simplifies iunique and won't * add any additional branch in the common code. 
*/ -static struct inode * find_inode(struct super_block * sb, struct hlist_head *head, int (*test)(struct inode *, void *), void *data) +static struct inode *find_inode(struct super_block *sb, + struct hlist_head *head, + int (*test)(struct inode *, void *), + void *data) { struct hlist_node *node; - struct inode * inode = NULL; + struct inode *inode = NULL; repeat: hlist_for_each_entry(inode, node, head, i_hash) { @@ -548,10 +548,11 @@ repeat: * find_inode_fast is the fast path version of find_inode, see the comment at * iget_locked for details. */ -static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head *head, unsigned long ino) +static struct inode *find_inode_fast(struct super_block *sb, + struct hlist_head *head, unsigned long ino) { struct hlist_node *node; - struct inode * inode = NULL; + struct inode *inode = NULL; repeat: hlist_for_each_entry(inode, node, head, i_hash) { @@ -631,10 +632,10 @@ struct inode *new_inode(struct super_block *sb) * here to attempt to avoid that. 
*/ static unsigned int last_ino; - struct inode * inode; + struct inode *inode; spin_lock_prefetch(&inode_lock); - + inode = alloc_inode(sb); if (inode) { spin_lock(&inode_lock); @@ -645,7 +646,6 @@ struct inode *new_inode(struct super_block *sb) } return inode; } - EXPORT_SYMBOL(new_inode); void unlock_new_inode(struct inode *inode) @@ -674,7 +674,6 @@ void unlock_new_inode(struct inode *inode) inode->i_state &= ~(I_LOCK|I_NEW); wake_up_inode(inode); } - EXPORT_SYMBOL(unlock_new_inode); /* @@ -683,13 +682,17 @@ EXPORT_SYMBOL(unlock_new_inode); * We no longer cache the sb_flags in i_flags - see fs.h * -- rmk@arm.uk.linux.org */ -static struct inode * get_new_inode(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) +static struct inode *get_new_inode(struct super_block *sb, + struct hlist_head *head, + int (*test)(struct inode *, void *), + int (*set)(struct inode *, void *), + void *data) { - struct inode * inode; + struct inode *inode; inode = alloc_inode(sb); if (inode) { - struct inode * old; + struct inode *old; spin_lock(&inode_lock); /* We released the lock, so.. */ @@ -731,13 +734,14 @@ set_failed: * get_new_inode_fast is the fast path version of get_new_inode, see the * comment at iget_locked for details. */ -static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino) +static struct inode *get_new_inode_fast(struct super_block *sb, + struct hlist_head *head, unsigned long ino) { - struct inode * inode; + struct inode *inode; inode = alloc_inode(sb); if (inode) { - struct inode * old; + struct inode *old; spin_lock(&inode_lock); /* We released the lock, so.. 
*/ @@ -823,7 +827,6 @@ struct inode *igrab(struct inode *inode) spin_unlock(&inode_lock); return inode; } - EXPORT_SYMBOL(igrab); /** @@ -924,7 +927,6 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, return ifind(sb, head, test, data, 0); } - EXPORT_SYMBOL(ilookup5_nowait); /** @@ -953,7 +955,6 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval, return ifind(sb, head, test, data, 1); } - EXPORT_SYMBOL(ilookup5); /** @@ -976,7 +977,6 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino) return ifind_fast(sb, head, ino); } - EXPORT_SYMBOL(ilookup); /** @@ -1015,7 +1015,6 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, */ return get_new_inode(sb, head, test, set, data); } - EXPORT_SYMBOL(iget5_locked); /** @@ -1047,7 +1046,6 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) */ return get_new_inode_fast(sb, head, ino); } - EXPORT_SYMBOL(iget_locked); int insert_inode_locked(struct inode *inode) @@ -1076,7 +1074,6 @@ int insert_inode_locked(struct inode *inode) iput(old); } } - EXPORT_SYMBOL(insert_inode_locked); int insert_inode_locked4(struct inode *inode, unsigned long hashval, @@ -1106,7 +1103,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, iput(old); } } - EXPORT_SYMBOL(insert_inode_locked4); /** @@ -1124,7 +1120,6 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval) hlist_add_head(&inode->i_hash, head); spin_unlock(&inode_lock); } - EXPORT_SYMBOL(__insert_inode_hash); /** @@ -1139,7 +1134,6 @@ void remove_inode_hash(struct inode *inode) hlist_del_init(&inode->i_hash); spin_unlock(&inode_lock); } - EXPORT_SYMBOL(remove_inode_hash); /* @@ -1187,7 +1181,6 @@ void generic_delete_inode(struct inode *inode) BUG_ON(inode->i_state != I_CLEAR); destroy_inode(inode); } - EXPORT_SYMBOL(generic_delete_inode); static void generic_forget_inode(struct inode *inode) @@ -1237,12 +1230,11 @@ void generic_drop_inode(struct 
inode *inode) else generic_forget_inode(inode); } - EXPORT_SYMBOL_GPL(generic_drop_inode); /* * Called when we're dropping the last reference - * to an inode. + * to an inode. * * Call the FS "drop()" function, defaulting to * the legacy UNIX filesystem behaviour.. @@ -1262,7 +1254,7 @@ static inline void iput_final(struct inode *inode) } /** - * iput - put an inode + * iput - put an inode * @inode: inode to put * * Puts an inode, dropping its usage count. If the inode use count hits @@ -1279,7 +1271,6 @@ void iput(struct inode *inode) iput_final(inode); } } - EXPORT_SYMBOL(iput); /** @@ -1290,10 +1281,10 @@ EXPORT_SYMBOL(iput); * Returns the block number on the device holding the inode that * is the disk block number for the block of the file requested. * That is, asked for block 4 of inode 1 the function will return the - * disk block relative to the disk start that holds that block of the + * disk block relative to the disk start that holds that block of the * file. */ -sector_t bmap(struct inode * inode, sector_t block) +sector_t bmap(struct inode *inode, sector_t block) { sector_t res = 0; if (inode->i_mapping->a_ops->bmap) @@ -1425,7 +1416,6 @@ void file_update_time(struct file *file) mark_inode_dirty_sync(inode); mnt_drop_write(file->f_path.mnt); } - EXPORT_SYMBOL(file_update_time); int inode_needs_sync(struct inode *inode) @@ -1436,7 +1426,6 @@ int inode_needs_sync(struct inode *inode) return 1; return 0; } - EXPORT_SYMBOL(inode_needs_sync); int inode_wait(void *word) -- cgit v1.2.3 From e24977d45f45d1675e050dc1a0aaf4bfc4ca9866 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 2 Apr 2009 21:17:03 -0400 Subject: Reduce path_lookup() abuses ... 
use kern_path() where possible [folded a fix from rdd] Signed-off-by: Al Viro --- fs/gfs2/ops_fstype.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 650a730707b..1ff9473ea75 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1282,21 +1282,21 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags, static struct super_block *get_gfs2_sb(const char *dev_name) { struct super_block *sb; - struct nameidata nd; + struct path path; int error; - error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); + error = kern_path(dev_name, LOOKUP_FOLLOW, &path); if (error) { printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", dev_name, error); return NULL; } - sb = nd.path.dentry->d_inode->i_sb; + sb = path.dentry->d_inode->i_sb; if (sb && (sb->s_type == &gfs2_fs_type)) atomic_inc(&sb->s_active); else sb = NULL; - path_put(&nd.path); + path_put(&path); return sb; } -- cgit v1.2.3 From a44ddbb6d8a8ffe4e34e417048dfdd8f3dd1de4f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Apr 2009 09:38:49 -0400 Subject: Make open_exec() and sys_uselib() use may_open(), instead of duplicating its parts Signed-off-by: Al Viro --- fs/exec.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 639177b0eea..41ae8e0de72 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -126,11 +126,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) if (nd.path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - error = inode_permission(nd.path.dentry->d_inode, - MAY_READ | MAY_EXEC | MAY_OPEN); - if (error) - goto exit; - error = ima_path_check(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN); + error = may_open(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN, 0); if (error) goto exit; @@ -677,10 +673,7 @@ struct file *open_exec(const char *name) if (nd.path.mnt->mnt_flags & MNT_NOEXEC) goto out_path_put; - err = 
inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN); - if (err) - goto out_path_put; - err = ima_path_check(&nd.path, MAY_EXEC | MAY_OPEN); + err = may_open(&nd.path, MAY_EXEC | MAY_OPEN, 0); if (err) goto out_path_put; -- cgit v1.2.3 From 6e8341a11eb21826b7192d0bb88cb5b44900a9af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Apr 2009 11:16:22 -0400 Subject: Switch open_exec() and sys_uselib() to do_open_filp() ... and make path_lookup_open() static Signed-off-by: Al Viro --- fs/exec.c | 72 ++++++++++++++++++++++---------------------------------------- fs/namei.c | 13 ++++++------ fs/open.c | 2 +- 3 files changed, 33 insertions(+), 54 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 41ae8e0de72..895823d0149 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -105,36 +105,28 @@ static inline void put_binfmt(struct linux_binfmt * fmt) SYSCALL_DEFINE1(uselib, const char __user *, library) { struct file *file; - struct nameidata nd; char *tmp = getname(library); int error = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - error = path_lookup_open(AT_FDCWD, tmp, - LOOKUP_FOLLOW, &nd, - FMODE_READ|FMODE_EXEC); - putname(tmp); - } - if (error) + if (IS_ERR(tmp)) + goto out; + + file = do_filp_open(AT_FDCWD, tmp, + O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + MAY_READ | MAY_EXEC | MAY_OPEN); + putname(tmp); + error = PTR_ERR(file); + if (IS_ERR(file)) goto out; error = -EINVAL; - if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) goto exit; error = -EACCES; - if (nd.path.mnt->mnt_flags & MNT_NOEXEC) - goto exit; - - error = may_open(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN, 0); - if (error) + if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; - fsnotify_open(file->f_path.dentry); error = -ENOEXEC; @@ -156,13 +148,10 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) } 
read_unlock(&binfmt_lock); } +exit: fput(file); out: return error; -exit: - release_open_intent(&nd); - path_put(&nd.path); - goto out; } #ifdef CONFIG_MMU @@ -657,44 +646,33 @@ EXPORT_SYMBOL(setup_arg_pages); struct file *open_exec(const char *name) { - struct nameidata nd; struct file *file; int err; - err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, - FMODE_READ|FMODE_EXEC); - if (err) + file = do_filp_open(AT_FDCWD, name, + O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + MAY_EXEC | MAY_OPEN); + if (IS_ERR(file)) goto out; err = -EACCES; - if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) - goto out_path_put; - - if (nd.path.mnt->mnt_flags & MNT_NOEXEC) - goto out_path_put; - - err = may_open(&nd.path, MAY_EXEC | MAY_OPEN, 0); - if (err) - goto out_path_put; + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) + goto exit; - file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); - if (IS_ERR(file)) - return file; + if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) + goto exit; fsnotify_open(file->f_path.dentry); err = deny_write_access(file); - if (err) { - fput(file); - goto out; - } + if (err) + goto exit; +out: return file; - out_path_put: - release_open_intent(&nd); - path_put(&nd.path); - out: +exit: + fput(file); return ERR_PTR(err); } EXPORT_SYMBOL(open_exec); diff --git a/fs/namei.c b/fs/namei.c index 78f253cd2d4..967c3db9272 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1130,8 +1130,8 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, * @nd: pointer to nameidata * @open_flags: open intent flags */ -int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, - struct nameidata *nd, int open_flags) +static int path_lookup_open(int dfd, const char *name, + unsigned int lookup_flags, struct nameidata *nd, int open_flags) { struct file *filp = get_empty_filp(); int err; @@ -1637,18 +1637,19 @@ static int open_will_write_to_fs(int flag, struct inode *inode) * open_to_namei_flags() for more details. 
*/ struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode) + int open_flag, int mode, int acc_mode) { struct file *filp; struct nameidata nd; - int acc_mode, error; + int error; struct path path; struct dentry *dir; int count = 0; int will_write; int flag = open_to_namei_flags(open_flag); - acc_mode = MAY_OPEN | ACC_MODE(flag); + if (!acc_mode) + acc_mode = MAY_OPEN | ACC_MODE(flag); /* O_TRUNC implies we need access checks for write permissions */ if (flag & O_TRUNC) @@ -1869,7 +1870,7 @@ do_link: */ struct file *filp_open(const char *filename, int flags, int mode) { - return do_filp_open(AT_FDCWD, filename, flags, mode); + return do_filp_open(AT_FDCWD, filename, flags, mode, 0); } EXPORT_SYMBOL(filp_open); diff --git a/fs/open.c b/fs/open.c index 377eb25b6ab..bdfbf03615a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1033,7 +1033,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode) if (!IS_ERR(tmp)) { fd = get_unused_fd_flags(flags); if (fd >= 0) { - struct file *f = do_filp_open(dfd, tmp, flags, mode); + struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); -- cgit v1.2.3 From f9dbd05bc97d1d4f17c2057612f6a8e4dbd039e0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 May 2009 19:56:21 -0400 Subject: switch ufs directories to ufs_sync_file() Signed-off-by: Al Viro --- fs/ufs/dir.c | 2 +- fs/ufs/file.c | 2 +- fs/ufs/ufs.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index dbbbc466876..6321b797061 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -666,6 +666,6 @@ not_empty: const struct file_operations ufs_dir_operations = { .read = generic_read_dir, .readdir = ufs_readdir, - .fsync = file_fsync, + .fsync = ufs_sync_file, .llseek = generic_file_llseek, }; diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 625ef17c6f8..2bd3a161571 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -30,7 +30,7 @@ 
#include "ufs.h" -static int ufs_sync_file(struct file *file, struct dentry *dentry, int datasync) +int ufs_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; int err; diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 69b3427d788..d0c4acd4f1f 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -98,8 +98,8 @@ extern void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, /* file.c */ extern const struct inode_operations ufs_file_inode_operations; extern const struct file_operations ufs_file_operations; - extern const struct address_space_operations ufs_aops; +extern int ufs_sync_file(struct file *, struct dentry *, int); /* ialloc.c */ extern void ufs_free_inode (struct inode *inode); -- cgit v1.2.3 From 2a32cebd6cbcc43996c3e2d114fa32ba1e71192a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 May 2009 16:05:57 -0400 Subject: Fix races around the access to ->s_options Put generic_show_options read access to s_options under rcu_read_lock, split save_mount_options() into "we are setting it the first time" (uses in foo_fill_super()) and "we are replacing and freeing the old one", synchronize_rcu() before kfree() in the latter.
Signed-off-by: Al Viro --- fs/affs/super.c | 3 +-- fs/afs/super.c | 4 ++-- fs/hpfs/super.c | 3 +-- fs/namespace.c | 21 ++++++++++++++++++--- fs/reiserfs/super.c | 3 +-- 5 files changed, 23 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/affs/super.c b/fs/affs/super.c index 5ce695e707f..63f5183f263 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -507,8 +507,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) kfree(new_opts); return -EINVAL; } - kfree(sb->s_options); - sb->s_options = new_opts; + replace_mount_options(sb, new_opts); sbi->s_flags = mount_flags; sbi->s_mode = mode; diff --git a/fs/afs/super.c b/fs/afs/super.c index 2753f16dd31..76828e5f8a3 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -408,17 +408,17 @@ static int afs_get_sb(struct file_system_type *fs_type, deactivate_locked_super(sb); goto error; } - sb->s_options = new_opts; + save_mount_options(sb, new_opts); sb->s_flags |= MS_ACTIVE; } else { _debug("reuse"); - kfree(new_opts); ASSERTCMP(sb->s_flags, &, MS_ACTIVE); } simple_set_mnt(mnt, sb); afs_put_volume(params.volume); afs_put_cell(params.cell); + kfree(new_opts); _leave(" = 0 [%p]", sb); return 0; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index fecf402d7b8..fc77965be84 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -423,8 +423,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) if (!(*flags & MS_RDONLY)) mark_dirty(s); - kfree(s->s_options); - s->s_options = new_opts; + replace_mount_options(s, new_opts); return 0; diff --git a/fs/namespace.c b/fs/namespace.c index 0d2003fb437..134d494158d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -695,12 +695,16 @@ static inline void mangle(struct seq_file *m, const char *s) */ int generic_show_options(struct seq_file *m, struct vfsmount *mnt) { - const char *options = mnt->mnt_sb->s_options; + const char *options; + + rcu_read_lock(); + options = rcu_dereference(mnt->mnt_sb->s_options); if (options != NULL && 
options[0]) { seq_putc(m, ','); mangle(m, options); } + rcu_read_unlock(); return 0; } @@ -721,11 +725,22 @@ EXPORT_SYMBOL(generic_show_options); */ void save_mount_options(struct super_block *sb, char *options) { - kfree(sb->s_options); - sb->s_options = kstrdup(options, GFP_KERNEL); + BUG_ON(sb->s_options); + rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL)); } EXPORT_SYMBOL(save_mount_options); +void replace_mount_options(struct super_block *sb, char *options) +{ + char *old = sb->s_options; + rcu_assign_pointer(sb->s_options, options); + if (old) { + synchronize_rcu(); + kfree(old); + } +} +EXPORT_SYMBOL(replace_mount_options); + #ifdef CONFIG_PROC_FS /* iterator */ static void *m_start(struct seq_file *m, loff_t *pos) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index d444fe0013a..1215a4f50cd 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1316,8 +1316,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) } out_ok: - kfree(s->s_options); - s->s_options = new_opts; + replace_mount_options(s, new_opts); return 0; out_err: -- cgit v1.2.3 From 843382370ec614768ac13582405f93635cf3637c Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 5 May 2009 21:52:06 +0900 Subject: nilfs2: ensure to clear dirty state when deleting metadata file block This would fix the following failure during GC: nilfs_cpfile_delete_checkpoints: cannot delete block NILFS: GC failed during preparation: cannot delete checkpoints: err=-2 The problem was caused by a break in state consistency between page cache and btree; the above block was removed from the btree but the page buffering the block was remaining in the page cache in dirty state. This resolves the inconsistency by ensuring to clear dirty state of the page buffering the deleted block. 
Reported-by: David Arendt Signed-off-by: Ryusuke Konishi --- fs/nilfs2/mdt.c | 2 +- fs/nilfs2/page.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index e1c6777931b..bb78745a0e3 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -300,7 +300,7 @@ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) int err; err = nilfs_bmap_delete(ii->i_bmap, block); - if (likely(!err)) { + if (!err || err == -ENOENT) { nilfs_mdt_mark_dirty(inode); nilfs_mdt_forget_block(inode, block); } diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 1bfbba9c0e9..a2692bbc7b5 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -128,7 +128,8 @@ void nilfs_forget_buffer(struct buffer_head *bh) lock_buffer(bh); clear_buffer_nilfs_volatile(bh); - if (test_clear_buffer_dirty(bh) && nilfs_page_buffers_clean(page)) + clear_buffer_dirty(bh); + if (nilfs_page_buffers_clean(page)) __nilfs_clear_page_dirty(page); clear_buffer_uptodate(bh); -- cgit v1.2.3 From 47eb6b9c8fa963c9f49967ad1d9d7ec947d15b68 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 30 Apr 2009 02:21:00 +0900 Subject: nilfs2: fix possible circular locking for get information ioctls This is one of two patches which are to correct possible circular locking between mm->mmap_sem and nilfs->ns_segctor_sem. The problem was detected by lockdep check as follows: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.30-rc3-nilfs-00002-g3552613 #6 ------------------------------------------------------- mmap/5418 is trying to acquire lock: (&nilfs->ns_segctor_sem){++++.+}, at: [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] but task is already holding lock: (&mm->mmap_sem){++++++}, at: [] do_page_fault+0x1d8/0x30a which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_sem){++++++}: [] __lock_acquire+0x1066/0x13b0 [] lock_acquire+0xba/0xdd [] might_fault+0x68/0x88 [] copy_to_user+0x2c/0xfc [] nilfs_ioctl_wrap_copy+0x103/0x160 [nilfs2] [] nilfs_ioctl+0x30a/0x3b0 [nilfs2] [] vfs_ioctl+0x22/0x69 [] do_vfs_ioctl+0x460/0x499 [] sys_ioctl+0x40/0x5a [] sysenter_do_call+0x12/0x38 [] 0xffffffff -> #0 (&nilfs->ns_segctor_sem){++++.+}: [] __lock_acquire+0xdcc/0x13b0 [] lock_acquire+0xba/0xdd [] down_read+0x2a/0x3e [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] nilfs_page_mkwrite+0xe7/0x154 [nilfs2] [] __do_fault+0x165/0x376 [] handle_mm_fault+0x287/0x5d1 [] do_page_fault+0x2fb/0x30a [] error_code+0x72/0x78 [] 0xffffffff other info that might help us debug this: 1 lock held by mmap/5418: #0: (&mm->mmap_sem){++++++}, at: [] do_page_fault+0x1d8/0x30a stack backtrace: Pid: 5418, comm: mmap Not tainted 2.6.30-rc3-nilfs-00002-g3552613 #6 Call Trace: [] ? printk+0xf/0x12 [] print_circular_bug_tail+0xaa/0xb5 [] __lock_acquire+0xdcc/0x13b0 [] ? nilfs_sufile_get_stat+0x1e/0x105 [nilfs2] [] ? up_read+0x16/0x2c [] ? nilfs_sufile_get_stat+0xfa/0x105 [nilfs2] [] lock_acquire+0xba/0xdd [] ? nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] down_read+0x2a/0x3e [] ? nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] nilfs_page_mkwrite+0xe7/0x154 [nilfs2] [] __do_fault+0x165/0x376 [] handle_mm_fault+0x287/0x5d1 [] ? do_page_fault+0x1d8/0x30a [] ? down_read_trylock+0x39/0x43 [] do_page_fault+0x2fb/0x30a [] ? do_page_fault+0x0/0x30a [] error_code+0x72/0x78 [] ? do_page_fault+0x0/0x30a This makes the lock granularity of nilfs->ns_segctor_sem finer than that of the mmap semaphore for ioctl commands except nilfs_clean_segments(). The successive patch ("nilfs2: fix lock order reversal in nilfs_clean_segments ioctl") is required to fully resolve the problem. 
Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 100 +++++++++++++++++++++--------------------------------- 1 file changed, 38 insertions(+), 62 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index be387c6b2d4..e3c693d37d6 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -147,29 +147,12 @@ static ssize_t nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - return nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf, - nmembs); -} - -static int nilfs_ioctl_get_cpinfo(struct inode *inode, struct file *filp, - unsigned int cmd, void __user *argp) -{ - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; - struct nilfs_argv argv; int ret; - if (copy_from_user(&argv, argp, sizeof(argv))) - return -EFAULT; - down_read(&nilfs->ns_segctor_sem); - ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), - nilfs_ioctl_do_get_cpinfo); + ret = nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf, + nmembs); up_read(&nilfs->ns_segctor_sem); - if (ret < 0) - return ret; - - if (copy_to_user(argp, &argv, sizeof(argv))) - ret = -EFAULT; return ret; } @@ -195,28 +178,11 @@ static ssize_t nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - return nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs); -} - -static int nilfs_ioctl_get_suinfo(struct inode *inode, struct file *filp, - unsigned int cmd, void __user *argp) -{ - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; - struct nilfs_argv argv; int ret; - if (copy_from_user(&argv, argp, sizeof(argv))) - return -EFAULT; - down_read(&nilfs->ns_segctor_sem); - ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), - nilfs_ioctl_do_get_suinfo); + ret = nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs); up_read(&nilfs->ns_segctor_sem); - if (ret < 0) - return ret; - - if (copy_to_user(argp, &argv, sizeof(argv))) 
- ret = -EFAULT; return ret; } @@ -242,28 +208,11 @@ static ssize_t nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - return nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs); -} - -static int nilfs_ioctl_get_vinfo(struct inode *inode, struct file *filp, - unsigned int cmd, void __user *argp) -{ - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; - struct nilfs_argv argv; int ret; - if (copy_from_user(&argv, argp, sizeof(argv))) - return -EFAULT; - down_read(&nilfs->ns_segctor_sem); - ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), - nilfs_ioctl_do_get_vinfo); + ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs); up_read(&nilfs->ns_segctor_sem); - if (ret < 0) - return ret; - - if (copy_to_user(argp, &argv, sizeof(argv))) - ret = -EFAULT; return ret; } @@ -276,17 +225,21 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, struct nilfs_bdesc *bdescs = buf; int ret, i; + down_read(&nilfs->ns_segctor_sem); for (i = 0; i < nmembs; i++) { ret = nilfs_bmap_lookup_at_level(bmap, bdescs[i].bd_offset, bdescs[i].bd_level + 1, &bdescs[i].bd_blocknr); if (ret < 0) { - if (ret != -ENOENT) + if (ret != -ENOENT) { + up_read(&nilfs->ns_segctor_sem); return ret; + } bdescs[i].bd_blocknr = 0; } } + up_read(&nilfs->ns_segctor_sem); return nmembs; } @@ -300,10 +253,8 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; - down_read(&nilfs->ns_segctor_sem); ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), nilfs_ioctl_do_get_bdescs); - up_read(&nilfs->ns_segctor_sem); if (ret < 0) return ret; @@ -623,6 +574,29 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, return 0; } +static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp, + ssize_t (*dofunc)(struct the_nilfs *, + __u64 *, int, + void *, 
size_t, size_t)) + +{ + struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct nilfs_argv argv; + int ret; + + if (copy_from_user(&argv, argp, sizeof(argv))) + return -EFAULT; + + ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), dofunc); + if (ret < 0) + return ret; + + if (copy_to_user(argp, &argv, sizeof(argv))) + ret = -EFAULT; + return ret; +} + long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = filp->f_dentry->d_inode; @@ -634,16 +608,18 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_DELETE_CHECKPOINT: return nilfs_ioctl_delete_checkpoint(inode, filp, cmd, argp); case NILFS_IOCTL_GET_CPINFO: - return nilfs_ioctl_get_cpinfo(inode, filp, cmd, argp); + return nilfs_ioctl_get_info(inode, filp, cmd, argp, + nilfs_ioctl_do_get_cpinfo); case NILFS_IOCTL_GET_CPSTAT: return nilfs_ioctl_get_cpstat(inode, filp, cmd, argp); case NILFS_IOCTL_GET_SUINFO: - return nilfs_ioctl_get_suinfo(inode, filp, cmd, argp); + return nilfs_ioctl_get_info(inode, filp, cmd, argp, + nilfs_ioctl_do_get_suinfo); case NILFS_IOCTL_GET_SUSTAT: return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); case NILFS_IOCTL_GET_VINFO: - /* XXX: rename to ??? */ - return nilfs_ioctl_get_vinfo(inode, filp, cmd, argp); + return nilfs_ioctl_get_info(inode, filp, cmd, argp, + nilfs_ioctl_do_get_vinfo); case NILFS_IOCTL_GET_BDESCS: return nilfs_ioctl_get_bdescs(inode, filp, cmd, argp); case NILFS_IOCTL_CLEAN_SEGMENTS: -- cgit v1.2.3 From 4f6b828837b4e3836f2c9ac2f0eab9773b6c1327 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 10 May 2009 22:41:43 +0900 Subject: nilfs2: fix lock order reversal in nilfs_clean_segments ioctl This is a companion patch to ("nilfs2: fix possible circular locking for get information ioctls"). 
This corrects lock order reversal between mm->mmap_sem and nilfs->ns_segctor_sem in nilfs_clean_segments() which was detected by lockdep check: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.30-rc3-nilfs-00003-g360bdc1 #7 ------------------------------------------------------- mmap/5294 is trying to acquire lock: (&nilfs->ns_segctor_sem){++++.+}, at: [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] but task is already holding lock: (&mm->mmap_sem){++++++}, at: [] do_page_fault+0x1d8/0x30a which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_sem){++++++}: [] __lock_acquire+0x1066/0x13b0 [] lock_acquire+0xba/0xdd [] might_fault+0x68/0x88 [] copy_from_user+0x2a/0x111 [] nilfs_ioctl_prepare_clean_segments+0x1d/0xf1 [nilfs2] [] nilfs_clean_segments+0x6d/0x1b9 [nilfs2] [] nilfs_ioctl+0x2ad/0x318 [nilfs2] [] vfs_ioctl+0x22/0x69 [] do_vfs_ioctl+0x460/0x499 [] sys_ioctl+0x40/0x5a [] sysenter_do_call+0x12/0x38 [] 0xffffffff -> #0 (&nilfs->ns_segctor_sem){++++.+}: [] __lock_acquire+0xdcc/0x13b0 [] lock_acquire+0xba/0xdd [] down_read+0x2a/0x3e [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] nilfs_page_mkwrite+0xe7/0x154 [nilfs2] [] __do_fault+0x165/0x376 [] handle_mm_fault+0x287/0x5d1 [] do_page_fault+0x2fb/0x30a [] error_code+0x72/0x78 [] 0xffffffff where nilfs_clean_segments() holds: nilfs->ns_segctor_sem -> copy_from_user() --> page fault -> mm->mmap_sem And, page fault path may hold: page fault -> mm->mmap_sem --> nilfs_page_mkwrite() -> nilfs->ns_segctor_sem Even though nilfs_clean_segments() does not perform write access on given user pages, it may cause deadlock because nilfs->ns_segctor_sem is shared per device and mm->mmap_sem can be shared with other tasks. To avoid this problem, this patch moves all calls of copy_from_user() outside the nilfs->ns_segctor_sem lock in the ioctl. 
Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 163 ++++++++++++++++++++++++++++++---------------------- fs/nilfs2/nilfs.h | 3 +- fs/nilfs2/segment.c | 5 +- fs/nilfs2/segment.h | 3 +- 4 files changed, 100 insertions(+), 74 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index e3c693d37d6..49489f68eab 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -25,6 +25,7 @@ #include /* lock_kernel(), unlock_kernel() */ #include /* capable() */ #include /* copy_from_user(), copy_to_user() */ +#include #include #include "nilfs.h" #include "segment.h" @@ -297,10 +298,10 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, return 0; } -static ssize_t -nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags, - void *buf, size_t size, size_t nmembs) +static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct inode *inode; struct nilfs_vdesc *vdesc; struct buffer_head *bh, *n; @@ -361,19 +362,10 @@ nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags, return ret; } -static inline int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_move_blocks); -} - -static ssize_t -nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp, - int flags, void *buf, size_t size, - size_t nmembs) +static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct inode *cpfile = nilfs->ns_cpfile; struct nilfs_period *periods = buf; int ret, i; @@ -387,36 +379,21 @@ nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp, return nmembs; } -static inline int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) +static int nilfs_ioctl_free_vblocknrs(struct the_nilfs 
*nilfs, + struct nilfs_argv *argv, void *buf) { - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_delete_checkpoints); -} + size_t nmembs = argv->v_nmembs; + int ret; -static ssize_t -nilfs_ioctl_do_free_vblocknrs(struct the_nilfs *nilfs, __u64 *posp, int flags, - void *buf, size_t size, size_t nmembs) -{ - int ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); + ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); return (ret < 0) ? ret : nmembs; } -static inline int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_free_vblocknrs); -} - -static ssize_t -nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp, - int flags, void *buf, size_t size, - size_t nmembs) +static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct inode *dat = nilfs_dat_inode(nilfs); struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; struct nilfs_bdesc *bdescs = buf; @@ -455,18 +432,10 @@ nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp, return nmembs; } -static inline int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_mark_blocks_dirty); -} - -static ssize_t -nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, - void *buf, size_t size, size_t nmembs) +static int nilfs_ioctl_free_segments(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct nilfs_sb_info *sbi = nilfs->ns_writer; int ret; @@ -481,31 +450,19 @@ nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, return (ret < 0) ? 
ret : nmembs; } -static inline int nilfs_ioctl_free_segments(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_free_segments); -} - int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, - void __user *argp) + struct nilfs_argv *argv, void **kbufs) { - struct nilfs_argv argv[5]; const char *msg; - int dir, ret; - - if (copy_from_user(argv, argp, sizeof(argv))) - return -EFAULT; + int ret; - dir = _IOC_WRITE; - ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], dir); + ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]); if (ret < 0) { msg = "cannot read source blocks"; goto failed; } - ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], dir); + + ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], kbufs[1]); if (ret < 0) { /* * can safely abort because checkpoints can be removed @@ -514,7 +471,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, msg = "cannot delete checkpoints"; goto failed; } - ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], dir); + ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], kbufs[2]); if (ret < 0) { /* * can safely abort because DAT file is updated atomically @@ -523,7 +480,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, msg = "cannot delete virtual blocks from DAT file"; goto failed; } - ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], dir); + ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], kbufs[3]); if (ret < 0) { /* * can safely abort because the operation is nondestructive. @@ -531,7 +488,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, msg = "cannot mark copying blocks dirty"; goto failed; } - ret = nilfs_ioctl_free_segments(nilfs, &argv[4], dir); + ret = nilfs_ioctl_free_segments(nilfs, &argv[4], kbufs[4]); if (ret < 0) { /* * can safely abort because this operation is atomic. 
@@ -551,9 +508,75 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { + struct nilfs_argv argv[5]; + const static size_t argsz[5] = { + sizeof(struct nilfs_vdesc), + sizeof(struct nilfs_period), + sizeof(__u64), + sizeof(struct nilfs_bdesc), + sizeof(__u64), + }; + void __user *base; + void *kbufs[5]; + struct the_nilfs *nilfs; + size_t len, nsegs; + int n, ret; + if (!capable(CAP_SYS_ADMIN)) return -EPERM; - return nilfs_clean_segments(inode->i_sb, argp); + + if (copy_from_user(argv, argp, sizeof(argv))) + return -EFAULT; + + nsegs = argv[4].v_nmembs; + if (argv[4].v_size != argsz[4]) + return -EINVAL; + /* + * argv[4] points to segment numbers this ioctl cleans. We + * use kmalloc() for its buffer because memory used for the + * segment numbers is enough small. + */ + kbufs[4] = memdup_user((void __user *)(unsigned long)argv[4].v_base, + nsegs * sizeof(__u64)); + if (IS_ERR(kbufs[4])) + return PTR_ERR(kbufs[4]); + + nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + + for (n = 0; n < 4; n++) { + ret = -EINVAL; + if (argv[n].v_size != argsz[n]) + goto out_free; + + if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment) + goto out_free; + + len = argv[n].v_size * argv[n].v_nmembs; + base = (void __user *)(unsigned long)argv[n].v_base; + if (len == 0) { + kbufs[n] = NULL; + continue; + } + + kbufs[n] = vmalloc(len); + if (!kbufs[n]) { + ret = -ENOMEM; + goto out_free; + } + if (copy_from_user(kbufs[n], base, len)) { + ret = -EFAULT; + vfree(kbufs[n]); + goto out_free; + } + } + + ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); + + out_free: + while (--n > 0) + vfree(kbufs[n]); + kfree(kbufs[4]); + return ret; } static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 3d0c18a16db..da6fc0bba2e 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -236,7 +236,8 @@ 
extern int nilfs_sync_file(struct file *, struct dentry *, int); /* ioctl.c */ long nilfs_ioctl(struct file *, unsigned int, unsigned long); -int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, void __user *); +int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *, + void **); /* inode.c */ extern struct inode *nilfs_new_inode(struct inode *, int); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index fb70ec3be20..22c7f65c240 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2589,7 +2589,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) } } -int nilfs_clean_segments(struct super_block *sb, void __user *argp) +int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, + void **kbufs) { struct nilfs_sb_info *sbi = NILFS_SB(sb); struct nilfs_sc_info *sci = NILFS_SC(sbi); @@ -2606,7 +2607,7 @@ int nilfs_clean_segments(struct super_block *sb, void __user *argp) err = nilfs_init_gcdat_inode(nilfs); if (unlikely(err)) goto out_unlock; - err = nilfs_ioctl_prepare_clean_segments(nilfs, argp); + err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); if (unlikely(err)) goto out_unlock; diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index a98fc1ed0bb..476bdd5df5b 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -222,7 +222,8 @@ extern int nilfs_construct_segment(struct super_block *); extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, loff_t, loff_t); extern void nilfs_flush_segment(struct super_block *, ino_t); -extern int nilfs_clean_segments(struct super_block *, void __user *); +extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, + void **); extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *, __u64 *, size_t); -- cgit v1.2.3 From 83aca8f480fcd2d9748301a5d060cf947dc75b94 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 11 May 2009 23:24:47 +0900 Subject: nilfs2: 
check size of array structured data exchanged via ioctls Although some ioctls of nilfs2 exchange data in the form of indirectly referenced array, some of them lack size check on the array elements. This inserts the missing checks and rejects requests if data of ioctl does not have a valid format. We usually don't have to check size of structures that we associated with ioctl commands because the size is tested implicitly for identifying ioctl command; the checks this patch adds are for the cases where the implicit check is not applied. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 49489f68eab..50ff3f2cdf2 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -254,6 +254,9 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; + if (argv.v_size != sizeof(struct nilfs_bdesc)) + return -EINVAL; + ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), nilfs_ioctl_do_get_bdescs); if (ret < 0) @@ -599,6 +602,7 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp, + size_t membsz, ssize_t (*dofunc)(struct the_nilfs *, __u64 *, int, void *, size_t, size_t)) @@ -611,6 +615,9 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; + if (argv.v_size != membsz) + return -EINVAL; + ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), dofunc); if (ret < 0) return ret; @@ -632,16 +639,19 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_delete_checkpoint(inode, filp, cmd, argp); case NILFS_IOCTL_GET_CPINFO: return nilfs_ioctl_get_info(inode, filp, cmd, argp, + sizeof(struct nilfs_cpinfo), 
nilfs_ioctl_do_get_cpinfo); case NILFS_IOCTL_GET_CPSTAT: return nilfs_ioctl_get_cpstat(inode, filp, cmd, argp); case NILFS_IOCTL_GET_SUINFO: return nilfs_ioctl_get_info(inode, filp, cmd, argp, + sizeof(struct nilfs_suinfo), nilfs_ioctl_do_get_suinfo); case NILFS_IOCTL_GET_SUSTAT: return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); case NILFS_IOCTL_GET_VINFO: return nilfs_ioctl_get_info(inode, filp, cmd, argp, + sizeof(struct nilfs_vinfo), nilfs_ioctl_do_get_vinfo); case NILFS_IOCTL_GET_BDESCS: return nilfs_ioctl_get_bdescs(inode, filp, cmd, argp); -- cgit v1.2.3 From 2b79bc4f7ebbd5af3c8b867968f9f15602d5f802 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Mon, 11 May 2009 14:25:34 -0400 Subject: dup2: Fix return value with oldfd == newfd and invalid fd The return value of dup2 when oldfd == newfd and the fd isn't valid is not getting properly sign extended. We end up with 4294967287 instead of -EBADF. I've reproduced this on SLE11 (2.6.27.21), openSUSE Factory (2.6.29-rc5), and Ubuntu 9.04 (2.6.28). This patch uses a signed int for the error value so it is properly extended. Commit 6c5d0512a091480c9f981162227fdb1c9d70e555 introduced this regression. Reported-by: Jiri Dluhos Signed-off-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- fs/fcntl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index cc8e4de2fee..1ad703150de 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -117,11 +117,13 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) { if (unlikely(newfd == oldfd)) { /* corner case */ struct files_struct *files = current->files; + int retval = oldfd; + rcu_read_lock(); if (!fcheck_files(files, oldfd)) - oldfd = -EBADF; + retval = -EBADF; rcu_read_unlock(); - return oldfd; + return retval; } return sys_dup3(oldfd, newfd, 0); } -- cgit v1.2.3 From 8daed1e549b55827758b3af7b8132a73fc51526f Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Mon, 11 May 2009 16:10:19 -0400 Subject: nfsd: silence lockdep warning Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4recover.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 5275097a756..b5348405046 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -229,7 +229,7 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) goto out; status = vfs_readdir(filp, nfsd4_build_namelist, &names); fput(filp); - mutex_lock(&dir->d_inode->i_mutex); + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); while (!list_empty(&names)) { entry = list_entry(names.next, struct name_list, list); @@ -264,7 +264,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen) dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); - mutex_lock(&rec_dir.dentry->d_inode->i_mutex); + mutex_lock_nested(&rec_dir.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(name, rec_dir.dentry, namlen); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); -- cgit v1.2.3 From 9c1ee184a30394e54165fa4c15923cabd952c106 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 May 2009 18:36:58 -0400 Subject: ext4: Fix sub-block zeroing for writes into preallocated extents We need to mark the buffer_head mapping preallocated space as new during write_begin. Otherwise we don't zero out the page cache content properly for a partial write. This will cause file corruption with preallocation. Now that we mark the buffer_head new we also need to have a valid buffer_head blocknr so that unmap_underlying_metadata() unmaps the correct block. 
Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 ++ fs/ext4/inode.c | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e4033215834..172656c2a3b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2875,6 +2875,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (allocated > max_blocks) allocated = max_blocks; set_buffer_unwritten(bh_result); + bh_result->b_bdev = inode->i_sb->s_bdev; + bh_result->b_blocknr = newblock; goto out2; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e91f978c7f1..d4b634ae06b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2323,6 +2323,13 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, set_buffer_delay(bh_result); } else if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); + /* + * With sub-block writes into unwritten extents + * we also need to mark the buffer as new so that + * the unwritten parts of the buffer gets correctly zeroed. + */ + if (buffer_unwritten(bh_result)) + set_buffer_new(bh_result); ret = 0; } -- cgit v1.2.3 From 33b9817e2ae097c7b8d256e3510ac6c54fc6d9d0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 12 May 2009 14:40:37 -0400 Subject: ext4: Use a fake block number for delayed new buffer_head Use a very large unsigned number (~0xffff) as the fake block number for the delayed new buffer. The VFS should never try to write out this number, but if it does, this will make it obvious.
Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d4b634ae06b..0ac31a06422 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2297,6 +2297,10 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { int ret = 0; + sector_t invalid_block = ~((sector_t) 0xffff); + + if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) + invalid_block = ~0; BUG_ON(create == 0); BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); @@ -2318,7 +2322,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, /* not enough space to reserve */ return ret; - map_bh(bh_result, inode->i_sb, 0); + map_bh(bh_result, inode->i_sb, invalid_block); set_buffer_new(bh_result); set_buffer_delay(bh_result); } else if (ret > 0) { -- cgit v1.2.3 From bfe3891a5f5d3b78146a45f40e435d14f5ae39dd Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Tue, 12 May 2009 13:19:44 -0700 Subject: epoll: fix size check in epoll_create() Fix a size check WRT the manual pages. This was inadvertently broken by commit 9fe5ad9c8cef9ad5873d8ee55d1cf00d9b607df0 ("flag parameters add-on: remove epoll_create size param"). 
Signed-off-by: Davide Libenzi Cc: Cc: rohit verma Cc: Ulrich Drepper Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index a89f370fadb..5458e80fc55 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1212,7 +1212,7 @@ SYSCALL_DEFINE1(epoll_create1, int, flags) SYSCALL_DEFINE1(epoll_create, int, size) { - if (size < 0) + if (size <= 0) return -EINVAL; return sys_epoll_create1(0); -- cgit v1.2.3 From a37b06d589f2c687a38d07569f4ef97c650fde39 Mon Sep 17 00:00:00 2001 From: Doug Chapman Date: Wed, 13 May 2009 02:56:39 +0100 Subject: Squashfs: fix breakage when page size > metadata block size Squashfs is broken on any system where the page size is larger than the metadata size (8192). This is easily fixed by ensuring cache->pages is always > 0. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Doug Chapman Signed-off-by: Andrew Morton Signed-off-by: Phillip Lougher --- fs/squashfs/cache.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 1c4739e33af..40c98fa6b5d 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -252,6 +252,7 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, cache->entries = entries; cache->block_size = block_size; cache->pages = block_size >> PAGE_CACHE_SHIFT; + cache->pages = cache->pages ? cache->pages : 1; cache->name = name; cache->num_waiters = 0; spin_lock_init(&cache->lock); -- cgit v1.2.3 From fffb47b80e8bb3f171ef02b90b1ae22c63983979 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Wed, 13 May 2009 02:59:26 +0100 Subject: Squashfs: check page size is not larger than the filesystem block size Normally the block size (by default 128K) will be larger than the page size, unless a non-standard block size has been specified in Mksquashfs, and the page size is larger than 4K. 
Signed-off-by: Phillip Lougher --- fs/squashfs/super.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index ffa6edcd2d0..0adc624c956 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -157,6 +157,16 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) if (msblk->block_size > SQUASHFS_FILE_MAX_SIZE) goto failed_mount; + /* + * Check the system page size is not larger than the filesystem + * block size (by default 128K). This is currently not supported. + */ + if (PAGE_CACHE_SIZE > msblk->block_size) { + ERROR("Page size > filesystem block size (%d). This is " + "currently not supported!\n", msblk->block_size); + goto failed_mount; + } + msblk->block_log = le16_to_cpu(sblk->block_log); if (msblk->block_log > SQUASHFS_FILE_MAX_LOG) goto failed_mount; -- cgit v1.2.3 From e5d287539dba264a1d9d7607a25b8c8c61c9d658 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Wed, 13 May 2009 03:25:20 +0100 Subject: Squashfs: code tidying, remove commented out line in Makefile Signed-off-by: Phillip Lougher --- fs/squashfs/Makefile | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 8258cf9a031..70e3244fa30 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -5,4 +5,3 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o squashfs-y += namei.o super.o symlink.o -#squashfs-y += squashfs2_0.o -- cgit v1.2.3 From f2deae9d4e70793568ef9e85d227abb7bef5b622 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 13 May 2009 15:56:10 +0100 Subject: Remove implementation of readpage from the hugetlbfs_aops The core VM assumes the page size used by the address_space in inode->i_mapping is PAGE_SIZE but hugetlbfs breaks this assumption by inserting pages into the page cache at offsets the core VM considers unexpected.
This would not be a problem except that hugetlbfs also provide a ->readpage implementation. As it exists, the core VM can assume the base page size is being used, allocate pages on behalf of the filesystem, insert them into the page cache and call ->readpage to populate them. These pages are the wrong size and at the wrong offset for hugetlbfs causing confusion. This patch deletes the ->readpage implementation for hugetlbfs on the grounds the core VM should not be allocating and populating pages on behalf of hugetlbfs. There should be no existing users of the ->readpage implementation so it should not cause a regression. Signed-off-by: Mel Gorman Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'fs') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 153d9681192..c1462d43e72 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -312,16 +312,6 @@ out: return retval; } -/* - * Read a page. Again trivial. If it didn't already exist - * in the page cache, it is zero-filled. 
- */ -static int hugetlbfs_readpage(struct file *file, struct page * page) -{ - unlock_page(page); - return -EINVAL; -} - static int hugetlbfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, @@ -701,7 +691,6 @@ static void hugetlbfs_destroy_inode(struct inode *inode) } static const struct address_space_operations hugetlbfs_aops = { - .readpage = hugetlbfs_readpage, .write_begin = hugetlbfs_write_begin, .write_end = hugetlbfs_write_end, .set_page_dirty = hugetlbfs_set_page_dirty, -- cgit v1.2.3 From d8e2f53ac99f4ce7d63807a84f98d1b80df598cf Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 14 May 2009 07:46:59 -0400 Subject: cifs: fix error handling in parse_DFS_referrals cifs_strndup_from_ucs returns NULL on error, not an ERR_PTR Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 75e6623a863..5759ba53dc9 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3976,9 +3976,8 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, max_len = data_end - temp; node->path_name = cifs_strndup_from_ucs(temp, max_len, is_unicode, nls_codepage); - if (IS_ERR(node->path_name)) { - rc = PTR_ERR(node->path_name); - node->path_name = NULL; + if (!node->path_name) { + rc = -ENOMEM; goto parse_DFS_referrals_exit; } @@ -3987,11 +3986,8 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, max_len = data_end - temp; node->node_name = cifs_strndup_from_ucs(temp, max_len, is_unicode, nls_codepage); - if (IS_ERR(node->node_name)) { - rc = PTR_ERR(node->node_name); - node->node_name = NULL; - goto parse_DFS_referrals_exit; - } + if (!node->node_name) + rc = -ENOMEM; } parse_DFS_referrals_exit: -- cgit v1.2.3 From 2757495c906113896b378bf084708846273c87b2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 14 May 2009 13:10:02 -0400 Subject: Btrfs: 
init inode ordered_data_close flag properly This flag is used to decide when we need to send a given file through the ordered code to make sure it is fully written before a transaction commits. It was not being properly set to zero when the inode was being setup. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 90c23eb2882..22450bd972d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3122,6 +3122,7 @@ static noinline void init_btrfs_i(struct inode *inode) bi->flags = 0; bi->index_cnt = (u64)-1; bi->last_unlink_trans = 0; + bi->ordered_data_close = 0; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); -- cgit v1.2.3 From 76a05b35a320e8c968d0fec8f512a1acae227309 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 14 May 2009 13:24:30 -0400 Subject: Btrfs: Don't loop forever on metadata IO failures When a btrfs metadata read fails, the first thing we try to do is find a good copy on another mirror of the block. If this fails, read_tree_block() ends up returning a buffer that isn't up to date. The btrfs btree reading code was reworked to drop locks and repeat the search when IO was done, but the changes didn't add a check for failed reads. The end result was looping forever on buffers that were never going to become up to date. 
Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a99f1c2a710..fedf8b9f03a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1469,6 +1469,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, u32 blocksize; struct extent_buffer *b = *eb_ret; struct extent_buffer *tmp; + int ret; blocknr = btrfs_node_blockptr(b, slot); gen = btrfs_node_ptr_generation(b, slot); @@ -1476,6 +1477,10 @@ read_block_for_search(struct btrfs_trans_handle *trans, tmp = btrfs_find_tree_block(root, blocknr, blocksize); if (tmp && btrfs_buffer_uptodate(tmp, gen)) { + /* + * we found an up to date block without sleeping, return + * right away + */ *eb_ret = tmp; return 0; } @@ -1483,7 +1488,9 @@ read_block_for_search(struct btrfs_trans_handle *trans, /* * reduce lock contention at high levels * of the btree by dropping locks before - * we read. + * we read. Don't release the lock on the current + * level because we need to walk this node to figure + * out which blocks to read. */ btrfs_unlock_up_safe(p, level + 1); btrfs_set_path_blocking(p); @@ -1494,10 +1501,21 @@ read_block_for_search(struct btrfs_trans_handle *trans, reada_for_search(root, p, level, slot, key->objectid); btrfs_release_path(NULL, p); + + ret = -EAGAIN; tmp = read_tree_block(root, blocknr, blocksize, gen); - if (tmp) + if (tmp) { + /* + * If the read above didn't mark this buffer up to date, + * it will never end up being up to date. Set ret to EIO now + * and give up so that our caller doesn't loop forever + * on our EAGAINs. 
+ */ + if (!btrfs_buffer_uptodate(tmp, 0)) + ret = -EIO; free_extent_buffer(tmp); - return -EAGAIN; + } + return ret; } /* @@ -1696,6 +1714,9 @@ cow_done: if (ret == -EAGAIN) goto again; + if (ret == -EIO) + goto done; + if (!p->skip_locking) { int lret; @@ -1738,6 +1759,8 @@ done: */ if (!p->leave_spinning) btrfs_set_path_blocking(p); + if (ret < 0) + btrfs_release_path(root, p); return ret; } @@ -4212,6 +4235,11 @@ again: if (ret == -EAGAIN) goto again; + if (ret < 0) { + btrfs_release_path(root, path); + goto done; + } + if (!path->skip_locking) { ret = btrfs_try_spin_lock(next); if (!ret) { @@ -4246,6 +4274,11 @@ again: if (ret == -EAGAIN) goto again; + if (ret < 0) { + btrfs_release_path(root, path); + goto done; + } + if (!path->skip_locking) { btrfs_assert_tree_locked(path->nodes[level]); ret = btrfs_try_spin_lock(next); -- cgit v1.2.3 From cc7b0c9b701a079016183f3546b4d720194b367f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 14 May 2009 13:31:21 -0400 Subject: Btrfs: remove some WARN_ONs in the IO failure path These debugging WARN_ONs make too much console noise during regular IO failures. An IO failure will still generate a number of messages as we verify checksums etc, but these two are not needed. 
Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 -- fs/btrfs/inode.c | 1 - 2 files changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0ff16d3331d..4b0ea0b80c2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -848,8 +848,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, if (ret == 0) set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); - else - WARN_ON(1); return buf; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 22450bd972d..1c8b0190d03 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4296,7 +4296,6 @@ out: } if (err) { free_extent_map(em); - WARN_ON(1); return ERR_PTR(err); } return em; -- cgit v1.2.3 From 5d847a8ed970d17e2734ff9e07a74fe36cceb24e Mon Sep 17 00:00:00 2001 From: Li Hong Date: Thu, 14 May 2009 13:52:21 -0400 Subject: Btrfs: remove outdated comment in btrfs_ioctl_resize() In Li Zefan's commit dae7b665cf6d6e6e733f1c9c16cf55547dd37e33, a combination call of kmalloc() and copy_from_user() is replaced by memdup_user(). So btrfs_ioctl_resize() doesn't use GFP_NOFS any more. Signed-off-by: Li Hong Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 48762aa1e94..db84aa48ab7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -437,10 +437,6 @@ out_unlock: return 0; } -/* - * Called inside transaction, so use GFP_NOFS - */ - static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) { u64 new_size; -- cgit v1.2.3 From 6b65c5c61bf86086817a5ed786c8f45755ac83b3 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 14 May 2009 13:52:21 -0400 Subject: Btrfs: make show_options result match actual option names The notreelog and flushoncommit mount options were being printed slightly differently. 
Signed-off-by: Sage Weil Signed-off-by: Chris Mason --- fs/btrfs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index bf0e84c7560..e99510bfbff 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -436,9 +436,9 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) if (btrfs_test_opt(root, SSD)) seq_puts(seq, ",ssd"); if (btrfs_test_opt(root, NOTREELOG)) - seq_puts(seq, ",no-treelog"); + seq_puts(seq, ",notreelog"); if (btrfs_test_opt(root, FLUSHONCOMMIT)) - seq_puts(seq, ",flush-on-commit"); + seq_puts(seq, ",flushoncommit"); if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) seq_puts(seq, ",noacl"); return 0; -- cgit v1.2.3 From 9f55684c2d9869e8cc53595a3fee679958511cfb Mon Sep 17 00:00:00 2001 From: Sankar P Date: Thu, 14 May 2009 13:52:22 -0400 Subject: Btrfs: Spelling fix in btrfs_lookup_first_block_group comments Signed-off-by: Sankar P Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e4966444811..3e2c7c738f2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -312,7 +312,7 @@ btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) } /* - * return the block group that contains teh given bytenr + * return the block group that contains the given bytenr */ struct btrfs_block_group_cache *btrfs_lookup_block_group( struct btrfs_fs_info *info, -- cgit v1.2.3 From 2a8964d63d50dd2d65d71d342bc7fb6ef4117614 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 14 May 2009 17:05:39 -0400 Subject: ext4: Clear the unwritten buffer_head flag after the extent is initialized The BH_Unwritten flag indicates that the buffer is allocated on disk but has not been written; that is, the disk was part of a persistent preallocation area. 
That flag should only be set when a get_blocks() function is looking up an inode's logical to physical block mapping. When ext4_get_blocks_wrap() is called with create=1, the uninitialized extent is converted into an initialized one, so the BH_Unwritten flag is no longer appropriate. Hence, we need to make sure the BH_Unwritten is not left set, since the combination of BH_Mapped and BH_Unwritten is not allowed; among other things, it will cause ext4's get_block() to be called over and over again during the write_begin phase of write(2). Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0ac31a06422..2a9ffd528dd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1149,6 +1149,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, int retval; clear_buffer_mapped(bh); + clear_buffer_unwritten(bh); /* * Try to see if we can get the block without requesting @@ -1178,6 +1179,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, if (retval > 0 && buffer_mapped(bh)) return retval; + /* + * When we call get_blocks without the create flag, the + * BH_Unwritten flag could have gotten set if the blocks + * requested were part of a uninitialized extent. We need to + * clear this flag now that we are committed to convert all or + * part of the uninitialized extent to be an initialized + * extent. This is because we need to avoid the combination + * of BH_Unwritten and BH_Mapped flags being simultaneously + * set on the buffer_head.
+ */ + clear_buffer_unwritten(bh); + /* * New blocks allocate and/or writing to uninitialized extent * will possibly result in updating i_data, so we take -- cgit v1.2.3 From 2ec0ae3acec47f628179ee95fe2c4da01b5e9fc4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 15 May 2009 09:07:28 -0400 Subject: ext4: Fix race in ext4_inode_info.i_cached_extent If two CPUs call ext4_ext_get_blocks() at the same time, there is nothing protecting the i_cached_extent structure from being used and updated at the same time. This could potentially cause the wrong location on disk to be read or written to, including potentially causing the corruption of the block group descriptors and/or inode table. This bug has been in the ext4 code since almost the very beginning of ext4's development. Fortunately once the data is stored in the page cache, ext4_get_blocks() doesn't need to be called, so trying to replicate this problem to the point where we could identify its root cause was *extremely* difficult. Many thanks to Kevin Shanahan for working over several months to be able to reproduce this easily so we could finally nail down the cause of the corruption.
Signed-off-by: "Theodore Ts'o" Reviewed-by: "Aneesh Kumar K.V" --- fs/ext4/extents.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 172656c2a3b..e3a55eb8b26 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1841,11 +1841,13 @@ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, { struct ext4_ext_cache *cex; BUG_ON(len == 0); + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); cex = &EXT4_I(inode)->i_cached_extent; cex->ec_type = type; cex->ec_block = block; cex->ec_len = len; cex->ec_start = start; + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); } /* @@ -1902,12 +1904,17 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, struct ext4_extent *ex) { struct ext4_ext_cache *cex; + int ret = EXT4_EXT_CACHE_NO; + /* + * We borrow i_block_reservation_lock to protect i_cached_extent + */ + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); cex = &EXT4_I(inode)->i_cached_extent; /* has cache valid data? */ if (cex->ec_type == EXT4_EXT_CACHE_NO) - return EXT4_EXT_CACHE_NO; + goto errout; BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && cex->ec_type != EXT4_EXT_CACHE_EXTENT); @@ -1918,11 +1925,11 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, ext_debug("%u cached by %u:%u:%llu\n", block, cex->ec_block, cex->ec_len, cex->ec_start); - return cex->ec_type; + ret = cex->ec_type; } - - /* not in cache */ - return EXT4_EXT_CACHE_NO; +errout: + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + return ret; } /* -- cgit v1.2.3 From 1f71ebedb3f8ce9108978168759c8551d873a912 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 14 May 2009 19:38:24 -0700 Subject: devpts: correctly set default options devpts_get_sb() calls memset(0) to clear mount options and calls parse_mount_options() if user specified any mount options. The memset(0) is bogus since the 'mode' and 'ptmxmode' options are non-zero by default. 
parse_mount_options() restores options to default anyway and can properly deal with NULL mount options. So in devpts_get_sb() remove memset(0) and call parse_mount_options() even for NULL mount options. Bug reported by Eric Paris: http://lkml.org/lkml/2009/5/7/448. Signed-off-by: Sukadev Bhattiprolu Tested-by: Marc Dionne Reported-by: Eric Paris Cc: Christoph Hellwig Cc: Alan Cox Acked-by: Serge Hallyn Cc: Al Viro Cc: "Rafael J. Wysocki" Reviewed-by: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/devpts/inode.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 21165cf934f..c68edb96944 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -90,6 +90,15 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode) #define PARSE_MOUNT 0 #define PARSE_REMOUNT 1 +/* + * parse_mount_options(): + * Set @opts to mount options specified in @data. If an option is not + * specified in @data, set it to its default value. The exception is + * 'newinstance' option which can only be set/cleared on a mount (i.e. + * cannot be changed during remount). + * + * Note: @data may be NULL (in which case all options are set to default). 
+ */ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) { char *p; @@ -355,12 +364,9 @@ static int devpts_get_sb(struct file_system_type *fs_type, struct pts_mount_opts opts; struct super_block *s; - memset(&opts, 0, sizeof(opts)); - if (data) { - error = parse_mount_options(data, PARSE_MOUNT, &opts); - if (error) - return error; - } + error = parse_mount_options(data, PARSE_MOUNT, &opts); + if (error) + return error; if (opts.newinstance) s = sget(fs_type, NULL, set_anon_super, NULL); -- cgit v1.2.3 From 12abb35a03e32c97235fcefdcf2d851be9f82dc2 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sun, 17 May 2009 01:02:01 -0400 Subject: reiserfs: clean up ifdefs With xattr cleanup even with xattrs disabled, much of the initial setup is still performed. Some #ifdefs are just not needed since the options they protect wouldn't be available anyway. This cleans those up. Signed-off-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- fs/reiserfs/xattr.c | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 2237e10c7c7..cf949646dd5 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -685,20 +685,6 @@ out: return err; } -/* Actual operations that are exported to VFS-land */ -struct xattr_handler *reiserfs_xattr_handlers[] = { - &reiserfs_xattr_user_handler, - &reiserfs_xattr_trusted_handler, -#ifdef CONFIG_REISERFS_FS_SECURITY - &reiserfs_xattr_security_handler, -#endif -#ifdef CONFIG_REISERFS_FS_POSIX_ACL - &reiserfs_posix_acl_access_handler, - &reiserfs_posix_acl_default_handler, -#endif - NULL -}; - /* * In order to implement different sets of xattr operations for each xattr * prefix with the generic xattr API, a filesystem should create a @@ -922,6 +908,28 @@ static int create_privroot(struct dentry *dentry) return 0; } +#else +int __init reiserfs_xattr_register_handlers(void) { return 0; } +void 
reiserfs_xattr_unregister_handlers(void) {} +static int create_privroot(struct dentry *dentry) { return 0; } +#endif + +/* Actual operations that are exported to VFS-land */ +struct xattr_handler *reiserfs_xattr_handlers[] = { +#ifdef CONFIG_REISERFS_FS_XATTR + &reiserfs_xattr_user_handler, + &reiserfs_xattr_trusted_handler, +#endif +#ifdef CONFIG_REISERFS_FS_SECURITY + &reiserfs_xattr_security_handler, +#endif +#ifdef CONFIG_REISERFS_FS_POSIX_ACL + &reiserfs_posix_acl_access_handler, + &reiserfs_posix_acl_default_handler, +#endif + NULL +}; + static int xattr_mount_check(struct super_block *s) { /* We need generation numbers to ensure that the oid mapping is correct @@ -941,11 +949,6 @@ static int xattr_mount_check(struct super_block *s) return 0; } -#else -int __init reiserfs_xattr_register_handlers(void) { return 0; } -void reiserfs_xattr_unregister_handlers(void) {} -#endif - /* This will catch lookups from the fs root to .reiserfs_priv */ static int xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) @@ -992,7 +995,6 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) int err = 0; struct dentry *privroot = REISERFS_SB(s)->priv_root; -#ifdef CONFIG_REISERFS_FS_XATTR err = xattr_mount_check(s); if (err) goto error; @@ -1023,14 +1025,11 @@ error: clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt)); clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt)); } -#endif /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ -#ifdef CONFIG_REISERFS_FS_POSIX_ACL if (reiserfs_posixacl(s)) s->s_flags |= MS_POSIXACL; else -#endif s->s_flags &= ~MS_POSIXACL; return err; -- cgit v1.2.3 From ceb5edc457f07956c82dccfb54ca8ae7e2a399f0 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sun, 17 May 2009 01:02:02 -0400 Subject: reiserfs: deal with NULL xattr root w/ xattrs disabled This avoids an Oops in open_xa_root that can occur when deleting a file with xattrs disabled. 
It assumes that the xattr root will be there, and that is not guaranteed. Signed-off-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- fs/reiserfs/super.c | 2 -- fs/reiserfs/xattr.c | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 1215a4f50cd..3567fb9e3fb 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -448,13 +448,11 @@ int remove_save_link(struct inode *inode, int truncate) static void reiserfs_kill_sb(struct super_block *s) { if (REISERFS_SB(s)) { -#ifdef CONFIG_REISERFS_FS_XATTR if (REISERFS_SB(s)->xattr_root) { d_invalidate(REISERFS_SB(s)->xattr_root); dput(REISERFS_SB(s)->xattr_root); REISERFS_SB(s)->xattr_root = NULL; } -#endif if (REISERFS_SB(s)->priv_root) { d_invalidate(REISERFS_SB(s)->priv_root); dput(REISERFS_SB(s)->priv_root); diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index cf949646dd5..628075ca82c 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -123,7 +123,9 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags) mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR); xaroot = dget(REISERFS_SB(sb)->xattr_root); - if (!xaroot->d_inode) { + if (!xaroot) + xaroot = ERR_PTR(-ENODATA); + else if (!xaroot->d_inode) { int err = -ENODATA; if (xattr_may_create(flags)) err = xattr_mkdir(privroot->d_inode, xaroot, 0700); -- cgit v1.2.3 From b83674c0da6558e357c6b482ccf299eeea77d8ef Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sun, 17 May 2009 01:02:03 -0400 Subject: reiserfs: fixup perms when xattrs are disabled This adds CONFIG_REISERFS_FS_XATTR protection from reiserfs_permission. This is needed to avoid warnings during file deletions and chowns with xattrs disabled. 
Signed-off-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- fs/reiserfs/xattr.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 628075ca82c..8e7deb0e696 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -871,23 +871,6 @@ static int reiserfs_check_acl(struct inode *inode, int mask) return error; } -int reiserfs_permission(struct inode *inode, int mask) -{ - /* - * We don't do permission checks on the internal objects. - * Permissions are determined by the "owning" object. - */ - if (IS_PRIVATE(inode)) - return 0; - /* - * Stat data v1 doesn't support ACLs. - */ - if (get_inode_sd_version(inode) == STAT_DATA_V1) - return generic_permission(inode, mask, NULL); - else - return generic_permission(inode, mask, reiserfs_check_acl); -} - static int create_privroot(struct dentry *dentry) { int err; @@ -951,6 +934,25 @@ static int xattr_mount_check(struct super_block *s) return 0; } +int reiserfs_permission(struct inode *inode, int mask) +{ + /* + * We don't do permission checks on the internal objects. + * Permissions are determined by the "owning" object. + */ + if (IS_PRIVATE(inode)) + return 0; + +#ifdef CONFIG_REISERFS_FS_XATTR + /* + * Stat data v1 doesn't support ACLs. + */ + if (get_inode_sd_version(inode) != STAT_DATA_V1) + return generic_permission(inode, mask, reiserfs_check_acl); +#endif + return generic_permission(inode, mask, NULL); +} + /* This will catch lookups from the fs root to .reiserfs_priv */ static int xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) -- cgit v1.2.3 From 7ee2cb7f32b299c2b06a31fde155457203e4b7dd Mon Sep 17 00:00:00 2001 From: Frank Filz Date: Mon, 18 May 2009 17:41:40 -0400 Subject: nfs: Fix NFS v4 client handling of MAY_EXEC in nfs_permission. 
The problem is that permission checking is skipped if atomic open is possible, but when exec opens a file, it just opens it O_RDONLY which means EXEC permission will not be checked at that time. This problem is observed by the following sequence (executed as root): mount -t nfs4 server:/ /mnt4 echo "ls" >/mnt4/foo chmod 744 /mnt4/foo su guest -c "mnt4/foo" Signed-off-by: Frank Filz Signed-off-by: Trond Myklebust Cc: stable@kernel.org Tested-by: Eugene Teo Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 370b190a09d..89f98e9a024 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1943,7 +1943,8 @@ int nfs_permission(struct inode *inode, int mask) case S_IFREG: /* NFSv4 has atomic_open... */ if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN) - && (mask & MAY_OPEN)) + && (mask & MAY_OPEN) + && !(mask & MAY_EXEC)) goto out; break; case S_IFDIR: -- cgit v1.2.3