From 9fc7c63a1d6e9920038ced782390a54888ed70a6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: jbd2: fix the way the b_modified flag is cleared Currently at the start of a journal commit we loop through all of the buffers on the committing transaction and clear the b_modified flag (the flag that is set when a transaction modifies the buffer) under the j_list_lock. The problem is that everywhere else this flag is modified only under the jbd2 lock buffer flag, so it will race with a running transaction who could potentially set it, and have it unset by the committing transaction. This is also a big waste, you can have several thousands of buffers that you are clearing the modified flag on when you may not need to. This patch removes this code and instead clears the b_modified flag upon entering do_get_write_access/journal_get_create_access, so if that transaction does indeed use the buffer then it will be accounted for properly, and if it does not then we know we didn't use it. That will be important for the next patch in this series. Tested thoroughly by myself using postmark/iozone/bonnie++. Cc: Cc: Jan Kara Signed-off-by: Josef Bacik Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 16 ---------------- fs/jbd2/transaction.c | 13 +++++++++++++ 2 files changed, 13 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index a8173081f83..988fbec1143 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -519,22 +519,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) jbd_debug (3, "JBD: commit phase 2\n"); - /* - * First, drop modified flag: all accesses to the buffers - * will be tracked for a new trasaction only -bzzz - */ - spin_lock(&journal->j_list_lock); - if (commit_transaction->t_buffers) { - new_jh = jh = commit_transaction->t_buffers->b_tnext; - do { - J_ASSERT_JH(new_jh, new_jh->b_modified == 1 || - new_jh->b_modified == 0); - new_jh->b_modified = 0; - new_jh = new_jh->b_tnext; - } while (new_jh != jh); - } - spin_unlock(&journal->j_list_lock); - /* * Now start flushing things to disk, in the order they appear * on the transaction lists. Data blocks go first. diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index b9b0b6f899b..9dc71a6b62e 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -617,6 +617,12 @@ repeat: jh->b_next_transaction == transaction) goto done; + /* + * this is the first time this transaction is touching this buffer, + * reset the modified flag + */ + jh->b_modified = 0; + /* * If there is already a copy-out version of this buffer, then we don't * need to make another one @@ -829,9 +835,16 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) if (jh->b_transaction == NULL) { jh->b_transaction = transaction; + + /* first access by this transaction */ + jh->b_modified = 0; + JBUFFER_TRACE(jh, "file as BJ_Reserved"); __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); } else if (jh->b_transaction == journal->j_committing_transaction) { + /* first access by this transaction */ + jh->b_modified = 0; + JBUFFER_TRACE(jh, "set next transaction"); jh->b_next_transaction = transaction; } -- cgit v1.2.3 From 1dfc3220d963385a317264b11154c462a83596ed Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: jbd2: fix possible journal overflow issues There are several cases where the running transaction can get buffers added to its BJ_Metadata list which it never dirtied, which makes its t_nr_buffers counter end up larger than its t_outstanding_credits counter. This will cause issues when starting new transactions as while we are logging buffers we decrement t_outstanding_buffers, so when t_outstanding_buffers goes negative, we will report that we need less space in the journal than we actually need, so transactions will be started even though there may not be enough room for them. In the worst case scenario (which admittedly is almost impossible to reproduce) this will result in the journal running out of space. The fix is to only refile buffers from the committing transaction to the running transactions BJ_Modified list when b_modified is set on that journal, which is the only way to be sure if the running transaction has modified that buffer. This patch also fixes an accounting error in journal_forget, it is possible that we can call journal_forget on a buffer without having modified it, only gotten write access to it, so instead of freeing a credit, we only do so if the buffer was modified. The assert will help catch if this problem occurs. Without these two patches I could hit this assert within minutes of running postmark, with them this issue no longer arises. Cc: Cc: Jan Kara Signed-off-by: Josef Bacik Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 3 +++ fs/jbd2/transaction.c | 21 ++++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 988fbec1143..e0139786f71 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -568,6 +568,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits; stats.u.run.rs_blocks_logged = 0; + J_ASSERT(commit_transaction->t_nr_buffers <= + commit_transaction->t_outstanding_credits); + descriptor = NULL; bufs = 0; while (commit_transaction->t_buffers) { diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 9dc71a6b62e..70245d6638b 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1243,6 +1243,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) struct journal_head *jh; int drop_reserve = 0; int err = 0; + int was_modified = 0; BUFFER_TRACE(bh, "entry"); @@ -1261,6 +1262,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) goto not_jbd; } + /* keep track of wether or not this transaction modified us */ + was_modified = jh->b_modified; + /* * The buffer's going from the transaction, we must drop * all references -bzzz @@ -1278,7 +1282,12 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); - drop_reserve = 1; + /* + * we only want to drop a reference if this transaction + * modified the buffer + */ + if (was_modified) + drop_reserve = 1; /* * We are no longer going to journal this buffer. @@ -1318,7 +1327,13 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) if (jh->b_next_transaction) { J_ASSERT(jh->b_next_transaction == transaction); jh->b_next_transaction = NULL; - drop_reserve = 1; + + /* + * only drop a reference if this transaction modified + * the buffer + */ + if (was_modified) + drop_reserve = 1; } } @@ -2090,7 +2105,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; __jbd2_journal_file_buffer(jh, jh->b_transaction, - was_dirty ? BJ_Metadata : BJ_Reserved); + jh->b_modified ? BJ_Metadata : BJ_Reserved); J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); if (was_dirty) -- cgit v1.2.3 From 97bd42b9c8be748ad85b362ba3bd401f4d35be80 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 29 Apr 2008 22:04:56 -0400 Subject: ext4: check return of ext4_orphan_get properly This patch fix a panic while running fsfuzzer. We are improperly checking the return of ext4_orphan_get. Signed-off-by: Josef Bacik Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c81a8e759ba..425f42778ef 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1594,8 +1594,8 @@ static void ext4_orphan_cleanup (struct super_block * sb, while (es->s_last_orphan) { struct inode *inode; - if (!(inode = - ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { + inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); + if (IS_ERR(inode)) { es->s_last_orphan = 0; break; } -- cgit v1.2.3 From 53c550e9750434ddc4275fe0405170e0d1b46731 Mon Sep 17 00:00:00 2001 From: Hisashi Hifumi Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: ext4: fdatasync should skip metadata writeout when overwriting Currently fdatasync is identical to fsync in ext3. I think fdatasync should skip journal flush in data=ordered and data=writeback mode when it overwrites to already-instantiated blocks on HDD. When I_DIRTY_DATASYNC flag is not set, fdatasync should skip journal writeout because this indicates only atime or/and mtime updates. Following patch is the same approach of ext2's fsync code(ext2_sync_file). I did a performance test using the sysbench. #sysbench --num-threads=128 --max-requests=50000 --test=fileio --file-total-size=128G --file-test-mode=rndwr --file-fsync-mode=fdatasync run The result on ext3 was: -2.6.24 Operations performed: 0 Read, 50080 Write, 59600 Other = 109680 Total Read 0b Written 782.5Mb Total transferred 782.5Mb (12.116Mb/sec) 775.45 Requests/sec executed Test execution summary: total time: 64.5814s total number of events: 50080 total time taken by event execution: 3713.9836 per-request statistics: min: 0.0000s avg: 0.0742s max: 0.9375s approx. 95 percentile: 0.2901s Threads fairness: events (avg/stddev): 391.2500/23.26 execution time (avg/stddev): 29.0155/1.99 -2.6.24-patched Operations performed: 0 Read, 50009 Write, 61596 Other = 111605 Total Read 0b Written 781.39Mb Total transferred 781.39Mb (16.419Mb/sec) 1050.83 Requests/sec executed Test execution summary: total time: 47.5900s total number of events: 50009 total time taken by event execution: 2934.5768 per-request statistics: min: 0.0000s avg: 0.0587s max: 0.8938s approx. 95 percentile: 0.1993s Threads fairness: events (avg/stddev): 390.6953/22.64 execution time (avg/stddev): 22.9264/1.17 Filesystem I/O throughput was improved. Signed-off-by :Hisashi Hifumi Acked-by: Jan Kara Cc: Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" --- fs/ext4/fsync.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 8d50879d1c2..a04a1ac4e0c 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -72,6 +72,9 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) goto out; } + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + goto out; + /* * The VFS has written the file data. If the inode is unaltered * then we need not start a commit. -- cgit v1.2.3 From f3f12faa7414595f502721c90c34deccc1a03c71 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 29 Apr 2008 22:05:28 -0400 Subject: ext4: fix mount option parsing The "resize" option won't be noticed as it comes after the NULL option, so if you try to mount (or in this case remount) with that option it won't be recognized. Cc: Signed-off-by: Josef Bacik Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 425f42778ef..9d2d9a7fdea 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -945,8 +945,8 @@ static match_table_t tokens = { {Opt_mballoc, "mballoc"}, {Opt_nomballoc, "nomballoc"}, {Opt_stripe, "stripe=%u"}, - {Opt_err, NULL}, {Opt_resize, "resize"}, + {Opt_err, NULL}, }; static ext4_fsblk_t get_sb_block(void **data) -- cgit v1.2.3 From 95c3889cb88ca4833096553c12cde9e7eb792f4c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: ext4: Fix fallocate error path Put the old extent details back if we fail to split the uninitialized extent. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 9ae6e67090c..a38f3242782 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2154,7 +2154,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ext4_lblk_t iblock, unsigned long max_blocks) { - struct ext4_extent *ex, newex; + struct ext4_extent *ex, newex, orig_ex; struct ext4_extent *ex1 = NULL; struct ext4_extent *ex2 = NULL; struct ext4_extent *ex3 = NULL; @@ -2173,6 +2173,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, allocated = ee_len - (iblock - ee_block); newblock = iblock - ee_block + ext_pblock(ex); ex2 = ex; + orig_ex.ee_block = ex->ee_block; + orig_ex.ee_len = cpu_to_le16(ee_len); + ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) @@ -2201,13 +2204,25 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex3->ee_len = cpu_to_le16(allocated - max_blocks); ext4_ext_mark_uninitialized(ex3); err = ext4_ext_insert_extent(handle, inode, path, ex3); - if (err) + if (err) { + ex->ee_block = orig_ex.ee_block; + ex->ee_len = orig_ex.ee_len; + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); + ext4_ext_mark_uninitialized(ex); + ext4_ext_dirty(handle, inode, path + depth); goto out; + } /* * The depth, and hence eh & ex might change * as part of the insert above. */ newdepth = ext_depth(inode); + /* + * update the extent length after successfull insert of the + * split extent + */ + orig_ex.ee_len = cpu_to_le16(ee_len - + ext4_ext_get_actual_len(ex3)); if (newdepth != depth) { depth = newdepth; ext4_ext_drop_refs(path); @@ -2282,6 +2297,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, goto out; insert: err = ext4_ext_insert_extent(handle, inode, path, &newex); + if (err) { + ex->ee_block = orig_ex.ee_block; + ex->ee_len = orig_ex.ee_len; + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); + ext4_ext_mark_uninitialized(ex); + ext4_ext_dirty(handle, inode, path + depth); + } out: return err ? err : allocated; } -- cgit v1.2.3 From e65187e6d0d541f992e684f88a7e090dcff1aac8 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 29 Apr 2008 08:11:12 -0400 Subject: ext4: Enable extent format for symlinks. This patch enables extent-formatted normal symlinks. Using extents format allows a symlink to refer to a block number larger than 2^32 on large filesystems. We still don't enable extent format for fast symlinks, which are contained in the inode itself. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 4 ++-- fs/ext4/namei.c | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 486e46a3918..cb14646117f 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -750,8 +750,8 @@ got: goto fail_free_drop; } if (test_opt(sb, EXTENTS)) { - /* set extent flag only for directory and file */ - if (S_ISDIR(mode) || S_ISREG(mode)) { + /* set extent flag only for diretory, file and normal symlink*/ + if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; ext4_ext_tree_init(handle, inode); err = ext4_update_incompat_feature(handle, sb, diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 28aa2ed4297..68611945687 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2217,6 +2217,8 @@ retry: goto out_stop; } } else { + /* clear the extent format for fast symlink */ + EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; inode->i_op = &ext4_fast_symlink_inode_operations; memcpy((char*)&EXT4_I(inode)->i_data,symname,l); inode->i_size = l-1; -- cgit v1.2.3 From 093a088b76352e0a6fdca84eb78b3aa65fbe6dd1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 29 Apr 2008 08:11:12 -0400 Subject: ext4: ENOSPC error handling for writing to an uninitialized extent This patch handles possible ENOSPC errors when writing to an uninitialized extent in case the filesystem is full. A write to a prealloc area causes the split of an unititalized extent into initialized and uninitialized extents. If we don't have space to add new extent information, instead of returning error, convert the existing uninitialized extent to initialized one. We need to zero out the blocks corresponding to the entire extent to prevent uninitialized data reaching userspace. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 99 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a38f3242782..d279ea8c466 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2138,6 +2138,80 @@ void ext4_ext_release(struct super_block *sb) #endif } +static void bi_complete(struct bio *bio, int error) +{ + complete((struct completion *)bio->bi_private); +} + +/* FIXME!! we need to try to merge to left or right after zero-out */ +static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) +{ + int ret = -EIO; + struct bio *bio; + int blkbits, blocksize; + sector_t ee_pblock; + struct completion event; + unsigned int ee_len, len, done, offset; + + + blkbits = inode->i_blkbits; + blocksize = inode->i_sb->s_blocksize; + ee_len = ext4_ext_get_actual_len(ex); + ee_pblock = ext_pblock(ex); + + /* convert ee_pblock to 512 byte sectors */ + ee_pblock = ee_pblock << (blkbits - 9); + + while (ee_len > 0) { + + if (ee_len > BIO_MAX_PAGES) + len = BIO_MAX_PAGES; + else + len = ee_len; + + bio = bio_alloc(GFP_NOIO, len); + if (!bio) + return -ENOMEM; + bio->bi_sector = ee_pblock; + bio->bi_bdev = inode->i_sb->s_bdev; + + done = 0; + offset = 0; + while (done < len) { + ret = bio_add_page(bio, ZERO_PAGE(0), + blocksize, offset); + if (ret != blocksize) { + /* + * We can't add any more pages because of + * hardware limitations. Start a new bio. + */ + break; + } + done++; + offset += blocksize; + if (offset >= PAGE_CACHE_SIZE) + offset = 0; + } + + init_completion(&event); + bio->bi_private = &event; + bio->bi_end_io = bi_complete; + submit_bio(WRITE, bio); + wait_for_completion(&event); + + if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + ret = 0; + else { + ret = -EIO; + break; + } + bio_put(bio); + ee_len -= done; + ee_pblock += done << (blkbits - 9); + } + return ret; +} + /* * This function is called by ext4_ext_get_blocks() if someone tries to write * to an uninitialized extent. It may result in splitting the uninitialized @@ -2204,14 +2278,19 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex3->ee_len = cpu_to_le16(allocated - max_blocks); ext4_ext_mark_uninitialized(ex3); err = ext4_ext_insert_extent(handle, inode, path, ex3); - if (err) { + if (err == -ENOSPC) { + err = ext4_ext_zeroout(inode, &orig_ex); + if (err) + goto fix_extent_len; + /* update the extent length and mark as initialized */ ex->ee_block = orig_ex.ee_block; ex->ee_len = orig_ex.ee_len; ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); - ext4_ext_mark_uninitialized(ex); ext4_ext_dirty(handle, inode, path + depth); - goto out; - } + return le16_to_cpu(ex->ee_len); + + } else if (err) + goto fix_extent_len; /* * The depth, and hence eh & ex might change * as part of the insert above. @@ -2297,15 +2376,28 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, goto out; insert: err = ext4_ext_insert_extent(handle, inode, path, &newex); - if (err) { + if (err == -ENOSPC) { + err = ext4_ext_zeroout(inode, &orig_ex); + if (err) + goto fix_extent_len; + /* update the extent length and mark as initialized */ ex->ee_block = orig_ex.ee_block; ex->ee_len = orig_ex.ee_len; ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); - ext4_ext_mark_uninitialized(ex); ext4_ext_dirty(handle, inode, path + depth); - } + return le16_to_cpu(ex->ee_len); + } else if (err) + goto fix_extent_len; out: return err ? err : allocated; + +fix_extent_len: + ex->ee_block = orig_ex.ee_block; + ex->ee_len = orig_ex.ee_len; + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); + ext4_ext_mark_uninitialized(ex); + ext4_ext_dirty(handle, inode, path + depth); + return err; } /* -- cgit v1.2.3 From 3977c965ec35ce1a7eac988ad313f0fc9aee9660 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: ext4: zero out small extents when writing to prealloc area. If the preallocated area is small zero out the full extent instead of splitting them. This should avoid the "write every alternate block" problem that could grow the number of extents dramatically. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index d279ea8c466..668d82e494d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2212,6 +2212,8 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) return ret; } +#define EXT4_EXT_ZERO_LEN 7 + /* * This function is called by ext4_ext_get_blocks() if someone tries to write * to an uninitialized extent. It may result in splitting the uninitialized @@ -2254,6 +2256,18 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; + /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ + if (ee_len <= 2*EXT4_EXT_ZERO_LEN) { + err = ext4_ext_zeroout(inode, &orig_ex); + if (err) + goto fix_extent_len; + /* update the extent length and mark as initialized */ + ex->ee_block = orig_ex.ee_block; + ex->ee_len = orig_ex.ee_len; + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); + ext4_ext_dirty(handle, inode, path + depth); + return le16_to_cpu(ex->ee_len); + } /* ex1: ee_block to iblock - 1 : uninitialized */ if (iblock > ee_block) { @@ -2272,6 +2286,38 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, /* ex3: to ee_block + ee_len : uninitialised */ if (allocated > max_blocks) { unsigned int newdepth; + /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ + if (allocated <= EXT4_EXT_ZERO_LEN) { + /* Mark first half uninitialized. + * Mark second half initialized and zero out the + * initialized extent + */ + ex->ee_block = orig_ex.ee_block; + ex->ee_len = cpu_to_le16(ee_len - allocated); + ext4_ext_mark_uninitialized(ex); + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); + ext4_ext_dirty(handle, inode, path + depth); + + ex3 = &newex; + ex3->ee_block = cpu_to_le32(iblock); + ext4_ext_store_pblock(ex3, newblock); + ex3->ee_len = cpu_to_le16(allocated); + err = ext4_ext_insert_extent(handle, inode, path, ex3); + if (err == -ENOSPC) { + err = ext4_ext_zeroout(inode, &orig_ex); + if (err) + goto fix_extent_len; + ex->ee_block = orig_ex.ee_block; + ex->ee_len = orig_ex.ee_len; + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); + ext4_ext_dirty(handle, inode, path + depth); + return le16_to_cpu(ex->ee_len); + + } else if (err) + goto fix_extent_len; + + return allocated; + } ex3 = &newex; ex3->ee_block = cpu_to_le32(iblock + max_blocks); ext4_ext_store_pblock(ex3, newblock + max_blocks); @@ -2320,6 +2366,23 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, goto out; } allocated = max_blocks; + + /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying + * to insert a extent in the middle zerout directly + * otherwise give the extent a chance to merge to left + */ + if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && + iblock != ee_block) { + err = ext4_ext_zeroout(inode, &orig_ex); + if (err) + goto fix_extent_len; + /* update the extent length and mark as initialized */ + ex->ee_block = orig_ex.ee_block; + ex->ee_len = orig_ex.ee_len; + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); + ext4_ext_dirty(handle, inode, path + depth); + return le16_to_cpu(ex->ee_len); + } } /* * If there was a change of depth as part of the -- cgit v1.2.3 From 267e4db9ac28a09973476e7ec2cb6807e609d35a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 29 Apr 2008 08:11:12 -0400 Subject: ext4: Fix race between migration and mmap write Fail migrate if we allocated new blocks via mmap write. If we write to holes in the file via mmap, we end up allocating new blocks. This block allocation happens without taking inode->i_mutex. Since migrate is protected by i_mutex and migrate expects that no new blocks get allocated during migrate, fail migrate if new blocks get allocated. We can't take inode->i_mutex in the mmap write path because that would result in a locking order violation between i_mutex and mmap_sem. Also adding a separate rw_sempahore for protection is really high overhead for a rare operation such as migrate. Signed-off-by: Aneesh Kumar K.V Acked-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 13 ++++++++++++- fs/ext4/migrate.c | 39 ++++++++++++++++++++++++++++++++++----- 2 files changed, 46 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8fab233cb05..24a2604dde7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -985,6 +985,16 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, } else { retval = ext4_get_blocks_handle(handle, inode, block, max_blocks, bh, create, extend_disksize); + + if (retval > 0 && buffer_new(bh)) { + /* + * We allocated new blocks which will result in + * i_data's format changing. Force the migrate + * to fail by clearing migrate flags + */ + EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & + ~EXT4_EXT_MIGRATE; + } } up_write((&EXT4_I(inode)->i_data_sem)); return retval; @@ -2976,7 +2986,8 @@ static int ext4_do_update_inode(handle_t *handle, if (ext4_inode_blocks_set(handle, raw_inode, ei)) goto out_brelse; raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); - raw_inode->i_flags = cpu_to_le32(ei->i_flags); + /* clear the migrate flag in the raw_inode */ + raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE); if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != cpu_to_le32(EXT4_OS_HURD)) raw_inode->i_file_acl_high = diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 5c1e27de775..9b4fb07d192 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -327,7 +327,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) } static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, - struct inode *tmp_inode) + struct inode *tmp_inode) { int retval; __le32 i_data[3]; @@ -339,7 +339,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, * i_data field of the original inode */ retval = ext4_journal_extend(handle, 1); - if (retval != 0) { + if (retval) { retval = ext4_journal_restart(handle, 1); if (retval) goto err_out; @@ -350,6 +350,18 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, i_data[2] = ei->i_data[EXT4_TIND_BLOCK]; down_write(&EXT4_I(inode)->i_data_sem); + /* + * if EXT4_EXT_MIGRATE is cleared a block allocation + * happened after we started the migrate. We need to + * fail the migrate + */ + if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) { + retval = -EAGAIN; + up_write(&EXT4_I(inode)->i_data_sem); + goto err_out; + } else + EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & + ~EXT4_EXT_MIGRATE; /* * We have the extent map build with the tmp inode. * Now copy the i_data across @@ -508,6 +520,17 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, * switch the inode format to prevent read. */ mutex_lock(&(inode->i_mutex)); + /* + * Even though we take i_mutex we can still cause block allocation + * via mmap write to holes. If we have allocated new blocks we fail + * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. + * The flag is updated with i_data_sem held to prevent racing with + * block allocation. + */ + down_read((&EXT4_I(inode)->i_data_sem)); + EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE; + up_read((&EXT4_I(inode)->i_data_sem)); + handle = ext4_journal_start(inode, 1); ei = EXT4_I(inode); @@ -559,9 +582,15 @@ err_out: * tmp_inode */ free_ext_block(handle, tmp_inode); - else - retval = ext4_ext_swap_inode_data(handle, inode, - tmp_inode); + else { + retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode); + if (retval) + /* + * if we fail to swap inode data free the extent + * details of the tmp inode + */ + free_ext_block(handle, tmp_inode); + } /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ if (ext4_journal_extend(handle, 1) != 0) -- cgit v1.2.3 From fd28784adc079afa905df56204b1298ddb4d0bfe Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 29 Apr 2008 08:11:12 -0400 Subject: ext4: Fix fallocate to update the file size in each transaction ext4_fallocate needs to update file size in each transaction. Otherwise if we crash the file size won't be seen. We were also not marking the inode dirty after updating file size before. Also when we try to retry allocation due to ENOSPC, make sure we reset the variable ret so that we actually do a retry. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 89 ++++++++++++++++++++++++------------------------------- 1 file changed, 38 insertions(+), 51 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 668d82e494d..c2b004e29ad 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2785,6 +2785,28 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) return needed; } +static void ext4_falloc_update_inode(struct inode *inode, + int mode, loff_t new_size, int update_ctime) +{ + struct timespec now; + + if (update_ctime) { + now = current_fs_time(inode->i_sb); + if (!timespec_equal(&inode->i_ctime, &now)) + inode->i_ctime = now; + } + /* + * Update only when preallocation was requested beyond + * the file size. + */ + if (!(mode & FALLOC_FL_KEEP_SIZE) && + new_size > i_size_read(inode)) { + i_size_write(inode, new_size); + EXT4_I(inode)->i_disksize = new_size; + } + +} + /* * preallocate space for a file. This implements ext4's fallocate inode * operation, which gets called from sys_fallocate system call. @@ -2796,8 +2818,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) { handle_t *handle; ext4_lblk_t block; + loff_t new_size; unsigned long max_blocks; - ext4_fsblk_t nblocks = 0; int ret = 0; int ret2 = 0; int retries = 0; @@ -2816,9 +2838,12 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) return -ENODEV; block = offset >> blkbits; + /* + * We can't just convert len to max_blocks because + * If blocksize = 4096 offset = 3072 and len = 2048 + */ max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - - block; - + - block; /* * credits to insert 1 extent into extent tree + buffers to be able to * modify 1 super block, 1 block bitmap and 1 group descriptor. @@ -2834,7 +2859,6 @@ retry: ret = PTR_ERR(handle); break; } - ret = ext4_get_blocks_wrap(handle, inode, block, max_blocks, &map_bh, EXT4_CREATE_UNINITIALIZED_EXT, 0); @@ -2850,61 +2874,24 @@ retry: ret2 = ext4_journal_stop(handle); break; } - if (ret > 0) { - /* check wrap through sign-bit/zero here */ - if ((block + ret) < 0 || (block + ret) < block) { - ret = -EIO; - ext4_mark_inode_dirty(handle, inode); - ret2 = ext4_journal_stop(handle); - break; - } - if (buffer_new(&map_bh) && ((block + ret) > - (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits) - >> blkbits))) - nblocks = nblocks + ret; - } - - /* Update ctime if new blocks get allocated */ - if (nblocks) { - struct timespec now; - - now = current_fs_time(inode->i_sb); - if (!timespec_equal(&inode->i_ctime, &now)) - inode->i_ctime = now; - } + if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len, + blkbits) >> blkbits)) + new_size = offset + len; + else + new_size = (block + ret) << blkbits; + ext4_falloc_update_inode(inode, mode, new_size, + buffer_new(&map_bh)); ext4_mark_inode_dirty(handle, inode); ret2 = ext4_journal_stop(handle); if (ret2) break; } - - if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) + if (ret == -ENOSPC && + ext4_should_retry_alloc(inode->i_sb, &retries)) { + ret = 0; goto retry; - - /* - * Time to update the file size. - * Update only when preallocation was requested beyond the file size. - */ - if (!(mode & FALLOC_FL_KEEP_SIZE) && - (offset + len) > i_size_read(inode)) { - if (ret > 0) { - /* - * if no error, we assume preallocation succeeded - * completely - */ - i_size_write(inode, offset + len); - EXT4_I(inode)->i_disksize = i_size_read(inode); - } else if (ret < 0 && nblocks) { - /* Handle partial allocation scenario */ - loff_t newsize; - - newsize = (nblocks << blkbits) + i_size_read(inode); - i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits)); - EXT4_I(inode)->i_disksize = i_size_read(inode); - } } - mutex_unlock(&inode->i_mutex); return ret > 0 ? ret2 : ret; } -- cgit v1.2.3 From e067ba0078cd6f00eb6c4052fec630b78ebe59de Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 29 Apr 2008 08:11:12 -0400 Subject: ext4: make ext4_ext_get_blocks always return <= max_blocks ext4_ext_get_blocks() returns number of blocks allocated with buffer heads unmapped for a read from prealloc space. This is needed so that delayed allocation doesn't do block reservation for prealloc space since the blocks are already resevred on disk. Fix ext4_ext_get_blocks to not return greater than max_blocks, since some of the code paths cannot handle such a return value. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c2b004e29ad..f4ef0b745a5 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2570,8 +2570,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, } if (create == EXT4_CREATE_UNINITIALIZED_EXT) goto out; - if (!create) + if (!create) { + /* + * We have blocks reserved already. We + * return allocated blocks so that delalloc + * won't do block reservation for us. But + * the buffer head will be unmapped so that + * a read from the block returns 0s. + */ + if (allocated > max_blocks) + allocated = max_blocks; goto out2; + } ret = ext4_ext_convert_to_initialized(handle, inode, path, iblock, -- cgit v1.2.3 From 1a89734d4057066344356e9c7e13b6379497aebe Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 29 Apr 2008 08:11:12 -0400 Subject: ext4: Return unwritten buffer head when trying to read from prealloc space. ext4_ext_get_blocks() returns the number of blocks allocated with buffer head unmapped for a read from prealloc space. This is needed so that delayed allocation doesn't do block reservation for prealloc space since the blocks are already reserved on disk. Mark the buffer head unwritten. Some code paths try to read the block if the buffer_head is not new and no uptodate. Marking the buffer head unwritten avoids this reading. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f4ef0b745a5..7733e294336 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2580,6 +2580,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, */ if (allocated > max_blocks) allocated = max_blocks; + /* mark the buffer unwritten */ + __set_bit(BH_Unwritten, &bh_result->b_state); goto out2; } -- cgit v1.2.3 From 161e7b7c1d24112d188df9a7b30d468a8d135b96 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 29 Apr 2008 22:03:59 -0400 Subject: ext4: Cache the correct extent length for uninit extents When we convert an uninitialized extent to an initialized extent we need to make sure we return the number of blocks in the extent from the file system block corresponding to logical file block. Otherwise we cache wrong extent details and this results in file system corruption. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 48 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7733e294336..c7c42c9c7bf 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2266,7 +2266,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex->ee_len = orig_ex.ee_len; ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); - return le16_to_cpu(ex->ee_len); + /* zeroed the full extent */ + return allocated; } /* ex1: ee_block to iblock - 1 : uninitialized */ @@ -2311,11 +2312,45 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex->ee_len = orig_ex.ee_len; ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); - return le16_to_cpu(ex->ee_len); + /* zeroed the full extent */ + return allocated; } else if (err) goto fix_extent_len; + /* + * We need to zero out the second half because + * an fallocate request can update file size and + * converting the second half to initialized extent + * implies that we can leak some junk data to user + * space. + */ + err = ext4_ext_zeroout(inode, ex3); + if (err) { + /* + * We should actually mark the + * second half as uninit and return error + * Insert would have changed the extent + */ + depth = ext_depth(inode); + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, + iblock, path); + if (IS_ERR(path)) { + err = PTR_ERR(path); + return err; + } + ex = path[depth].p_ext; + err = ext4_ext_get_access(handle, inode, + path + depth); + if (err) + return err; + ext4_ext_mark_uninitialized(ex); + ext4_ext_dirty(handle, inode, path + depth); + return err; + } + + /* zeroed the second half */ return allocated; } ex3 = &newex; @@ -2333,7 +2368,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex->ee_len = orig_ex.ee_len; ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); - return le16_to_cpu(ex->ee_len); + /* zeroed the full extent */ + return allocated; } else if (err) goto fix_extent_len; @@ -2381,7 +2417,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex->ee_len = orig_ex.ee_len; ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); - return le16_to_cpu(ex->ee_len); + /* zero out the first half */ + return allocated; } } /* @@ -2448,7 +2485,8 @@ insert: ex->ee_len = orig_ex.ee_len; ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); - return le16_to_cpu(ex->ee_len); + /* zero out the first half */ + return allocated; } else if (err) goto fix_extent_len; out: -- cgit v1.2.3 From 5cdd7b2d7716a7ed7d6dc7588e2d015f04d46640 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 29 Apr 2008 22:03:54 -0400 Subject: Convert ext4 to use unlocked_ioctl I checked ext4_ioctl and it looked largely safe to not be used without BKL. So convert it over to unlocked_ioctl. Signed-off-by: Andi Kleen Signed-off-by: Theodore Ts'o --- fs/ext4/dir.c | 2 +- fs/ext4/file.c | 2 +- fs/ext4/ioctl.c | 12 +++--------- 3 files changed, 5 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 2c23bade9aa..88c97f7312b 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -42,7 +42,7 @@ const struct file_operations ext4_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = ext4_readdir, /* we take BKL. needed?*/ - .ioctl = ext4_ioctl, /* BKL held */ + .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, #endif diff --git a/fs/ext4/file.c b/fs/ext4/file.c index ac35ec58db5..20507a24506 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -129,7 +129,7 @@ const struct file_operations ext4_file_operations = { .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = ext4_file_write, - .ioctl = ext4_ioctl, + .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, #endif diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 25b13ede808..ce937fe432a 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -18,9 +18,9 @@ #include #include -int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) +long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = filp->f_dentry->d_inode; struct ext4_inode_info *ei = EXT4_I(inode); unsigned int flags; unsigned short rsv_window_size; @@ -277,9 +277,6 @@ setversion_out: #ifdef CONFIG_COMPAT long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - int ret; - /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { case EXT4_IOC32_GETFLAGS: @@ -319,9 +316,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) default: return -ENOIOCTLCMD; } - lock_kernel(); - ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); - unlock_kernel(); - return ret; + return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif -- cgit v1.2.3 From 4ddfef7b41aebbbede73f361cb938800ba3072dc Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 29 Apr 2008 08:11:12 -0400 Subject: ext4: reduce mballoc stack usage with noinline_for_stack mballoc.c is a whole lot of static functions, which gcc seems to really like to inline. With the changes below, on x86, I can at least get from: 432 ext4_mb_new_blocks 240 ext4_mb_free_blocks 208 ext4_mb_discard_group_preallocations 188 ext4_mb_seq_groups_show 164 ext4_mb_init_cache 152 ext4_mb_release_inode_pa 136 ext4_mb_seq_history_show ... to 220 ext4_mb_free_blocks 188 ext4_mb_seq_groups_show 176 ext4_mb_regular_allocator 164 ext4_mb_init_cache 156 ext4_mb_new_blocks 152 ext4_mb_release_inode_pa 136 ext4_mb_seq_history_show 124 ext4_mb_release_group_pa ... which still has some big functions in there, but not 432 bytes! Signed-off-by: Eric Sandeen Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 9d57695de74..66e1451f64e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1168,8 +1168,9 @@ out: return err; } -static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, - struct ext4_buddy *e4b) +static noinline_for_stack int +ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, + struct ext4_buddy *e4b) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct inode *inode = sbi->s_buddy_cache; @@ -1965,7 +1966,8 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, return 0; } -static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) +static noinline_for_stack int +ext4_mb_regular_allocator(struct ext4_allocation_context *ac) { ext4_group_t group; ext4_group_t i; @@ -2465,7 +2467,8 @@ static void ext4_mb_history_init(struct super_block *sb) /* if we can't allocate history, then we simple won't use it */ } -static void ext4_mb_store_history(struct ext4_allocation_context *ac) +static noinline_for_stack void +ext4_mb_store_history(struct ext4_allocation_context *ac) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_mb_history h; @@ -2801,7 +2804,8 @@ int ext4_mb_release(struct super_block *sb) return 0; } -static void ext4_mb_free_committed_blocks(struct super_block *sb) +static noinline_for_stack void +ext4_mb_free_committed_blocks(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); int err; @@ -3021,7 +3025,8 @@ void exit_ext4_mballoc(void) * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps * Returns 0 if success or error code */ -static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, +static noinline_for_stack int +ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, handle_t *handle) { struct buffer_head *bitmap_bh = NULL; @@ -3138,7 +3143,8 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) * Normalization means making request better in terms of * size and alignment */ -static void ext4_mb_normalize_request(struct ext4_allocation_context *ac, +static noinline_for_stack void +ext4_mb_normalize_request(struct ext4_allocation_context *ac, struct ext4_allocation_request *ar) { int bsbits, max; @@ -3404,7 +3410,8 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, /* * search goal blocks in preallocated space */ -static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac) +static noinline_for_stack int +ext4_mb_use_preallocated(struct ext4_allocation_context *ac) { struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); struct ext4_locality_group *lg; @@ -3571,7 +3578,8 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, /* * creates new preallocated space for given inode */ -static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) +static noinline_for_stack int +ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) { struct super_block *sb = ac->ac_sb; struct ext4_prealloc_space *pa; @@ -3658,7 +3666,8 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) /* * creates new preallocated space for locality group inodes belongs to */ -static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac) +static noinline_for_stack int +ext4_mb_new_group_pa(struct ext4_allocation_context *ac) { struct super_block *sb = ac->ac_sb; struct ext4_locality_group *lg; @@ -3731,8 +3740,8 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac) * the caller MUST hold group/inode locks. * TODO: optimize the case when there are no in-core structures yet */ -static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, - struct buffer_head *bitmap_bh, +static noinline_for_stack int +ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, struct ext4_prealloc_space *pa) { struct ext4_allocation_context *ac; @@ -3803,7 +3812,8 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, return err; } -static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, +static noinline_for_stack int +ext4_mb_release_group_pa(struct ext4_buddy *e4b, struct ext4_prealloc_space *pa) { struct ext4_allocation_context *ac; @@ -3845,7 +3855,8 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, * - how many do we discard * 1) how many requested */ -static int ext4_mb_discard_group_preallocations(struct super_block *sb, +static noinline_for_stack int +ext4_mb_discard_group_preallocations(struct super_block *sb, ext4_group_t group, int needed) { struct ext4_group_info *grp = ext4_get_group_info(sb, group); @@ -4167,7 +4178,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) mutex_lock(&ac->ac_lg->lg_mutex); } -static int ext4_mb_initialize_context(struct ext4_allocation_context *ac, +static noinline_for_stack int +ext4_mb_initialize_context(struct ext4_allocation_context *ac, struct ext4_allocation_request *ar) { struct super_block *sb = ar->inode->i_sb; @@ -4398,7 +4410,8 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb, ext4_mb_free_committed_blocks(sb); } -static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, +static noinline_for_stack int +ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, ext4_group_t group, ext4_grpblk_t block, int count) { struct ext4_group_info *db = e4b->bd_info; -- cgit v1.2.3 From 9a0762c5af40e4aa64fef999967459c98e6ae4c9 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: ext4: Convert list_for_each_rcu() to list_for_each_entry_rcu() The list_for_each_entry_rcu() primitive should be used instead of list_for_each_rcu(), as the former is easier to use and provides better type safety. http://groups.google.com/group/linux.kernel/browse_thread/thread/45749c83451cebeb/0633a65759ce7713?lnk=raot Signed-off-by: Aneesh Kumar K.V Signed-off-by: Roel Kluin <12o3l@tiscali.nl> Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 66e1451f64e..aaaccf5986f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3149,10 +3149,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, { int bsbits, max; ext4_lblk_t end; - struct list_head *cur; loff_t size, orig_size, start_off; ext4_lblk_t start, orig_start; struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); + struct ext4_prealloc_space *pa; /* do normalize only data requests, metadata requests do not need preallocation */ @@ -3238,12 +3238,9 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, /* check we don't cross already preallocated blocks */ rcu_read_lock(); - list_for_each_rcu(cur, &ei->i_prealloc_list) { - struct ext4_prealloc_space *pa; + list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { unsigned long pa_end; - pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list); - if (pa->pa_deleted) continue; spin_lock(&pa->pa_lock); @@ -3285,10 +3282,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, /* XXX: extra loop to check we really don't overlap preallocations */ rcu_read_lock(); - list_for_each_rcu(cur, &ei->i_prealloc_list) { - struct ext4_prealloc_space *pa; + list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { unsigned long pa_end; - pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list); spin_lock(&pa->pa_lock); if (pa->pa_deleted == 0) { pa_end = pa->pa_lstart + pa->pa_len; @@ -3416,7 +3411,6 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); struct ext4_locality_group *lg; struct ext4_prealloc_space *pa; - struct list_head *cur; /* only data can be preallocated */ if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) @@ -3424,8 +3418,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) /* first, try per-file preallocation */ rcu_read_lock(); - list_for_each_rcu(cur, &ei->i_prealloc_list) { - pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list); + list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { /* all fields in this condition don't change, * so we can skip locking for them */ @@ -3457,8 +3450,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) return 0; rcu_read_lock(); - list_for_each_rcu(cur, &lg->lg_prealloc_list) { - pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list); + list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) { spin_lock(&pa->pa_lock); if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { atomic_inc(&pa->pa_count); -- cgit v1.2.3 From e8546d0615542684ca02ba03edebec1a503beb6b Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: ext4: le*_add_cpu conversion replace all: little_endian_variable = cpu_to_leX(leX_to_cpu(little_endian_variable) + expression_in_cpu_byteorder); with: leX_add_cpu(&little_endian_variable, expression_in_cpu_byteorder); generated with semantic patch Signed-off-by: Marcin Slusarz Signed-off-by: "Theodore Ts'o" Cc: linux-ext4@vger.kernel.org Cc: sct@redhat.com Cc: Andrew Morton Cc: adilger@clusterfs.com Cc: Mingming Cao --- fs/ext4/balloc.c | 7 ++----- fs/ext4/extents.c | 20 +++++++++----------- fs/ext4/ialloc.c | 12 ++++-------- fs/ext4/mballoc.c | 7 ++----- fs/ext4/resize.c | 6 ++---- fs/ext4/super.c | 2 +- fs/ext4/xattr.c | 6 ++---- 7 files changed, 22 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 0737e05ba3d..5d348998ff3 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -752,9 +752,7 @@ do_more: jbd_unlock_bh_state(bitmap_bh); spin_lock(sb_bgl_lock(sbi, block_group)); - desc->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) + - group_freed); + le16_add_cpu(&desc->bg_free_blocks_count, group_freed); desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_add(&sbi->s_freeblocks_counter, count); @@ -1823,8 +1821,7 @@ allocated: spin_lock(sb_bgl_lock(sbi, group_no)); if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num); + le16_add_cpu(&gdp->bg_free_blocks_count, -num); gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); spin_unlock(sb_bgl_lock(sbi, group_no)); percpu_counter_sub(&sbi->s_freeblocks_counter, num); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c7c42c9c7bf..3d5a35f373d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -614,7 +614,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, ix->ei_block = cpu_to_le32(logical); ext4_idx_store_pblock(ix, ptr); - curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); + le16_add_cpu(&curp->p_hdr->eh_entries, 1); BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) > le16_to_cpu(curp->p_hdr->eh_max)); @@ -736,7 +736,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, } if (m) { memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); - neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m); + le16_add_cpu(&neh->eh_entries, m); } set_buffer_uptodate(bh); @@ -753,8 +753,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto cleanup; - path[depth].p_hdr->eh_entries = - cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m); + le16_add_cpu(&path[depth].p_hdr->eh_entries, -m); err = ext4_ext_dirty(handle, inode, path + depth); if (err) goto cleanup; @@ -817,8 +816,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, if (m) { memmove(++fidx, path[i].p_idx - m, sizeof(struct ext4_extent_idx) * m); - neh->eh_entries = - cpu_to_le16(le16_to_cpu(neh->eh_entries) + m); + le16_add_cpu(&neh->eh_entries, m); } set_buffer_uptodate(bh); unlock_buffer(bh); @@ -834,7 +832,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path + i); if (err) goto cleanup; - path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m); + le16_add_cpu(&path[i].p_hdr->eh_entries, -m); err = ext4_ext_dirty(handle, inode, path + i); if (err) goto cleanup; @@ -1369,7 +1367,7 @@ int ext4_ext_try_to_merge(struct inode *inode, * sizeof(struct ext4_extent); memmove(ex + 1, ex + 2, len); } - eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1); + le16_add_cpu(&eh->eh_entries, -1); merge_done = 1; WARN_ON(eh->eh_entries == 0); if (!eh->eh_entries) @@ -1560,7 +1558,7 @@ has_space: path[depth].p_ext = nearex; } - eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1); + le16_add_cpu(&eh->eh_entries, 1); nearex = path[depth].p_ext; nearex->ee_block = newext->ee_block; ext4_ext_store_pblock(nearex, ext_pblock(newext)); @@ -1699,7 +1697,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path); if (err) return err; - path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); + le16_add_cpu(&path->p_hdr->eh_entries, -1); err = ext4_ext_dirty(handle, inode, path); if (err) return err; @@ -1902,7 +1900,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (num == 0) { /* this extent is removed; mark slot entirely unused */ ext4_ext_store_pblock(ex, 0); - eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); + le16_add_cpu(&eh->eh_entries, -1); } ex->ee_block = cpu_to_le32(block); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index cb14646117f..e8d24f24f28 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -223,11 +223,9 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) if (gdp) { spin_lock(sb_bgl_lock(sbi, block_group)); - gdp->bg_free_inodes_count = cpu_to_le16( - le16_to_cpu(gdp->bg_free_inodes_count) + 1); + le16_add_cpu(&gdp->bg_free_inodes_count, 1); if (is_directory) - gdp->bg_used_dirs_count = cpu_to_le16( - le16_to_cpu(gdp->bg_used_dirs_count) - 1); + le16_add_cpu(&gdp->bg_used_dirs_count, -1); gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); @@ -664,11 +662,9 @@ got: cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); } - gdp->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); + le16_add_cpu(&gdp->bg_free_inodes_count, -1); if (S_ISDIR(mode)) { - gdp->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); + le16_add_cpu(&gdp->bg_used_dirs_count, 1); } gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); spin_unlock(sb_bgl_lock(sbi, group)); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index aaaccf5986f..4865d318d7e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3099,9 +3099,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ac->ac_b_ex.fe_group, gdp)); } - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - - ac->ac_b_ex.fe_len); + le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); @@ -4593,8 +4591,7 @@ do_more: } spin_lock(sb_bgl_lock(sbi, block_group)); - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); + le16_add_cpu(&gdp->bg_free_blocks_count, count); gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_add(&sbi->s_freeblocks_counter, count); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index e29efa0f9d6..728e3efa84b 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -502,8 +502,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, EXT4_SB(sb)->s_gdb_count++; kfree(o_group_desc); - es->s_reserved_gdt_blocks = - cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1); + le16_add_cpu(&es->s_reserved_gdt_blocks, -1); ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); return 0; @@ -877,8 +876,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) */ ext4_blocks_count_set(es, ext4_blocks_count(es) + input->blocks_count); - es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + - EXT4_INODES_PER_GROUP(sb)); + le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb)); /* * We need to protect s_groups_count against other CPUs seeing diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9d2d9a7fdea..9c0a3448ec6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1392,7 +1392,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, #endif if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); - es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); + le16_add_cpu(&es->s_mnt_count, 1); es->s_mtime = cpu_to_le32(get_seconds()); ext4_update_dynamic_rev(sb); EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e9054c1c7d9..726716b618d 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -484,8 +484,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, get_bh(bh); ext4_forget(handle, 1, inode, bh, bh->b_blocknr); } else { - BHDR(bh)->h_refcount = cpu_to_le32( - le32_to_cpu(BHDR(bh)->h_refcount) - 1); + le32_add_cpu(&BHDR(bh)->h_refcount, -1); error = ext4_journal_dirty_metadata(handle, bh); if (IS_SYNC(inode)) handle->h_sync = 1; @@ -789,8 +788,7 @@ inserted: if (error) goto cleanup_dquot; lock_buffer(new_bh); - BHDR(new_bh)->h_refcount = cpu_to_le32(1 + - le32_to_cpu(BHDR(new_bh)->h_refcount)); + le32_add_cpu(&BHDR(new_bh)->h_refcount, 1); ea_bdebug(new_bh, "reusing; refcount now=%d", le32_to_cpu(BHDR(new_bh)->h_refcount)); unlock_buffer(new_bh); -- cgit v1.2.3 From 216c34b2b8a3687afed4d269acec140c8baf23fe Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: ext4: convert byte order of constant instead of variable Convert byte order of constant instead of variable which can be done at compile time (vs run time). Signed-off-by: Marcin Slusarz Cc: Signed-off-by: Andrew Morton Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9c0a3448ec6..01d163964e6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1388,7 +1388,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, * a plain journaled filesystem we can keep it set as * valid forever! :) */ - es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS); + es->s_state &= cpu_to_le16(~EXT4_VALID_FS); #endif if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); -- cgit v1.2.3 From d00a6d7b40b44ee6b03f492a6c58f5bc4649c784 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: ext4: use ext4_group_first_block_no() Use ext4_group_first_block_no() and assign the return values to ext4_fsblk_t variables. Signed-off-by: Akinobu Mita Signed-off-by: "Theodore Ts'o" Cc: Stephen Tweedie Cc: adilger@clusterfs.com Cc: Andrew Morton Cc: Mingming Cao --- fs/ext4/balloc.c | 6 +++--- fs/ext4/xattr.c | 6 ++---- 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 5d348998ff3..a080d7f5fac 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -48,7 +48,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, ext4_group_t block_group, struct ext4_group_desc *gdp) { - unsigned long start; int bit, bit_max; unsigned free_blocks, group_blocks; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -106,11 +105,12 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, free_blocks = group_blocks - bit_max; if (bh) { + ext4_fsblk_t start; + for (bit = 0; bit < bit_max; bit++) ext4_set_bit(bit, bh->b_data); - start = block_group * EXT4_BLOCKS_PER_GROUP(sb) + - le32_to_cpu(sbi->s_es->s_first_data_block); + start = ext4_group_first_block_no(sb, block_group); /* Set bits for block and inode bitmaps, and inode table */ ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 726716b618d..bc1d7daac8e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -806,10 +806,8 @@ inserted: get_bh(new_bh); } else { /* We need to allocate a new block */ - ext4_fsblk_t goal = le32_to_cpu( - EXT4_SB(sb)->s_es->s_first_data_block) + - (ext4_fsblk_t)EXT4_I(inode)->i_block_group * - EXT4_BLOCKS_PER_GROUP(sb); + ext4_fsblk_t goal = ext4_group_first_block_no(sb, + EXT4_I(inode)->i_block_group); ext4_fsblk_t block = ext4_new_block(handle, inode, goal, &error); if (error) -- cgit v1.2.3 From c0a4ef38ac90d9053fcf3e22f81520a507c1a7bd Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: ext4: use ext4_get_group_desc() Use ext4_get_group_desc() in ext4_get_inode_block() instead of open coding the functionality. Signed-off-by: Akinobu Mita Signed-off-by: "Theodore Ts'o" Cc: Stephen Tweedie Cc: adilger@clusterfs.com Cc: Andrew Morton Cc: Mingming Cao --- fs/ext4/inode.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 24a2604dde7..aa0fb985b15 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2511,12 +2511,10 @@ out_stop: static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, unsigned long ino, struct ext4_iloc *iloc) { - unsigned long desc, group_desc; ext4_group_t block_group; unsigned long offset; ext4_fsblk_t block; - struct buffer_head *bh; - struct ext4_group_desc * gdp; + struct ext4_group_desc *gdp; if (!ext4_valid_inum(sb, ino)) { /* @@ -2528,22 +2526,10 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, } block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); - if (block_group >= EXT4_SB(sb)->s_groups_count) { - ext4_error(sb,"ext4_get_inode_block","group >= groups count"); + gdp = ext4_get_group_desc(sb, block_group, NULL); + if (!gdp) return 0; - } - smp_rmb(); - group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); - desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); - bh = EXT4_SB(sb)->s_group_desc[group_desc]; - if (!bh) { - ext4_error (sb, "ext4_get_inode_block", - "Descriptor not loaded"); - return 0; - } - gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data + - desc * EXT4_DESC_SIZE(sb)); /* * Figure out the offset within the block group inode table */ -- cgit v1.2.3 From a871611b474bfcdee422c0cf5d16f509dce096f5 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: ext4: check ext4_journal_get_write_access() errors Check ext4_journal_get_write_access() errors. Signed-off-by: Akinobu Mita Signed-off-by: "Theodore Ts'o" Cc: Stephen Tweedie Cc: adilger@clusterfs.com Cc: Andrew Morton Cc: Mingming Cao --- fs/ext4/namei.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 68611945687..63c33e05347 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -57,10 +57,15 @@ static struct buffer_head *ext4_append(handle_t *handle, *block = inode->i_size >> inode->i_sb->s_blocksize_bits; - if ((bh = ext4_bread(handle, inode, *block, 1, err))) { + bh = ext4_bread(handle, inode, *block, 1, err); + if (bh) { inode->i_size += inode->i_sb->s_blocksize; EXT4_I(inode)->i_disksize = inode->i_size; - ext4_journal_get_write_access(handle,bh); + *err = ext4_journal_get_write_access(handle, bh); + if (*err) { + brelse(bh); + bh = NULL; + } } return bh; } -- cgit v1.2.3 From 14499f3592f3f52ceb7a639466de9ca21e2c1914 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: ext4: remove extra define of ext4_new_blocks_old from mballoc.c The function prototype of ext4_new_blocks_old() is defined in ext4_fs.h, so we don't need the extra function prototype in mballoc.c Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4865d318d7e..d9136b539c1 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -576,8 +576,6 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac); static struct proc_dir_entry *proc_root_ext4; struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); -ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp); static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_group_t group); -- cgit v1.2.3 From d3a95d477d4fcb2c276b8357087a6c862c9e1949 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: ext4: make ext4_xattr_list() static This patch makes the needlessly global ext4_xattr_list() static. Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/xattr.c | 4 +++- fs/ext4/xattr.h | 7 ------- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index bc1d7daac8e..739930427b8 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -92,6 +92,8 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *, struct mb_cache_entry **); static void ext4_xattr_rehash(struct ext4_xattr_header *, struct ext4_xattr_entry *); +static int ext4_xattr_list(struct inode *inode, char *buffer, + size_t buffer_size); static struct mb_cache *ext4_xattr_cache; @@ -420,7 +422,7 @@ cleanup: * Returns a negative error number on failure, or the number of bytes * used / required on success. */ -int +static int ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) { int i_error, b_error; diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index d7f5d6a1265..5992fe979bb 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -74,7 +74,6 @@ extern struct xattr_handler ext4_xattr_security_handler; extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); -extern int ext4_xattr_list(struct inode *, char *, size_t); extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); @@ -98,12 +97,6 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name, return -EOPNOTSUPP; } -static inline int -ext4_xattr_list(struct inode *inode, void *buffer, size_t size) -{ - return -EOPNOTSUPP; -} - static inline int ext4_xattr_set(struct inode *inode, int name_index, const char *name, const void *value, size_t size, int flags) -- cgit v1.2.3 From 9fa27c85de57d38ca698f4e34fdd1ab06b6c8e49 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Mon, 28 Apr 2008 09:40:00 -0400 Subject: jbd2: tidy up revoke cache initialisation and destruction Make revocation cache destruction safe to call if initialisation fails partially or entirely. This allows it to be used to cleanup in the case of initialisation failure, simplifying the code slightly. Signed-off-by: Duane Griffin Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" Signed-off-by: Aneesh Kumar K.V --- fs/jbd2/revoke.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 2e1453a5e99..72b260896bb 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -167,33 +167,41 @@ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal, return NULL; } +void jbd2_journal_destroy_revoke_caches(void) +{ + if (jbd2_revoke_record_cache) { + kmem_cache_destroy(jbd2_revoke_record_cache); + jbd2_revoke_record_cache = NULL; + } + if (jbd2_revoke_table_cache) { + kmem_cache_destroy(jbd2_revoke_table_cache); + jbd2_revoke_table_cache = NULL; + } +} + int __init jbd2_journal_init_revoke_caches(void) { + J_ASSERT(!jbd2_revoke_record_cache); + J_ASSERT(!jbd2_revoke_table_cache); + jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", sizeof(struct jbd2_revoke_record_s), 0, SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, NULL); if (!jbd2_revoke_record_cache) - return -ENOMEM; + goto record_cache_failure; jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", sizeof(struct jbd2_revoke_table_s), 0, SLAB_TEMPORARY, NULL); - if (!jbd2_revoke_table_cache) { - kmem_cache_destroy(jbd2_revoke_record_cache); - jbd2_revoke_record_cache = NULL; - return -ENOMEM; - } + if (!jbd2_revoke_table_cache) + goto table_cache_failure; return 0; -} - -void jbd2_journal_destroy_revoke_caches(void) -{ - kmem_cache_destroy(jbd2_revoke_record_cache); - jbd2_revoke_record_cache = NULL; - kmem_cache_destroy(jbd2_revoke_table_cache); - jbd2_revoke_table_cache = NULL; +table_cache_failure: + jbd2_journal_destroy_revoke_caches(); +record_cache_failure: + return -ENOMEM; } /* Initialise the revoke table for a given journal to a given size. */ -- cgit v1.2.3 From 83c49523c91fff10493f5b3c102063b02ab76907 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: jbd2: eliminate duplicated code in revocation table init/destroy functions The revocation table initialisation/destruction code is repeated for each of the two revocation tables stored in the journal. Refactoring the duplicated code into functions is tidier, simplifies the logic in initialisation in particular, and slightly reduces the code size. There should not be any functional change. Signed-off-by: Duane Griffin Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/jbd2/revoke.c | 127 ++++++++++++++++++++++--------------------------------- 1 file changed, 51 insertions(+), 76 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 72b260896bb..144a2065f03 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -204,109 +204,84 @@ record_cache_failure: return -ENOMEM; } -/* Initialise the revoke table for a given journal to a given size. */ - -int jbd2_journal_init_revoke(journal_t *journal, int hash_size) +static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size) { - int shift, tmp; + int shift = 0; + int tmp = hash_size; + struct jbd2_revoke_table_s *table; - J_ASSERT (journal->j_revoke_table[0] == NULL); + table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); + if (!table) + goto out; - shift = 0; - tmp = hash_size; while((tmp >>= 1UL) != 0UL) shift++; - journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); - if (!journal->j_revoke_table[0]) - return -ENOMEM; - journal->j_revoke = journal->j_revoke_table[0]; - - /* Check that the hash_size is a power of two */ - J_ASSERT(is_power_of_2(hash_size)); - - journal->j_revoke->hash_size = hash_size; - - journal->j_revoke->hash_shift = shift; - - journal->j_revoke->hash_table = + table->hash_size = hash_size; + table->hash_shift = shift; + table->hash_table = kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!journal->j_revoke->hash_table) { - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); - journal->j_revoke = NULL; - return -ENOMEM; + if (!table->hash_table) { + kmem_cache_free(jbd2_revoke_table_cache, table); + table = NULL; + goto out; } for (tmp = 0; tmp < hash_size; tmp++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + INIT_LIST_HEAD(&table->hash_table[tmp]); - journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); - if (!journal->j_revoke_table[1]) { - kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); - return -ENOMEM; +out: + return table; +} + +static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table) +{ + int i; + struct list_head *hash_list; + + for (i = 0; i < table->hash_size; i++) { + hash_list = &table->hash_table[i]; + J_ASSERT(list_empty(hash_list)); } - journal->j_revoke = journal->j_revoke_table[1]; + kfree(table->hash_table); + kmem_cache_free(jbd2_revoke_table_cache, table); +} - /* Check that the hash_size is a power of two */ +/* Initialise the revoke table for a given journal to a given size. */ +int jbd2_journal_init_revoke(journal_t *journal, int hash_size) +{ + J_ASSERT(journal->j_revoke_table[0] == NULL); J_ASSERT(is_power_of_2(hash_size)); - journal->j_revoke->hash_size = hash_size; + journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size); + if (!journal->j_revoke_table[0]) + goto fail0; - journal->j_revoke->hash_shift = shift; + journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size); + if (!journal->j_revoke_table[1]) + goto fail1; - journal->j_revoke->hash_table = - kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!journal->j_revoke->hash_table) { - kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]); - journal->j_revoke = NULL; - return -ENOMEM; - } - - for (tmp = 0; tmp < hash_size; tmp++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + journal->j_revoke = journal->j_revoke_table[1]; spin_lock_init(&journal->j_revoke_lock); return 0; -} -/* Destoy a journal's revoke table. The table must already be empty! */ +fail1: + jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); +fail0: + return -ENOMEM; +} +/* Destroy a journal's revoke table. The table must already be empty! */ void jbd2_journal_destroy_revoke(journal_t *journal) { - struct jbd2_revoke_table_s *table; - struct list_head *hash_list; - int i; - - table = journal->j_revoke_table[0]; - if (!table) - return; - - for (i=0; ihash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT (list_empty(hash_list)); - } - - kfree(table->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, table); - journal->j_revoke = NULL; - - table = journal->j_revoke_table[1]; - if (!table) - return; - - for (i=0; ihash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT (list_empty(hash_list)); - } - - kfree(table->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, table); journal->j_revoke = NULL; + if (journal->j_revoke_table[0]) + jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); + if (journal->j_revoke_table[1]) + jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]); } -- cgit v1.2.3 From 8a9362eb405e380432e6883cb83830df3b6cdf78 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: jbd2: replace potentially false assertion with if block If an error occurs during jbd2 cache initialisation it is possible for the journal_head_cache to be NULL when jbd2_journal_destroy_journal_head_cache is called. Replace the J_ASSERT with an if block to handle the situation correctly. Note that even with this fix things will break badly if jbd2 is statically compiled in and cache initialisation fails. Signed-off-by: Duane Griffin Cc: Signed-off-by: Andrew Morton Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index eb7eb6c27bc..2dbf23e82c6 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1976,9 +1976,10 @@ static int journal_init_jbd2_journal_head_cache(void) static void jbd2_journal_destroy_jbd2_journal_head_cache(void) { - J_ASSERT(jbd2_journal_head_cache != NULL); - kmem_cache_destroy(jbd2_journal_head_cache); - jbd2_journal_head_cache = NULL; + if (jbd2_journal_head_cache) { + kmem_cache_destroy(jbd2_journal_head_cache); + jbd2_journal_head_cache = NULL; + } } /* -- cgit v1.2.3 From 5648ba5b2dc0d07a8108fabc7b9100962e9e1d88 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: jbd2: fix kernel-doc notation Fix kernel-doc notation in jbd2. Signed-off-by: Randy Dunlap Signed-off-by: Mingming Cao Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 5 +++-- fs/jbd2/transaction.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 2dbf23e82c6..d707a219e21 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -997,13 +997,14 @@ fail: */ /** - * journal_t * jbd2_journal_init_dev() - creates an initialises a journal structure + * journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure * @bdev: Block device on which to create the journal * @fs_dev: Device which hold journalled filesystem for this journal. * @start: Block nr Start of journal. * @len: Length of the journal in blocks. * @blocksize: blocksize of journalling device - * @returns: a newly created journal_t * + * + * Returns: a newly created journal_t * * * jbd2_journal_init_dev creates a journal which maps a fixed contiguous * range of blocks on an arbitrary block device. diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 70245d6638b..401bf9d2365 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1462,7 +1462,8 @@ int jbd2_journal_stop(handle_t *handle) return err; } -/**int jbd2_journal_force_commit() - force any uncommitted transactions +/** + * int jbd2_journal_force_commit() - force any uncommitted transactions * @journal: journal to force * * For synchronous operations: force any uncommitted transactions -- cgit v1.2.3 From 620de4e19890c623eb4ba293ec19b42e2e391b89 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Tue, 29 Apr 2008 22:02:47 -0400 Subject: jbd2: only create debugfs and stats entries if init is successful jbd2 debugfs and stats entries should only be created if cache initialisation is successful. At the moment they are being created unconditionally which will leave them dangling if cache (and hence module) initialisation fails. Signed-off-by: Duane Griffin Cc: Signed-off-by: Andrew Morton Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index d707a219e21..64356e85a10 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2307,10 +2307,12 @@ static int __init journal_init(void) BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); ret = journal_init_caches(); - if (ret != 0) + if (ret == 0) { + jbd2_create_debugfs_entry(); + jbd2_create_jbd_stats_proc_entry(); + } else { jbd2_journal_destroy_caches(); - jbd2_create_debugfs_entry(); - jbd2_create_jbd_stats_proc_entry(); + } return ret; } -- cgit v1.2.3 From 46e665e9d297525d286989640cf4247cbe941df6 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: ext4: replace remaining __FUNCTION__ occurrences __FUNCTION__ is gcc-specific, use __func__ Signed-off-by: Harvey Harrison Cc: Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 14 +++++------ fs/ext4/ext4_jbd2.c | 12 ++++----- fs/ext4/extents.c | 2 +- fs/ext4/ialloc.c | 10 ++++---- fs/ext4/inode.c | 8 +++--- fs/ext4/mballoc.c | 20 +++++++-------- fs/ext4/namei.c | 26 ++++++++++---------- fs/ext4/resize.c | 70 ++++++++++++++++++++++++++--------------------------- fs/ext4/super.c | 16 ++++++------ fs/ext4/xattr.c | 16 ++++++------ 10 files changed, 97 insertions(+), 97 deletions(-) (limited to 'fs') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index a080d7f5fac..af5032b23c2 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -58,7 +58,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, /* If checksum is bad mark all blocks used to prevent allocation * essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "Checksum bad for group %lu\n", block_group); gdp->bg_free_blocks_count = 0; gdp->bg_free_inodes_count = 0; @@ -235,7 +235,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb, return 1; err_out: - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "Invalid block bitmap - " "block_group = %d, block = %llu", block_group, bitmap_blk); @@ -264,7 +264,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group) bitmap_blk = ext4_block_bitmap(sb, desc); bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "Cannot read block bitmap - " "block_group = %d, block_bitmap = %llu", (int)block_group, (unsigned long long)bitmap_blk); @@ -281,7 +281,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group) } if (bh_submit_read(bh) < 0) { put_bh(bh); - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "Cannot read block bitmap - " "block_group = %d, block_bitmap = %llu", (int)block_group, (unsigned long long)bitmap_blk); @@ -360,7 +360,7 @@ restart: BUG(); } #define rsv_window_dump(root, verbose) \ - __rsv_window_dump((root), (verbose), __FUNCTION__) + __rsv_window_dump((root), (verbose), __func__) #else #define rsv_window_dump(root, verbose) do {} while (0) #endif @@ -740,7 +740,7 @@ do_more: if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i, bitmap_bh->b_data)) { jbd_unlock_bh_state(bitmap_bh); - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "bit already cleared for block %llu", (ext4_fsblk_t)(block + i)); jbd_lock_bh_state(bitmap_bh); @@ -1796,7 +1796,7 @@ allocated: if (ext4_test_bit(grp_alloc_blk+i, bh2jh(bitmap_bh)->b_committed_data)) { printk("%s: block was unexpectedly set in " - "b_committed_data\n", __FUNCTION__); + "b_committed_data\n", __func__); } } } diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index d6afe4e2734..2e6007d418d 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -9,7 +9,7 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle, { int err = jbd2_journal_get_undo_access(handle, bh); if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __func__, bh, handle, err); return err; } @@ -18,7 +18,7 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle, { int err = jbd2_journal_get_write_access(handle, bh); if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __func__, bh, handle, err); return err; } @@ -27,7 +27,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle, { int err = jbd2_journal_forget(handle, bh); if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __func__, bh, handle, err); return err; } @@ -36,7 +36,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle, { int err = jbd2_journal_revoke(handle, blocknr, bh); if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __func__, bh, handle, err); return err; } @@ -45,7 +45,7 @@ int __ext4_journal_get_create_access(const char *where, { int err = jbd2_journal_get_create_access(handle, bh); if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __func__, bh, handle, err); return err; } @@ -54,6 +54,6 @@ int __ext4_journal_dirty_metadata(const char *where, { int err = jbd2_journal_dirty_metadata(handle, bh); if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __func__, bh, handle, err); return err; } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3d5a35f373d..1c8aab4dad2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -308,7 +308,7 @@ corrupted: } #define ext4_ext_check_header(inode, eh, depth) \ - __ext4_ext_check_header(__FUNCTION__, inode, eh, depth) + __ext4_ext_check_header(__func__, inode, eh, depth) #ifdef EXT_DEBUG static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index e8d24f24f28..a86377401ff 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -75,7 +75,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, /* If checksum is bad mark all blocks and inodes use to prevent * allocation, essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { - ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n", + ext4_error(sb, __func__, "Checksum bad for group %lu\n", block_group); gdp->bg_free_blocks_count = 0; gdp->bg_free_inodes_count = 0; @@ -586,7 +586,7 @@ got: ino++; if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || ino > EXT4_INODES_PER_GROUP(sb)) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "reserved inode or inode > inodes count - " "block_group = %lu, inode=%lu", group, ino + group * EXT4_INODES_PER_GROUP(sb)); @@ -792,7 +792,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) /* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "bad orphan ino %lu! e2fsck was run?", ino); goto error; } @@ -801,7 +801,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); bitmap_bh = read_inode_bitmap(sb, block_group); if (!bitmap_bh) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "inode bitmap error for orphan %lu", ino); goto error; } @@ -826,7 +826,7 @@ iget_failed: err = PTR_ERR(inode); inode = NULL; bad_orphan: - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "bad orphan inode %lu! e2fsck was run?", ino); printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", bit, (unsigned long long)bitmap_bh->b_blocknr, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index aa0fb985b15..bd1a391725c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -93,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, BUFFER_TRACE(bh, "call ext4_journal_revoke"); err = ext4_journal_revoke(handle, blocknr, bh); if (err) - ext4_abort(inode->i_sb, __FUNCTION__, + ext4_abort(inode->i_sb, __func__, "error %d when attempting revoke", err); BUFFER_TRACE(bh, "exit"); return err; @@ -1240,7 +1240,7 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh) { int err = jbd2_journal_dirty_data(handle, bh); if (err) - ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__, + ext4_journal_abort_handle(__func__, __func__, bh, handle, err); return err; } @@ -3371,7 +3371,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; if (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count)) { - ext4_warning(inode->i_sb, __FUNCTION__, + ext4_warning(inode->i_sb, __func__, "Unable to expand inode %lu. Delete" " some EAs or run e2fsck.", inode->i_ino); @@ -3412,7 +3412,7 @@ void ext4_dirty_inode(struct inode *inode) current_handle->h_transaction != handle->h_transaction) { /* This task has a transaction open against a different fs */ printk(KERN_EMERG "%s: transactions do not match!\n", - __FUNCTION__); + __func__); } else { jbd_debug(5, "marking dirty. outer handle=%p\n", current_handle); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d9136b539c1..0b46fc0ca19 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -734,7 +734,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, blocknr += le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - ext4_error(sb, __FUNCTION__, "double-free of inode" + ext4_error(sb, __func__, "double-free of inode" " %lu's block %llu(bit %u in group %lu)\n", inode ? inode->i_ino : 0, blocknr, first + i, e4b->bd_group); @@ -906,7 +906,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, } #undef MB_CHECK_ASSERT #define mb_check_buddy(e4b) __mb_check_buddy(e4b, \ - __FILE__, __FUNCTION__, __LINE__) + __FILE__, __func__, __LINE__) #else #define mb_check_buddy(e4b) #endif @@ -980,7 +980,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, grp->bb_fragments = fragments; if (free != grp->bb_free) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", group, free, grp->bb_free); /* @@ -1366,7 +1366,7 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, blocknr += le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - ext4_error(sb, __FUNCTION__, "double-free of inode" + ext4_error(sb, __func__, "double-free of inode" " %lu's block %llu(bit %u in group %lu)\n", inode ? inode->i_ino : 0, blocknr, block, e4b->bd_group); @@ -1847,7 +1847,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, * free blocks even though group info says we * we have free blocks */ - ext4_error(sb, __FUNCTION__, "%d free blocks as per " + ext4_error(sb, __func__, "%d free blocks as per " "group info. But bitmap says 0\n", free); break; @@ -1856,7 +1856,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); BUG_ON(ex.fe_len <= 0); if (free < ex.fe_len) { - ext4_error(sb, __FUNCTION__, "%d free blocks as per " + ext4_error(sb, __func__, "%d free blocks as per " "group info. But got %d blocks\n", free, ex.fe_len); /* @@ -3073,7 +3073,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, in_range(block, ext4_inode_table(sb, gdp), EXT4_SB(sb)->s_itb_per_group)) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "Allocating block in system zone - block = %llu", block); } @@ -3786,7 +3786,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, pa, (unsigned long) pa->pa_lstart, (unsigned long) pa->pa_pstart, (unsigned long) pa->pa_len); - ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n", + ext4_error(sb, __func__, "free %u, pa_free %u\n", free, pa->pa_free); /* * pa is already deleted so we use the value obtained @@ -4490,7 +4490,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, if (block < le32_to_cpu(es->s_first_data_block) || block + count < block || block + count > ext4_blocks_count(es)) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "Freeing blocks not in datazone - " "block = %lu, count = %lu", block, count); goto error_return; @@ -4531,7 +4531,7 @@ do_more: in_range(block + count - 1, ext4_inode_table(sb, gdp), EXT4_SB(sb)->s_itb_per_group)) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "Freeing blocks in system zone - " "Block = %lu, count = %lu", block, count); } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 63c33e05347..02cdaec39e2 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -353,7 +353,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, if (root->info.hash_version != DX_HASH_TEA && root->info.hash_version != DX_HASH_HALF_MD4 && root->info.hash_version != DX_HASH_LEGACY) { - ext4_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __func__, "Unrecognised inode hash code %d", root->info.hash_version); brelse(bh); @@ -367,7 +367,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, hash = hinfo->hash; if (root->info.unused_flags & 1) { - ext4_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __func__, "Unimplemented inode hash flags: %#06x", root->info.unused_flags); brelse(bh); @@ -376,7 +376,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, } if ((indirect = root->info.indirect_levels) > 1) { - ext4_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __func__, "Unimplemented inode hash depth: %#06x", root->info.indirect_levels); brelse(bh); @@ -389,7 +389,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, if (dx_get_limit(entries) != dx_root_limit(dir, root->info.info_length)) { - ext4_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __func__, "dx entry: limit != root limit"); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -401,7 +401,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, { count = dx_get_count(entries); if (!count || count > dx_get_limit(entries)) { - ext4_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __func__, "dx entry: no count or count > limit"); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -446,7 +446,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, goto fail2; at = entries = ((struct dx_node *) bh->b_data)->entries; if (dx_get_limit(entries) != dx_node_limit (dir)) { - ext4_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __func__, "dx entry: limit != node limit"); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -462,7 +462,7 @@ fail2: } fail: if (*err == ERR_BAD_DX_DIR) - ext4_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __func__, "Corrupt dir inode %ld, running e2fsck is " "recommended.", dir->i_ino); return NULL; @@ -919,7 +919,7 @@ restart: wait_on_buffer(bh); if (!buffer_uptodate(bh)) { /* read error, skip block & hope for the best */ - ext4_error(sb, __FUNCTION__, "reading directory #%lu " + ext4_error(sb, __func__, "reading directory #%lu " "offset %lu", dir->i_ino, (unsigned long)block); brelse(bh); @@ -1012,7 +1012,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, retval = ext4_htree_next_block(dir, hash, frame, frames, NULL); if (retval < 0) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "error reading index page in directory #%lu", dir->i_ino); *err = retval; @@ -1537,7 +1537,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (levels && (dx_get_count(frames->entries) == dx_get_limit(frames->entries))) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Directory index full!"); err = -ENOSPC; goto cleanup; @@ -1865,11 +1865,11 @@ static int empty_dir (struct inode * inode) if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { if (err) - ext4_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __func__, "error %d reading directory #%lu offset 0", err, inode->i_ino); else - ext4_warning(inode->i_sb, __FUNCTION__, + ext4_warning(inode->i_sb, __func__, "bad directory (dir #%lu) - no data block", inode->i_ino); return 1; @@ -1898,7 +1898,7 @@ static int empty_dir (struct inode * inode) offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); if (!bh) { if (err) - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "error %d reading directory" " #%lu offset %lu", err, inode->i_ino, offset); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 728e3efa84b..3e0f5d06f3e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -50,63 +50,63 @@ static int verify_group_input(struct super_block *sb, ext4_get_group_no_and_offset(sb, start, NULL, &offset); if (group != sbi->s_groups_count) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Cannot add at group %u (only %lu groups)", input->group, sbi->s_groups_count); else if (offset != 0) - ext4_warning(sb, __FUNCTION__, "Last group not full"); + ext4_warning(sb, __func__, "Last group not full"); else if (input->reserved_blocks > input->blocks_count / 5) - ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", + ext4_warning(sb, __func__, "Reserved blocks too high (%u)", input->reserved_blocks); else if (free_blocks_count < 0) - ext4_warning(sb, __FUNCTION__, "Bad blocks count %u", + ext4_warning(sb, __func__, "Bad blocks count %u", input->blocks_count); else if (!(bh = sb_bread(sb, end - 1))) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Cannot read last block (%llu)", end - 1); else if (outside(input->block_bitmap, start, end)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Block bitmap not in group (block %llu)", (unsigned long long)input->block_bitmap); else if (outside(input->inode_bitmap, start, end)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Inode bitmap not in group (block %llu)", (unsigned long long)input->inode_bitmap); else if (outside(input->inode_table, start, end) || outside(itend - 1, start, end)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Inode table not in group (blocks %llu-%llu)", (unsigned long long)input->inode_table, itend - 1); else if (input->inode_bitmap == input->block_bitmap) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Block bitmap same as inode bitmap (%llu)", (unsigned long long)input->block_bitmap); else if (inside(input->block_bitmap, input->inode_table, itend)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Block bitmap (%llu) in inode table (%llu-%llu)", (unsigned long long)input->block_bitmap, (unsigned long long)input->inode_table, itend - 1); else if (inside(input->inode_bitmap, input->inode_table, itend)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Inode bitmap (%llu) in inode table (%llu-%llu)", (unsigned long long)input->inode_bitmap, (unsigned long long)input->inode_table, itend - 1); else if (inside(input->block_bitmap, start, metaend)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Block bitmap (%llu) in GDT table" " (%llu-%llu)", (unsigned long long)input->block_bitmap, start, metaend - 1); else if (inside(input->inode_bitmap, start, metaend)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Inode bitmap (%llu) in GDT table" " (%llu-%llu)", (unsigned long long)input->inode_bitmap, start, metaend - 1); else if (inside(input->inode_table, start, metaend) || inside(itend - 1, start, metaend)) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Inode table (%llu-%llu) overlaps" "GDT table (%llu-%llu)", (unsigned long long)input->inode_table, @@ -368,7 +368,7 @@ static int verify_reserved_gdb(struct super_block *sb, while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { if (le32_to_cpu(*p++) != grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "reserved GDT %llu" " missing grp %d (%llu)", blk, grp, @@ -424,7 +424,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, */ if (EXT4_SB(sb)->s_sbh->b_blocknr != le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "won't resize using backup superblock at %llu", (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); return -EPERM; @@ -448,7 +448,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, data = (__le32 *)dind->b_data; if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "new group %u GDT block %llu not reserved", input->group, gdblock); err = -EINVAL; @@ -472,7 +472,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, GFP_KERNEL); if (!n_group_desc) { err = -ENOMEM; - ext4_warning (sb, __FUNCTION__, + ext4_warning(sb, __func__, "not enough memory for %lu groups", gdb_num + 1); goto exit_inode; } @@ -570,7 +570,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, /* Get each reserved primary GDT block and verify it holds backups */ for (res = 0; res < reserved_gdb; res++, blk++) { if (le32_to_cpu(*data) != blk) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "reserved block %llu" " not at offset %ld", blk, @@ -714,7 +714,7 @@ static void update_backups(struct super_block *sb, */ exit_err: if (err) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "can't update backup for group %lu (err %d), " "forcing fsck on next reboot", group, err); sbi->s_mount_state &= ~EXT4_VALID_FS; @@ -754,33 +754,33 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Can't resize non-sparse filesystem further"); return -EPERM; } if (ext4_blocks_count(es) + input->blocks_count < ext4_blocks_count(es)) { - ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n"); + ext4_warning(sb, __func__, "blocks_count overflow\n"); return -EINVAL; } if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < le32_to_cpu(es->s_inodes_count)) { - ext4_warning(sb, __FUNCTION__, "inodes_count overflow\n"); + ext4_warning(sb, __func__, "inodes_count overflow\n"); return -EINVAL; } if (reserved_gdb || gdb_off == 0) { if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)){ - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "No reserved GDT blocks, can't resize"); return -EPERM; } inode = ext4_iget(sb, EXT4_RESIZE_INO); if (IS_ERR(inode)) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "Error opening resize inode"); return PTR_ERR(inode); } @@ -809,7 +809,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) lock_super(sb); if (input->group != sbi->s_groups_count) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "multiple resizers run on filesystem!"); err = -EBUSY; goto exit_journal; @@ -975,13 +975,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, " too large to resize to %llu blocks safely\n", sb->s_id, n_blocks_count); if (sizeof(sector_t) < 8) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "CONFIG_LBD not enabled\n"); return -EINVAL; } if (n_blocks_count < o_blocks_count) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "can't shrink FS - resize aborted"); return -EBUSY; } @@ -990,7 +990,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); if (last == 0) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "need to use ext2online to resize further"); return -EPERM; } @@ -998,7 +998,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, add = EXT4_BLOCKS_PER_GROUP(sb) - last; if (o_blocks_count + add < o_blocks_count) { - ext4_warning(sb, __FUNCTION__, "blocks_count overflow"); + ext4_warning(sb, __func__, "blocks_count overflow"); return -EINVAL; } @@ -1006,7 +1006,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, add = n_blocks_count - o_blocks_count; if (o_blocks_count + add < n_blocks_count) - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "will only finish group (%llu" " blocks, %u new)", o_blocks_count + add, add); @@ -1014,7 +1014,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, /* See if the device is actually as big as what was requested */ bh = sb_bread(sb, o_blocks_count + add -1); if (!bh) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "can't read last block, resize aborted"); return -ENOSPC; } @@ -1026,13 +1026,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, handle = ext4_journal_start_sb(sb, 3); if (IS_ERR(handle)) { err = PTR_ERR(handle); - ext4_warning(sb, __FUNCTION__, "error %d on journal start",err); + ext4_warning(sb, __func__, "error %d on journal start", err); goto exit_put; } lock_super(sb); if (o_blocks_count != ext4_blocks_count(es)) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "multiple resizers run on filesystem!"); unlock_super(sb); ext4_journal_stop(handle); @@ -1042,7 +1042,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh))) { - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "error %d on journal write access", err); unlock_super(sb); ext4_journal_stop(handle); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 01d163964e6..93a7e746f42 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -135,7 +135,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) * take the FS itself readonly cleanly. */ journal = EXT4_SB(sb)->s_journal; if (is_journal_aborted(journal)) { - ext4_abort(sb, __FUNCTION__, + ext4_abort(sb, __func__, "Detected aborted journal"); return ERR_PTR(-EROFS); } @@ -355,7 +355,7 @@ void ext4_update_dynamic_rev(struct super_block *sb) if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) return; - ext4_warning(sb, __FUNCTION__, + ext4_warning(sb, __func__, "updating to rev %d because of new feature flag, " "running e2fsck is recommended", EXT4_DYNAMIC_REV); @@ -1511,7 +1511,7 @@ static int ext4_check_descriptors(struct super_block *sb) return 0; } if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "Checksum for group %lu failed (%u!=%u)\n", i, le16_to_cpu(ext4_group_desc_csum(sbi, i, gdp)), le16_to_cpu(gdp->bg_checksum)); @@ -1605,7 +1605,7 @@ static void ext4_orphan_cleanup (struct super_block * sb, if (inode->i_nlink) { printk(KERN_DEBUG "%s: truncating inode %lu to %Ld bytes\n", - __FUNCTION__, inode->i_ino, inode->i_size); + __func__, inode->i_ino, inode->i_size); jbd_debug(2, "truncating inode %lu to %Ld bytes\n", inode->i_ino, inode->i_size); ext4_truncate(inode); @@ -1613,7 +1613,7 @@ static void ext4_orphan_cleanup (struct super_block * sb, } else { printk(KERN_DEBUG "%s: deleting unreferenced inode %lu\n", - __FUNCTION__, inode->i_ino); + __func__, inode->i_ino); jbd_debug(2, "deleting unreferenced inode %lu\n", inode->i_ino); nr_orphans++; @@ -2699,9 +2699,9 @@ static void ext4_clear_journal_err(struct super_block * sb, char nbuf[16]; errstr = ext4_decode_error(sb, j_errno, nbuf); - ext4_warning(sb, __FUNCTION__, "Filesystem error recorded " + ext4_warning(sb, __func__, "Filesystem error recorded " "from previous mount: %s", errstr); - ext4_warning(sb, __FUNCTION__, "Marking fs in need of " + ext4_warning(sb, __func__, "Marking fs in need of " "filesystem check."); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; @@ -2828,7 +2828,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data) } if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) - ext4_abort(sb, __FUNCTION__, "Abort forced by user"); + ext4_abort(sb, __func__, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 739930427b8..f56598df688 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -227,7 +227,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext4_xattr_check_block(bh)) { -bad_block: ext4_error(inode->i_sb, __FUNCTION__, +bad_block: ext4_error(inode->i_sb, __func__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -369,7 +369,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext4_xattr_check_block(bh)) { - ext4_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __func__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -661,7 +661,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, atomic_read(&(bs->bh->b_count)), le32_to_cpu(BHDR(bs->bh)->h_refcount)); if (ext4_xattr_check_block(bs->bh)) { - ext4_error(sb, __FUNCTION__, + ext4_error(sb, __func__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -861,7 +861,7 @@ cleanup_dquot: goto cleanup; bad_block: - ext4_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __func__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; @@ -1164,7 +1164,7 @@ retry: if (!bh) goto cleanup; if (ext4_xattr_check_block(bh)) { - ext4_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __func__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -1339,14 +1339,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) goto cleanup; bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); if (!bh) { - ext4_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __func__, "inode %lu: block %llu read error", inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; } if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) { - ext4_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __func__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; @@ -1473,7 +1473,7 @@ again: } bh = sb_bread(inode->i_sb, ce->e_block); if (!bh) { - ext4_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __func__, "inode %lu: block %lu read error", inode->i_ino, (unsigned long) ce->e_block); } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= -- cgit v1.2.3 From 329d291f50d53f77d15769051f3eb494a9fd54b7 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 17 Apr 2008 10:38:59 -0400 Subject: jdb2: replace remaining __FUNCTION__ occurrences __FUNCTION__ is gcc-specific, use __func__ Signed-off-by: Harvey Harrison Cc: Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 18 +++++++++--------- fs/jbd2/revoke.c | 2 +- fs/jbd2/transaction.c | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 64356e85a10..53632e3e845 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -534,7 +534,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) if (!tid_geq(journal->j_commit_request, tid)) { printk(KERN_EMERG "%s: error: j_commit_request=%d, tid=%d\n", - __FUNCTION__, journal->j_commit_request, tid); + __func__, journal->j_commit_request, tid); } spin_unlock(&journal->j_state_lock); #endif @@ -599,7 +599,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, printk(KERN_ALERT "%s: journal block not found " "at offset %lu on %s\n", - __FUNCTION__, + __func__, blocknr, bdevname(journal->j_dev, b)); err = -EIO; @@ -1028,7 +1028,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", - __FUNCTION__); + __func__); kfree(journal); journal = NULL; goto out; @@ -1084,7 +1084,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", - __FUNCTION__); + __func__); kfree(journal); return NULL; } @@ -1093,7 +1093,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) /* If that failed, give up */ if (err) { printk(KERN_ERR "%s: Cannnot locate journal superblock\n", - __FUNCTION__); + __func__); kfree(journal); return NULL; } @@ -1179,7 +1179,7 @@ int jbd2_journal_create(journal_t *journal) */ printk(KERN_EMERG "%s: creation of journal on external device!\n", - __FUNCTION__); + __func__); BUG(); } @@ -1999,7 +1999,7 @@ static struct journal_head *journal_alloc_journal_head(void) jbd_debug(1, "out of memory for journal_head\n"); if (time_after(jiffies, last_warning + 5*HZ)) { printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", - __FUNCTION__); + __func__); last_warning = jiffies; } while (!ret) { @@ -2136,13 +2136,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh) if (jh->b_frozen_data) { printk(KERN_WARNING "%s: freeing " "b_frozen_data\n", - __FUNCTION__); + __func__); jbd2_free(jh->b_frozen_data, bh->b_size); } if (jh->b_committed_data) { printk(KERN_WARNING "%s: freeing " "b_committed_data\n", - __FUNCTION__); + __func__); jbd2_free(jh->b_committed_data, bh->b_size); } bh->b_private = NULL; diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 144a2065f03..257ff262576 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -139,7 +139,7 @@ repeat: oom: if (!journal_oom_retry) return -ENOMEM; - jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); + jbd_debug(1, "ENOMEM in %s, retrying\n", __func__); yield(); goto repeat; } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 401bf9d2365..d6e006e6780 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -696,7 +696,7 @@ repeat: if (!frozen_buffer) { printk(KERN_EMERG "%s: OOM for frozen_buffer\n", - __FUNCTION__); + __func__); JBUFFER_TRACE(jh, "oom!"); error = -ENOMEM; jbd_lock_bh_state(bh); @@ -914,7 +914,7 @@ repeat: committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!committed_data) { printk(KERN_EMERG "%s: No memory for committed data\n", - __FUNCTION__); + __func__); err = -ENOMEM; goto out; } -- cgit v1.2.3 From 47df1793171771ff9b1622d30433ef6edf0eb280 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 25 Apr 2008 02:01:44 +0000 Subject: [CIFS] Update cifs version number Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index e1dd9f32e1d..503f6aa734a 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -110,5 +110,5 @@ extern int cifs_ioctl(struct inode *inode, struct file *filep, extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.52" +#define CIFS_VERSION "1.53" #endif /* _CIFSFS_H */ -- cgit v1.2.3 From 0206e61b467fde4d7b50f1a64355182a4fd9576b Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 25 Apr 2008 18:19:40 +0000 Subject: [CIFS] Fix spelling mistake Noticed by Joe Perches Signed-off-by: Steve French --- fs/cifs/cifspdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 9f49c2f3582..a0d26b540d4 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -2050,7 +2050,7 @@ typedef struct { to 0xFFFF00 */ #define CIFS_UNIX_LARGE_WRITE_CAP 0x00000080 #define CIFS_UNIX_TRANSPORT_ENCRYPTION_CAP 0x00000100 /* can do SPNEGO crypt */ -#define CIFS_UNIX_TRANPSORT_ENCRYPTION_MANDATORY_CAP 0x00000200 /* must do */ +#define CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP 0x00000200 /* must do */ #define CIFS_UNIX_PROXY_CAP 0x00000400 /* Proxy cap: 0xACE ioctl and QFS PROXY call */ #ifdef CONFIG_CIFS_POSIX -- cgit v1.2.3 From d09e860cf07e7c9ee12920a09f5080e30a12a23a Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 26 Apr 2008 00:22:23 +0000 Subject: [CIFS] Adds to dns_resolver checking if the server name is an IP addr and skipping upcall in this case. Signed-off-by: Igor Mammedov Signed-off-by: sfrench@us.ibm.com --- fs/cifs/dns_resolve.c | 62 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 52 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 7cc86c41818..939e256f849 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -55,6 +55,32 @@ struct key_type key_type_dns_resolver = { .match = user_match, }; +/* Checks if supplied name is IP address + * returns: + * 1 - name is IP + * 0 - name is not IP + */ +static int is_ip(const char *name) +{ + int rc; + struct sockaddr_in sin_server; + struct sockaddr_in6 sin_server6; + + rc = cifs_inet_pton(AF_INET, name, + &sin_server.sin_addr.s_addr); + + if (rc <= 0) { + /* not ipv4 address, try ipv6 */ + rc = cifs_inet_pton(AF_INET6, name, + &sin_server6.sin6_addr.in6_u); + if (rc > 0) + return 1; + } else { + return 1; + } + /* we failed translating address */ + return 0; +} /* Resolves server name to ip address. * input: @@ -67,8 +93,9 @@ int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) { int rc = -EAGAIN; - struct key *rkey; + struct key *rkey = ERR_PTR(-EAGAIN); char *name; + char *data = NULL; int len; if (!ip_addr || !unc) @@ -97,26 +124,41 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) memcpy(name, unc+2, len); name[len] = 0; + if (is_ip(name)) { + cFYI(1, ("%s: it is IP, skipping dns upcall: %s", + __func__, name)); + data = name; + goto skip_upcall; + } + rkey = request_key(&key_type_dns_resolver, name, ""); if (!IS_ERR(rkey)) { - len = strlen(rkey->payload.data); - *ip_addr = kmalloc(len+1, GFP_KERNEL); - if (*ip_addr) { - memcpy(*ip_addr, rkey->payload.data, len); - (*ip_addr)[len] = '\0'; - cFYI(1, ("%s: resolved: %s to %s", __func__, + data = rkey->payload.data; + cFYI(1, ("%s: resolved: %s to %s", __func__, rkey->description, *ip_addr )); + } else { + cERROR(1, ("%s: unable to resolve: %s", __func__, name)); + goto out; + } + +skip_upcall: + if (data) { + len = strlen(data); + *ip_addr = kmalloc(len+1, GFP_KERNEL); + if (*ip_addr) { + memcpy(*ip_addr, data, len); + (*ip_addr)[len] = '\0'; rc = 0; } else { rc = -ENOMEM; } - key_put(rkey); - } else { - cERROR(1, ("%s: unable to resolve: %s", __func__, name)); + if (!IS_ERR(rkey)) + key_put(rkey); } +out: kfree(name); return rc; } -- cgit v1.2.3 From 39da9847113a870b20fee9a7c216a848b9a5e9f7 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 28 Apr 2008 04:04:34 +0000 Subject: [CIFS] Fix statfs formatting Signed-off-by: Christoph Hellwig Signed-off-by: Steve French --- fs/cifs/CHANGES | 3 +++ fs/cifs/cifsfs.c | 66 ++++++++++++++++++++++++++++---------------------------- 2 files changed, 36 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 05c9da6181c..8355e918fdd 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,3 +1,6 @@ +Version 1.53 +------------ + Version 1.52 ------------ Fix oops on second mount to server when null auth is used. diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 39c2cbdface..68f1cdf6aed 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -222,50 +222,50 @@ static int cifs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; - int xid; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifsTconInfo *tcon = cifs_sb->tcon; int rc = -EOPNOTSUPP; - struct cifs_sb_info *cifs_sb; - struct cifsTconInfo *pTcon; + int xid; xid = GetXid(); - cifs_sb = CIFS_SB(sb); - pTcon = cifs_sb->tcon; - buf->f_type = CIFS_MAGIC_NUMBER; - /* instead could get the real value via SMB_QUERY_FS_ATTRIBUTE_INFO */ - buf->f_namelen = PATH_MAX; /* PATH_MAX may be too long - it would - presumably be total path, but note - that some servers (includinng Samba 3) - have a shorter maximum path */ + /* + * PATH_MAX may be too long - it would presumably be total path, + * but note that some servers (includinng Samba 3) have a shorter + * maximum path. + * + * Instead could get the real value via SMB_QUERY_FS_ATTRIBUTE_INFO. + */ + buf->f_namelen = PATH_MAX; buf->f_files = 0; /* undefined */ buf->f_ffree = 0; /* unlimited */ -/* BB we could add a second check for a QFS Unix capability bit */ -/* BB FIXME check CIFS_POSIX_EXTENSIONS Unix cap first FIXME BB */ - if ((pTcon->ses->capabilities & CAP_UNIX) && (CIFS_POSIX_EXTENSIONS & - le64_to_cpu(pTcon->fsUnixInfo.Capability))) - rc = CIFSSMBQFSPosixInfo(xid, pTcon, buf); - - /* Only need to call the old QFSInfo if failed - on newer one */ - if (rc) - if (pTcon->ses->capabilities & CAP_NT_SMBS) - rc = CIFSSMBQFSInfo(xid, pTcon, buf); /* not supported by OS2 */ - - /* Some old Windows servers also do not support level 103, retry with - older level one if old server failed the previous call or we - bypassed it because we detected that this was an older LANMAN sess */ + /* + * We could add a second check for a QFS Unix capability bit + */ + if ((tcon->ses->capabilities & CAP_UNIX) && + (CIFS_POSIX_EXTENSIONS & le64_to_cpu(tcon->fsUnixInfo.Capability))) + rc = CIFSSMBQFSPosixInfo(xid, tcon, buf); + + /* + * Only need to call the old QFSInfo if failed on newer one, + * e.g. by OS/2. + **/ + if (rc && (tcon->ses->capabilities & CAP_NT_SMBS)) + rc = CIFSSMBQFSInfo(xid, tcon, buf); + + /* + * Some old Windows servers also do not support level 103, retry with + * older level one if old server failed the previous call or we + * bypassed it because we detected that this was an older LANMAN sess + */ if (rc) - rc = SMBOldQFSInfo(xid, pTcon, buf); - /* int f_type; - __fsid_t f_fsid; - int f_namelen; */ - /* BB get from info in tcon struct at mount time call to QFSAttrInfo */ + rc = SMBOldQFSInfo(xid, tcon, buf); + FreeXid(xid); - return 0; /* always return success? what if volume is no - longer available? */ + return 0; } static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd) -- cgit v1.2.3 From 22ba0317c81ba263172baaefd2cb38de78c4598f Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 28 Apr 2008 18:38:49 +0300 Subject: udf: fs/udf/partition.c:udf_get_pblock() mustn't be inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes the following build error with UML and gcc 4.3: <-- snip --> ... CC fs/udf/partition.o /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/udf/partition.c: In function ‘udf_get_pblock_virt15’: /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/udf/partition.c:32: sorry, unimplemented: inlining failed in call to ‘udf_get_pblock’: function body not available /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/udf/partition.c:102: sorry, unimplemented: called from here make[3]: *** [fs/udf/partition.o] Error 1 <-- snip --> Signed-off-by: Adrian Bunk Signed-off-by: Jan Kara --- fs/udf/partition.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/udf/partition.c b/fs/udf/partition.c index 63610f026ae..96dfd207c3d 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -27,8 +27,8 @@ #include #include -inline uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, - uint16_t partition, uint32_t offset) +uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, + uint16_t partition, uint32_t offset) { struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map; -- cgit v1.2.3 From bf62fd887cab230f5952b611bde25e8e15acb454 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 28 Apr 2008 23:05:58 +0000 Subject: [CIFS] fixed compatibility issue with samba refferal request treeName part is canonicalized to '/' path separator Signed-off-by: Igor Mammedov Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 95024c066d8..f6fdecf6598 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -93,15 +93,11 @@ static char *cifs_get_share_name(const char *node_name) /* find sharename end */ pSep++; pSep = memchr(UNC+(pSep-UNC), '\\', len-(pSep-UNC)); - if (!pSep) { - cERROR(1, ("%s:2 cant find share name in node name: %s", - __func__, node_name)); - kfree(UNC); - return NULL; + if (pSep) { + /* trim path up to sharename end + * now we have share name in UNC */ + *pSep = 0; } - /* trim path up to sharename end - * * now we have share name in UNC */ - *pSep = 0; return UNC; } @@ -188,7 +184,7 @@ static char *compose_mount_options(const char *sb_mountdata, tkn_e = strchr(tkn_e+1, '\\'); if (tkn_e) { strcat(mountdata, ",prefixpath="); - strcat(mountdata, tkn_e); + strcat(mountdata, tkn_e+1); } } @@ -244,7 +240,8 @@ static char *build_full_dfs_path_from_dentry(struct dentry *dentry) return NULL; if (cifs_sb->tcon->Flags & SMB_SHARE_IS_IN_DFS) { - /* we should use full path name to correct working with DFS */ + int i; + /* we should use full path name for correct working with DFS */ l_max_len = strnlen(cifs_sb->tcon->treeName, MAX_TREE_SIZE+1) + strnlen(search_path, MAX_PATHCONF) + 1; tmp_path = kmalloc(l_max_len, GFP_KERNEL); @@ -253,8 +250,14 @@ static char *build_full_dfs_path_from_dentry(struct dentry *dentry) return NULL; } strncpy(tmp_path, cifs_sb->tcon->treeName, l_max_len); - strcat(tmp_path, search_path); tmp_path[l_max_len-1] = 0; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) + for (i = 0; i < l_max_len; i++) { + if (tmp_path[i] == '\\') + tmp_path[i] = '/'; + } + strncat(tmp_path, search_path, l_max_len - strlen(tmp_path)); + full_path = tmp_path; kfree(search_path); } else { -- cgit v1.2.3 From 4b18f2a9c3964f7612b7403dddc1d1ba5443ae24 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 29 Apr 2008 00:06:05 +0000 Subject: [CIFS] convert usage of implicit booleans to bool Signed-off-by: Joe Perches Signed-off-by: Steve French --- fs/cifs/asn1.c | 10 +++---- fs/cifs/cifsacl.c | 16 +++++------ fs/cifs/cifsfs.c | 6 ++-- fs/cifs/cifsfs.h | 8 ------ fs/cifs/cifsglob.h | 44 ++++++++++++----------------- fs/cifs/cifsproto.h | 13 +++++---- fs/cifs/cifssmb.c | 38 ++++++++++++------------- fs/cifs/connect.c | 80 ++++++++++++++++++++++++++-------------------------- fs/cifs/dir.c | 18 ++++++------ fs/cifs/fcntl.c | 2 +- fs/cifs/file.c | 80 ++++++++++++++++++++++++++-------------------------- fs/cifs/inode.c | 42 +++++++++++++-------------- fs/cifs/link.c | 2 +- fs/cifs/misc.c | 33 +++++++++++----------- fs/cifs/readdir.c | 12 ++++---- fs/cifs/smbencrypt.c | 8 +++--- fs/cifs/xattr.c | 2 +- 17 files changed, 200 insertions(+), 214 deletions(-) (limited to 'fs') diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index bcda2c6b6a0..cb52cbbe45f 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -460,8 +460,8 @@ decode_negTokenInit(unsigned char *security_blob, int length, unsigned char *sequence_end; unsigned long *oid = NULL; unsigned int cls, con, tag, oidlen, rc; - int use_ntlmssp = FALSE; - int use_kerberos = FALSE; + bool use_ntlmssp = false; + bool use_kerberos = false; *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ @@ -561,15 +561,15 @@ decode_negTokenInit(unsigned char *security_blob, int length, if (compare_oid(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN)) - use_kerberos = TRUE; + use_kerberos = true; else if (compare_oid(oid, oidlen, KRB5_OID, KRB5_OID_LEN)) - use_kerberos = TRUE; + use_kerberos = true; else if (compare_oid(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN)) - use_ntlmssp = TRUE; + use_ntlmssp = true; kfree(oid); } diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index e99d4faf5f0..6fe1bc5bb36 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -559,7 +559,7 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode, const char *path, const __u16 *pfid) { struct cifsFileInfo *open_file = NULL; - int unlock_file = FALSE; + bool unlock_file = false; int xid; int rc = -EIO; __u16 fid; @@ -586,10 +586,10 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode, cifs_sb = CIFS_SB(sb); if (open_file) { - unlock_file = TRUE; + unlock_file = true; fid = open_file->netfid; } else if (pfid == NULL) { - int oplock = FALSE; + bool oplock = false; /* open file */ rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, READ_CONTROL, 0, &fid, &oplock, NULL, @@ -604,7 +604,7 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode, rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen); cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen)); - if (unlock_file == TRUE) /* find_readable_file increments ref count */ + if (unlock_file == true) /* find_readable_file increments ref count */ atomic_dec(&open_file->wrtPending); else if (pfid == NULL) /* if opened above we have to close the handle */ CIFSSMBClose(xid, cifs_sb->tcon, fid); @@ -619,7 +619,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, struct inode *inode, const char *path) { struct cifsFileInfo *open_file; - int unlock_file = FALSE; + bool unlock_file = false; int xid; int rc = -EIO; __u16 fid; @@ -640,10 +640,10 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, open_file = find_readable_file(CIFS_I(inode)); if (open_file) { - unlock_file = TRUE; + unlock_file = true; fid = open_file->netfid; } else { - int oplock = FALSE; + int oplock = 0; /* open file */ rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, WRITE_DAC, 0, &fid, &oplock, NULL, @@ -658,7 +658,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); cFYI(DBG2, ("SetCIFSACL rc = %d", rc)); - if (unlock_file == TRUE) + if (unlock_file) atomic_dec(&open_file->wrtPending); else CIFSSMBClose(xid, cifs_sb->tcon, fid); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 68f1cdf6aed..427a7c69589 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -306,8 +306,8 @@ cifs_alloc_inode(struct super_block *sb) /* Until the file is open and we have gotten oplock info back from the server, can not assume caching of file data or metadata */ - cifs_inode->clientCanCacheRead = FALSE; - cifs_inode->clientCanCacheAll = FALSE; + cifs_inode->clientCanCacheRead = false; + cifs_inode->clientCanCacheAll = false; cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ /* Can not set i_flags here - they get immediately overwritten @@ -940,7 +940,7 @@ static int cifs_oplock_thread(void *dummyarg) rc = CIFSSMBLock(0, pTcon, netfid, 0 /* len */ , 0 /* offset */, 0, 0, LOCKING_ANDX_OPLOCK_RELEASE, - 0 /* wait flag */); + false /* wait flag */); cFYI(1, ("Oplock release rc = %d", rc)); } } else diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 503f6aa734a..cd1301a09b3 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -24,14 +24,6 @@ #define ROOT_I 2 -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef TRUE -#define TRUE 1 -#endif - extern struct file_system_type cifs_fs_type; extern const struct address_space_operations cifs_addr_ops; extern const struct address_space_operations cifs_addr_ops_smallbuf; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 69a2e194254..b7d9f698e63 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -57,14 +57,6 @@ #include "cifspdu.h" -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef TRUE -#define TRUE 1 -#endif - #ifndef XATTR_DOS_ATTRIB #define XATTR_DOS_ATTRIB "user.DOSATTRIB" #endif @@ -147,7 +139,7 @@ struct TCP_Server_Info { enum protocolEnum protocolType; char versionMajor; char versionMinor; - unsigned svlocal:1; /* local server or remote */ + bool svlocal:1; /* local server or remote */ atomic_t socketUseCount; /* number of open cifs sessions on socket */ atomic_t inFlight; /* number of requests on the wire to server */ #ifdef CONFIG_CIFS_STATS2 @@ -286,10 +278,10 @@ struct cifsTconInfo { FILE_SYSTEM_DEVICE_INFO fsDevInfo; FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */ FILE_SYSTEM_UNIX_INFO fsUnixInfo; - unsigned ipc:1; /* set if connection to IPC$ eg for RPC/PIPES */ - unsigned retry:1; - unsigned nocase:1; - unsigned unix_ext:1; /* if off disable Linux extensions to CIFS protocol + bool ipc:1; /* set if connection to IPC$ eg for RPC/PIPES */ + bool retry:1; + bool nocase:1; + bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol for this mount even if server would support */ /* BB add field for back pointer to sb struct(s)? */ }; @@ -317,10 +309,10 @@ struct cifs_search_info { char *srch_entries_start; char *presume_name; unsigned int resume_name_len; - unsigned endOfSearch:1; - unsigned emptyDir:1; - unsigned unicode:1; - unsigned smallBuf:1; /* so we know which buf_release function to call */ + bool endOfSearch:1; + bool emptyDir:1; + bool unicode:1; + bool smallBuf:1; /* so we know which buf_release function to call */ }; struct cifsFileInfo { @@ -335,9 +327,9 @@ struct cifsFileInfo { struct inode *pInode; /* needed for oplock break */ struct mutex lock_mutex; struct list_head llist; /* list of byte range locks we have. */ - unsigned closePend:1; /* file is marked to close */ - unsigned invalidHandle:1; /* file closed via session abend */ - unsigned messageMode:1; /* for pipes: message vs byte mode */ + bool closePend:1; /* file is marked to close */ + bool invalidHandle:1; /* file closed via session abend */ + bool messageMode:1; /* for pipes: message vs byte mode */ atomic_t wrtPending; /* handle in use - defer close */ struct semaphore fh_sem; /* prevents reopen race after dead ses*/ char *search_resume_name; /* BB removeme BB */ @@ -356,9 +348,9 @@ struct cifsInodeInfo { __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ atomic_t inUse; /* num concurrent users (local openers cifs) of file*/ unsigned long time; /* jiffies of last update/check of inode */ - unsigned clientCanCacheRead:1; /* read oplock */ - unsigned clientCanCacheAll:1; /* read and writebehind oplock */ - unsigned oplockPending:1; + bool clientCanCacheRead:1; /* read oplock */ + bool clientCanCacheAll:1; /* read and writebehind oplock */ + bool oplockPending:1; struct inode vfs_inode; }; @@ -426,9 +418,9 @@ struct mid_q_entry { struct smb_hdr *resp_buf; /* response buffer */ int midState; /* wish this were enum but can not pass to wait_event */ __u8 command; /* smb command code */ - unsigned largeBuf:1; /* if valid response, is pointer to large buf */ - unsigned multiRsp:1; /* multiple trans2 responses for one request */ - unsigned multiEnd:1; /* both received */ + bool largeBuf:1; /* if valid response, is pointer to large buf */ + bool multiRsp:1; /* multiple trans2 responses for one request */ + bool multiEnd:1; /* both received */ }; struct oplock_q_entry { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 50f9fdae19b..a249a29109a 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -59,8 +59,9 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *out_buf, int *bytes_returned); extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); -extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *); -extern int is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); +extern bool is_valid_oplock_break(struct smb_hdr *smb, + struct TCP_Server_Info *); +extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *); #ifdef CONFIG_CIFS_EXPERIMENTAL extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *); @@ -187,12 +188,12 @@ extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, #endif /* possibly unneeded function */ extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, const char *fileName, __u64 size, - int setAllocationSizeFlag, + bool setAllocationSizeFlag, const struct nls_table *nls_codepage, int remap_special_chars); extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, __u16 fileHandle, __u32 opener_pid, - int AllocSizeFlag); + bool AllocSizeFlag); extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon, char *full_path, __u64 mode, __u64 uid, __u64 gid, dev_t dev, @@ -291,11 +292,11 @@ extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, const __u16 netfid, const __u64 len, const __u64 offset, const __u32 numUnlock, const __u32 numLock, const __u8 lockType, - const int waitFlag); + const bool waitFlag); extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, const __u16 smb_file_id, const int get_flag, const __u64 len, struct file_lock *, - const __u16 lock_type, const int waitFlag); + const __u16 lock_type, const bool waitFlag); extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon); extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 4728fa982a4..cfd9750852b 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -95,7 +95,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon) list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { open_file = list_entry(tmp, struct cifsFileInfo, tlist); if (open_file) - open_file->invalidHandle = TRUE; + open_file->invalidHandle = true; } write_unlock(&GlobalSMBSeslock); /* BB Add call to invalidate_inodes(sb) for all superblocks mounted @@ -141,7 +141,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, if (tcon->ses->server->tcpStatus == CifsNeedReconnect) { /* on "soft" mounts we wait once */ - if ((tcon->retry == FALSE) || + if (!tcon->retry || (tcon->ses->status == CifsExiting)) { cFYI(1, ("gave up waiting on " "reconnect in smb_init")); @@ -289,7 +289,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, if (tcon->ses->server->tcpStatus == CifsNeedReconnect) { /* on "soft" mounts we wait once */ - if ((tcon->retry == FALSE) || + if (!tcon->retry || (tcon->ses->status == CifsExiting)) { cFYI(1, ("gave up waiting on " "reconnect in smb_init")); @@ -1686,7 +1686,7 @@ int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, const __u16 smb_file_id, const __u64 len, const __u64 offset, const __u32 numUnlock, - const __u32 numLock, const __u8 lockType, const int waitFlag) + const __u32 numLock, const __u8 lockType, const bool waitFlag) { int rc = 0; LOCK_REQ *pSMB = NULL; @@ -1695,7 +1695,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, int timeout = 0; __u16 count; - cFYI(1, ("CIFSSMBLock timeout %d numLock %d", waitFlag, numLock)); + cFYI(1, ("CIFSSMBLock timeout %d numLock %d", (int)waitFlag, numLock)); rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB); if (rc) @@ -1706,7 +1706,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) { timeout = CIFS_ASYNC_OP; /* no response expected */ pSMB->Timeout = 0; - } else if (waitFlag == TRUE) { + } else if (waitFlag) { timeout = CIFS_BLOCKING_OP; /* blocking operation, no timeout */ pSMB->Timeout = cpu_to_le32(-1);/* blocking - do not time out */ } else { @@ -1756,7 +1756,7 @@ int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, const __u16 smb_file_id, const int get_flag, const __u64 len, struct file_lock *pLockData, const __u16 lock_type, - const int waitFlag) + const bool waitFlag) { struct smb_com_transaction2_sfi_req *pSMB = NULL; struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; @@ -3581,9 +3581,9 @@ findFirstRetry: rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc == 0) { if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) - psrch_inf->unicode = TRUE; + psrch_inf->unicode = true; else - psrch_inf->unicode = FALSE; + psrch_inf->unicode = false; psrch_inf->ntwrk_buf_start = (char *)pSMBr; psrch_inf->smallBuf = 0; @@ -3594,9 +3594,9 @@ findFirstRetry: le16_to_cpu(pSMBr->t2.ParameterOffset)); if (parms->EndofSearch) - psrch_inf->endOfSearch = TRUE; + psrch_inf->endOfSearch = true; else - psrch_inf->endOfSearch = FALSE; + psrch_inf->endOfSearch = false; psrch_inf->entries_in_buffer = le16_to_cpu(parms->SearchCount); @@ -3624,7 +3624,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, cFYI(1, ("In FindNext")); - if (psrch_inf->endOfSearch == TRUE) + if (psrch_inf->endOfSearch) return -ENOENT; rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, @@ -3682,7 +3682,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, cifs_stats_inc(&tcon->num_fnext); if (rc) { if (rc == -EBADF) { - psrch_inf->endOfSearch = TRUE; + psrch_inf->endOfSearch = true; rc = 0; /* search probably was closed at end of search*/ } else cFYI(1, ("FindNext returned = %d", rc)); @@ -3692,9 +3692,9 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, if (rc == 0) { /* BB fixme add lock for file (srch_info) struct here */ if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) - psrch_inf->unicode = TRUE; + psrch_inf->unicode = true; else - psrch_inf->unicode = FALSE; + psrch_inf->unicode = false; response_data = (char *) &pSMBr->hdr.Protocol + le16_to_cpu(pSMBr->t2.ParameterOffset); parms = (T2_FNEXT_RSP_PARMS *)response_data; @@ -3709,9 +3709,9 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, psrch_inf->ntwrk_buf_start = (char *)pSMB; psrch_inf->smallBuf = 0; if (parms->EndofSearch) - psrch_inf->endOfSearch = TRUE; + psrch_inf->endOfSearch = true; else - psrch_inf->endOfSearch = FALSE; + psrch_inf->endOfSearch = false; psrch_inf->entries_in_buffer = le16_to_cpu(parms->SearchCount); psrch_inf->index_of_last_entry += @@ -4586,7 +4586,7 @@ QFSPosixRetry: int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, const char *fileName, - __u64 size, int SetAllocation, + __u64 size, bool SetAllocation, const struct nls_table *nls_codepage, int remap) { struct smb_com_transaction2_spi_req *pSMB = NULL; @@ -4675,7 +4675,7 @@ SetEOFRetry: int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, - __u16 fid, __u32 pid_of_opener, int SetAllocation) + __u16 fid, __u32 pid_of_opener, bool SetAllocation) { struct smb_com_transaction2_sfi_req *pSMB = NULL; char *data_offset; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e1710673016..6c4332f8da6 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -71,23 +71,23 @@ struct smb_vol { mode_t file_mode; mode_t dir_mode; unsigned secFlg; - unsigned rw:1; - unsigned retry:1; - unsigned intr:1; - unsigned setuids:1; - unsigned override_uid:1; - unsigned override_gid:1; - unsigned noperm:1; - unsigned no_psx_acl:1; /* set if posix acl support should be disabled */ - unsigned cifs_acl:1; - unsigned no_xattr:1; /* set if xattr (EA) support should be disabled*/ - unsigned server_ino:1; /* use inode numbers from server ie UniqueId */ - unsigned direct_io:1; - unsigned remap:1; /* set to remap seven reserved chars in filenames */ - unsigned posix_paths:1; /* unset to not ask for posix pathnames. */ - unsigned no_linux_ext:1; - unsigned sfu_emul:1; - unsigned nullauth:1; /* attempt to authenticate with null user */ + bool rw:1; + bool retry:1; + bool intr:1; + bool setuids:1; + bool override_uid:1; + bool override_gid:1; + bool noperm:1; + bool no_psx_acl:1; /* set if posix acl support should be disabled */ + bool cifs_acl:1; + bool no_xattr:1; /* set if xattr (EA) support should be disabled*/ + bool server_ino:1; /* use inode numbers from server ie UniqueId */ + bool direct_io:1; + bool remap:1; /* set to remap seven reserved chars in filenames */ + bool posix_paths:1; /* unset to not ask for posix pathnames. */ + bool no_linux_ext:1; + bool sfu_emul:1; + bool nullauth:1; /* attempt to authenticate with null user */ unsigned nocase; /* request case insensitive filenames */ unsigned nobrl; /* disable sending byte range locks to srv */ unsigned int rsize; @@ -345,8 +345,8 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) struct task_struct *task_to_wake = NULL; struct mid_q_entry *mid_entry; char temp; - int isLargeBuf = FALSE; - int isMultiRsp; + bool isLargeBuf = false; + bool isMultiRsp; int reconnect; current->flags |= PF_MEMALLOC; @@ -390,8 +390,8 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) } else /* if existing small buf clear beginning */ memset(smallbuf, 0, sizeof(struct smb_hdr)); - isLargeBuf = FALSE; - isMultiRsp = FALSE; + isLargeBuf = false; + isMultiRsp = false; smb_buffer = smallbuf; iov.iov_base = smb_buffer; iov.iov_len = 4; @@ -517,7 +517,7 @@ incomplete_rcv: reconnect = 0; if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) { - isLargeBuf = TRUE; + isLargeBuf = true; memcpy(bigbuf, smallbuf, 4); smb_buffer = bigbuf; } @@ -582,16 +582,18 @@ incomplete_rcv: (mid_entry->command == smb_buffer->Command)) { if (check2ndT2(smb_buffer,server->maxBuf) > 0) { /* We have a multipart transact2 resp */ - isMultiRsp = TRUE; + isMultiRsp = true; if (mid_entry->resp_buf) { /* merge response - fix up 1st*/ if (coalesce_t2(smb_buffer, mid_entry->resp_buf)) { - mid_entry->multiRsp = 1; + mid_entry->multiRsp = + true; break; } else { /* all parts received */ - mid_entry->multiEnd = 1; + mid_entry->multiEnd = + true; goto multi_t2_fnd; } } else { @@ -603,17 +605,15 @@ incomplete_rcv: /* Have first buffer */ mid_entry->resp_buf = smb_buffer; - mid_entry->largeBuf = 1; + mid_entry->largeBuf = + true; bigbuf = NULL; } } break; } mid_entry->resp_buf = smb_buffer; - if (isLargeBuf) - mid_entry->largeBuf = 1; - else - mid_entry->largeBuf = 0; + mid_entry->largeBuf = isLargeBuf; multi_t2_fnd: task_to_wake = mid_entry->tsk; mid_entry->midState = MID_RESPONSE_RECEIVED; @@ -638,8 +638,8 @@ multi_t2_fnd: smallbuf = NULL; } wake_up_process(task_to_wake); - } else if ((is_valid_oplock_break(smb_buffer, server) == FALSE) - && (isMultiRsp == FALSE)) { + } else if (!is_valid_oplock_break(smb_buffer, server) && + !isMultiRsp) { cERROR(1, ("No task to wake, unknown frame received! " "NumMids %d", midCount.counter)); cifs_dump_mem("Received Data is: ", (char *)smb_buffer, @@ -825,7 +825,7 @@ cifs_parse_mount_options(char *options, const char *devname, vol->file_mode = (S_IRWXUGO | S_ISGID) & (~S_IXGRP); /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ - vol->rw = TRUE; + vol->rw = true; /* default is always to request posix paths. */ vol->posix_paths = 1; @@ -1181,7 +1181,7 @@ cifs_parse_mount_options(char *options, const char *devname, } else if (strnicmp(data, "guest", 5) == 0) { /* ignore */ } else if (strnicmp(data, "rw", 2) == 0) { - vol->rw = TRUE; + vol->rw = true; } else if ((strnicmp(data, "suid", 4) == 0) || (strnicmp(data, "nosuid", 6) == 0) || (strnicmp(data, "exec", 4) == 0) || @@ -1197,7 +1197,7 @@ cifs_parse_mount_options(char *options, const char *devname, is ok to just ignore them */ continue; } else if (strnicmp(data, "ro", 2) == 0) { - vol->rw = FALSE; + vol->rw = false; } else if (strnicmp(data, "hard", 4) == 0) { vol->retry = 1; } else if (strnicmp(data, "soft", 4) == 0) { @@ -2602,7 +2602,7 @@ sesssetup_nomem: /* do not return an error on nomem for the info strings, static int CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, - struct cifsSesInfo *ses, int *pNTLMv2_flag, + struct cifsSesInfo *ses, bool *pNTLMv2_flag, const struct nls_table *nls_codepage) { struct smb_hdr *smb_buffer; @@ -2625,7 +2625,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, if (ses == NULL) return -EINVAL; domain = ses->domainName; - *pNTLMv2_flag = FALSE; + *pNTLMv2_flag = false; smb_buffer = cifs_buf_get(); if (smb_buffer == NULL) { return -ENOMEM; @@ -2778,7 +2778,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, CIFS_CRYPTO_KEY_SIZE); if (SecurityBlob2->NegotiateFlags & cpu_to_le32(NTLMSSP_NEGOTIATE_NTLMV2)) - *pNTLMv2_flag = TRUE; + *pNTLMv2_flag = true; if ((SecurityBlob2->NegotiateFlags & cpu_to_le32(NTLMSSP_NEGOTIATE_ALWAYS_SIGN)) @@ -2939,7 +2939,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, } static int CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, - char *ntlm_session_key, int ntlmv2_flag, + char *ntlm_session_key, bool ntlmv2_flag, const struct nls_table *nls_codepage) { struct smb_hdr *smb_buffer; @@ -3569,7 +3569,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, { int rc = 0; char ntlm_session_key[CIFS_SESS_KEY_SIZE]; - int ntlmv2_flag = FALSE; + bool ntlmv2_flag = false; int first_time = 0; /* what if server changes its buffer size after dropping the session? */ diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 0f5c62ba403..6ed775986be 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -130,7 +130,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, struct cifsFileInfo *pCifsFile = NULL; struct cifsInodeInfo *pCifsInode; int disposition = FILE_OVERWRITE_IF; - int write_only = FALSE; + bool write_only = false; xid = GetXid(); @@ -152,7 +152,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, if (oflags & FMODE_WRITE) { desiredAccess |= GENERIC_WRITE; if (!(oflags & FMODE_READ)) - write_only = TRUE; + write_only = true; } if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) @@ -254,7 +254,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, d_instantiate(direntry, newinode); } if ((nd == NULL /* nfsd case - nfs srv does not set nd */) || - ((nd->flags & LOOKUP_OPEN) == FALSE)) { + (!(nd->flags & LOOKUP_OPEN))) { /* mknod case - do not leave file open */ CIFSSMBClose(xid, pTcon, fileHandle); } else if (newinode) { @@ -266,8 +266,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, pCifsFile->netfid = fileHandle; pCifsFile->pid = current->tgid; pCifsFile->pInode = newinode; - pCifsFile->invalidHandle = FALSE; - pCifsFile->closePend = FALSE; + pCifsFile->invalidHandle = false; + pCifsFile->closePend = false; init_MUTEX(&pCifsFile->fh_sem); mutex_init(&pCifsFile->lock_mutex); INIT_LIST_HEAD(&pCifsFile->llist); @@ -280,7 +280,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, pCifsInode = CIFS_I(newinode); if (pCifsInode) { /* if readable file instance put first in list*/ - if (write_only == TRUE) { + if (write_only) { list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); } else { @@ -288,12 +288,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, &pCifsInode->openFileList); } if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { - pCifsInode->clientCanCacheAll = TRUE; - pCifsInode->clientCanCacheRead = TRUE; + pCifsInode->clientCanCacheAll = true; + pCifsInode->clientCanCacheRead = true; cFYI(1, ("Exclusive Oplock inode %p", newinode)); } else if ((oplock & 0xF) == OPLOCK_READ) - pCifsInode->clientCanCacheRead = TRUE; + pCifsInode->clientCanCacheRead = true; } write_unlock(&GlobalSMBSeslock); } diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c index 7d1d5aa4c43..5a57581eb4b 100644 --- a/fs/cifs/fcntl.c +++ b/fs/cifs/fcntl.c @@ -68,7 +68,7 @@ int cifs_dir_notify(struct file *file, unsigned long arg) { int xid; int rc = -EINVAL; - int oplock = FALSE; + int oplock = 0; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; char *full_path = NULL; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 40b690073fc..31a0a33b9d9 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -51,8 +51,8 @@ static inline struct cifsFileInfo *cifs_init_private( INIT_LIST_HEAD(&private_data->llist); private_data->pfile = file; /* needed for writepage */ private_data->pInode = inode; - private_data->invalidHandle = FALSE; - private_data->closePend = FALSE; + private_data->invalidHandle = false; + private_data->closePend = false; /* we have to track num writers to the inode, since writepages does not tell us which handle the write is for so there can be a close (overlapping with write) of the filehandle that @@ -148,12 +148,12 @@ client_can_cache: full_path, buf, inode->i_sb, xid, NULL); if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) { - pCifsInode->clientCanCacheAll = TRUE; - pCifsInode->clientCanCacheRead = TRUE; + pCifsInode->clientCanCacheAll = true; + pCifsInode->clientCanCacheRead = true; cFYI(1, ("Exclusive Oplock granted on inode %p", file->f_path.dentry->d_inode)); } else if ((*oplock & 0xF) == OPLOCK_READ) - pCifsInode->clientCanCacheRead = TRUE; + pCifsInode->clientCanCacheRead = true; return rc; } @@ -247,7 +247,7 @@ int cifs_open(struct inode *inode, struct file *file) if (oplockEnabled) oplock = REQ_OPLOCK; else - oplock = FALSE; + oplock = 0; /* BB pass O_SYNC flag through on file attributes .. BB */ @@ -339,7 +339,7 @@ static int cifs_relock_file(struct cifsFileInfo *cifsFile) return rc; } -static int cifs_reopen_file(struct file *file, int can_flush) +static int cifs_reopen_file(struct file *file, bool can_flush) { int rc = -EACCES; int xid, oplock; @@ -360,7 +360,7 @@ static int cifs_reopen_file(struct file *file, int can_flush) xid = GetXid(); down(&pCifsFile->fh_sem); - if (pCifsFile->invalidHandle == FALSE) { + if (!pCifsFile->invalidHandle) { up(&pCifsFile->fh_sem); FreeXid(xid); return 0; @@ -404,7 +404,7 @@ reopen_error_exit: if (oplockEnabled) oplock = REQ_OPLOCK; else - oplock = FALSE; + oplock = 0; /* Can not refresh inode by passing in file_info buf to be returned by SMBOpen and then calling get_inode_info with returned buf @@ -422,7 +422,7 @@ reopen_error_exit: cFYI(1, ("oplock: %d", oplock)); } else { pCifsFile->netfid = netfid; - pCifsFile->invalidHandle = FALSE; + pCifsFile->invalidHandle = false; up(&pCifsFile->fh_sem); pCifsInode = CIFS_I(inode); if (pCifsInode) { @@ -432,8 +432,8 @@ reopen_error_exit: CIFS_I(inode)->write_behind_rc = rc; /* temporarily disable caching while we go to server to get inode info */ - pCifsInode->clientCanCacheAll = FALSE; - pCifsInode->clientCanCacheRead = FALSE; + pCifsInode->clientCanCacheAll = false; + pCifsInode->clientCanCacheRead = false; if (pTcon->unix_ext) rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, xid); @@ -448,16 +448,16 @@ reopen_error_exit: we can not go to the server to get the new inod info */ if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { - pCifsInode->clientCanCacheAll = TRUE; - pCifsInode->clientCanCacheRead = TRUE; + pCifsInode->clientCanCacheAll = true; + pCifsInode->clientCanCacheRead = true; cFYI(1, ("Exclusive Oplock granted on inode %p", file->f_path.dentry->d_inode)); } else if ((oplock & 0xF) == OPLOCK_READ) { - pCifsInode->clientCanCacheRead = TRUE; - pCifsInode->clientCanCacheAll = FALSE; + pCifsInode->clientCanCacheRead = true; + pCifsInode->clientCanCacheAll = false; } else { - pCifsInode->clientCanCacheRead = FALSE; - pCifsInode->clientCanCacheAll = FALSE; + pCifsInode->clientCanCacheRead = false; + pCifsInode->clientCanCacheAll = false; } cifs_relock_file(pCifsFile); } @@ -484,7 +484,7 @@ int cifs_close(struct inode *inode, struct file *file) if (pSMBFile) { struct cifsLockInfo *li, *tmp; - pSMBFile->closePend = TRUE; + pSMBFile->closePend = true; if (pTcon) { /* no sense reconnecting to close a file that is already closed */ @@ -553,8 +553,8 @@ int cifs_close(struct inode *inode, struct file *file) cFYI(1, ("closing last open instance for inode %p", inode)); /* if the file is not open we do not know if we can cache info on this inode, much less write behind and read ahead */ - CIFS_I(inode)->clientCanCacheRead = FALSE; - CIFS_I(inode)->clientCanCacheAll = FALSE; + CIFS_I(inode)->clientCanCacheRead = false; + CIFS_I(inode)->clientCanCacheAll = false; } read_unlock(&GlobalSMBSeslock); if ((rc == 0) && CIFS_I(inode)->write_behind_rc) @@ -583,9 +583,9 @@ int cifs_closedir(struct inode *inode, struct file *file) pTcon = cifs_sb->tcon; cFYI(1, ("Freeing private data in close dir")); - if ((pCFileStruct->srch_inf.endOfSearch == FALSE) && - (pCFileStruct->invalidHandle == FALSE)) { - pCFileStruct->invalidHandle = TRUE; + if (!pCFileStruct->srch_inf.endOfSearch && + !pCFileStruct->invalidHandle) { + pCFileStruct->invalidHandle = true; rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid); cFYI(1, ("Closing uncompleted readdir with rc %d", rc)); @@ -637,12 +637,12 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) __u32 numLock = 0; __u32 numUnlock = 0; __u64 length; - int wait_flag = FALSE; + bool wait_flag = false; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; __u16 netfid; __u8 lockType = LOCKING_ANDX_LARGE_FILES; - int posix_locking; + bool posix_locking; length = 1 + pfLock->fl_end - pfLock->fl_start; rc = -EACCES; @@ -659,7 +659,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) cFYI(1, ("Flock")); if (pfLock->fl_flags & FL_SLEEP) { cFYI(1, ("Blocking lock")); - wait_flag = TRUE; + wait_flag = true; } if (pfLock->fl_flags & FL_ACCESS) cFYI(1, ("Process suspended by mandatory locking - " @@ -794,7 +794,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) stored_rc = CIFSSMBLock(xid, pTcon, netfid, li->length, li->offset, - 1, 0, li->type, FALSE); + 1, 0, li->type, false); if (stored_rc) rc = stored_rc; @@ -866,7 +866,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, filemap_fdatawait from here so tell reopen_file not to flush data to server now */ - rc = cifs_reopen_file(file, FALSE); + rc = cifs_reopen_file(file, false); if (rc != 0) break; } @@ -966,7 +966,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data, filemap_fdatawait from here so tell reopen_file not to flush data to server now */ - rc = cifs_reopen_file(file, FALSE); + rc = cifs_reopen_file(file, false); if (rc != 0) break; } @@ -1093,7 +1093,7 @@ refind_writable: read_unlock(&GlobalSMBSeslock); /* Had to unlock since following call can block */ - rc = cifs_reopen_file(open_file->pfile, FALSE); + rc = cifs_reopen_file(open_file->pfile, false); if (!rc) { if (!open_file->closePend) return open_file; @@ -1608,7 +1608,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data, int buf_type = CIFS_NO_BUFFER; if ((open_file->invalidHandle) && (!open_file->closePend)) { - rc = cifs_reopen_file(file, TRUE); + rc = cifs_reopen_file(file, true); if (rc != 0) break; } @@ -1693,7 +1693,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, while (rc == -EAGAIN) { if ((open_file->invalidHandle) && (!open_file->closePend)) { - rc = cifs_reopen_file(file, TRUE); + rc = cifs_reopen_file(file, true); if (rc != 0) break; } @@ -1850,7 +1850,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, while (rc == -EAGAIN) { if ((open_file->invalidHandle) && (!open_file->closePend)) { - rc = cifs_reopen_file(file, TRUE); + rc = cifs_reopen_file(file, true); if (rc != 0) break; } @@ -2009,10 +2009,10 @@ static int is_inode_writable(struct cifsInodeInfo *cifs_inode) refreshing the inode only on increases in the file size but this is tricky to do without racing with writebehind page caching in the current Linux kernel design */ -int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) +bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) { if (!cifsInode) - return 1; + return true; if (is_inode_writable(cifsInode)) { /* This inode is open for write at least once */ @@ -2022,15 +2022,15 @@ int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { /* since no page cache to corrupt on directio we can change size safely */ - return 1; + return true; } if (i_size_read(&cifsInode->vfs_inode) < end_of_file) - return 1; + return true; - return 0; + return false; } else - return 1; + return true; } static int cifs_prepare_write(struct file *file, struct page *page, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index e1031b9e2c5..8eaa9e72fe8 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -281,7 +281,7 @@ static int decode_sfu_inode(struct inode *inode, __u64 size, struct cifs_sb_info *cifs_sb, int xid) { int rc; - int oplock = FALSE; + int oplock = 0; __u16 netfid; struct cifsTconInfo *pTcon = cifs_sb->tcon; char buf[24]; @@ -389,7 +389,7 @@ int cifs_get_inode_info(struct inode **pinode, struct cifs_sb_info *cifs_sb = CIFS_SB(sb); const unsigned char *full_path = NULL; char *buf = NULL; - int adjustTZ = FALSE; + bool adjustTZ = bool; bool is_dfs_referral = false; pTcon = cifs_sb->tcon; @@ -425,7 +425,7 @@ try_again_CIFSSMBQPathInfo: pfindData, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - adjustTZ = TRUE; + adjustTZ = true; } } /* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */ @@ -703,7 +703,7 @@ psx_del_no_retry: } else if (rc == -ENOENT) { d_drop(direntry); } else if (rc == -ETXTBSY) { - int oplock = FALSE; + int oplock = 0; __u16 netfid; rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, DELETE, @@ -736,7 +736,7 @@ psx_del_no_retry: rc = -EOPNOTSUPP; if (rc == -EOPNOTSUPP) { - int oplock = FALSE; + int oplock = 0; __u16 netfid; /* rc = CIFSSMBSetAttrLegacy(xid, pTcon, full_path, @@ -774,7 +774,7 @@ psx_del_no_retry: if (direntry->d_inode) drop_nlink(direntry->d_inode); } else if (rc == -ETXTBSY) { - int oplock = FALSE; + int oplock = 0; __u16 netfid; rc = CIFSSMBOpen(xid, pTcon, full_path, @@ -1149,7 +1149,7 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, cFYI(1, ("rename rc %d", rc)); if ((rc == -EIO) || (rc == -EEXIST)) { - int oplock = FALSE; + int oplock = 0; __u16 netfid; /* BB FIXME Is Generic Read correct for rename? */ @@ -1186,7 +1186,7 @@ int cifs_revalidate(struct dentry *direntry) struct cifsInodeInfo *cifsInode; loff_t local_size; struct timespec local_mtime; - int invalidate_inode = FALSE; + bool invalidate_inode = false; if (direntry->d_inode == NULL) return -ENOENT; @@ -1268,7 +1268,7 @@ int cifs_revalidate(struct dentry *direntry) only ones who could have modified the file and the server copy is staler than ours */ } else { - invalidate_inode = TRUE; + invalidate_inode = true; } } @@ -1402,8 +1402,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) int rc = -EACCES; struct cifsFileInfo *open_file = NULL; FILE_BASIC_INFO time_buf; - int set_time = FALSE; - int set_dosattr = FALSE; + bool set_time = false; + bool set_dosattr = false; __u64 mode = 0xFFFFFFFFFFFFFFFFULL; __u64 uid = 0xFFFFFFFFFFFFFFFFULL; __u64 gid = 0xFFFFFFFFFFFFFFFFULL; @@ -1464,7 +1464,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) __u16 nfid = open_file->netfid; __u32 npid = open_file->pid; rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, - nfid, npid, FALSE); + nfid, npid, false); atomic_dec(&open_file->wrtPending); cFYI(1, ("SetFSize for attrs rc = %d", rc)); if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { @@ -1484,14 +1484,14 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) it was found or because there was an error setting it by handle */ rc = CIFSSMBSetEOF(xid, pTcon, full_path, - attrs->ia_size, FALSE, + attrs->ia_size, false, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc)); if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { __u16 netfid; - int oplock = FALSE; + int oplock = 0; rc = SMBLegacyOpen(xid, pTcon, full_path, FILE_OPEN, @@ -1516,7 +1516,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) /* Server is ok setting allocation size implicitly - no need to call: - CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, TRUE, + CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, true, cifs_sb->local_nls); */ @@ -1564,7 +1564,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) #endif /* not writeable */ if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) { - set_dosattr = TRUE; + set_dosattr = true; time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs | ATTR_READONLY); @@ -1574,7 +1574,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) not be able to write to it - so if any write bit is enabled for user or group or other we need to at least try to remove r/o dos attr */ - set_dosattr = TRUE; + set_dosattr = true; time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs & (~ATTR_READONLY)); /* Windows ignores set to zero */ @@ -1588,14 +1588,14 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) } if (attrs->ia_valid & ATTR_ATIME) { - set_time = TRUE; + set_time = true; time_buf.LastAccessTime = cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); } else time_buf.LastAccessTime = 0; if (attrs->ia_valid & ATTR_MTIME) { - set_time = TRUE; + set_time = true; time_buf.LastWriteTime = cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); } else @@ -1606,7 +1606,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) server times */ if (set_time && (attrs->ia_valid & ATTR_CTIME)) { - set_time = TRUE; + set_time = true; /* Although Samba throws this field away it may be useful to Windows - but we do not want to set ctime unless some other @@ -1630,7 +1630,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) rc = -EOPNOTSUPP; if (rc == -EOPNOTSUPP) { - int oplock = FALSE; + int oplock = 0; __u16 netfid; cFYI(1, ("calling SetFileInfo since SetPathInfo for " diff --git a/fs/cifs/link.c b/fs/cifs/link.c index d4e7ec93285..1c2c3ce5020 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -230,7 +230,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) struct inode *inode = direntry->d_inode; int rc = -EACCES; int xid; - int oplock = FALSE; + int oplock = 0; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; char *full_path = NULL; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 2a42d9fedbb..1d69b8014e0 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -496,7 +496,8 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) } return 0; } -int + +bool is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) { struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf; @@ -522,17 +523,17 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) pnotify->Action)); /* BB removeme BB */ /* cifs_dump_mem("Rcvd notify Data: ",buf, sizeof(struct smb_hdr)+60); */ - return TRUE; + return true; } if (pSMBr->hdr.Status.CifsError) { cFYI(1, ("notify err 0x%d", pSMBr->hdr.Status.CifsError)); - return TRUE; + return true; } - return FALSE; + return false; } if (pSMB->hdr.Command != SMB_COM_LOCKING_ANDX) - return FALSE; + return false; if (pSMB->hdr.Flags & SMBFLG_RESPONSE) { /* no sense logging error on invalid handle on oplock break - harmless race between close request and oplock @@ -541,21 +542,21 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) if ((NT_STATUS_INVALID_HANDLE) == le32_to_cpu(pSMB->hdr.Status.CifsError)) { cFYI(1, ("invalid handle on oplock break")); - return TRUE; + return true; } else if (ERRbadfid == le16_to_cpu(pSMB->hdr.Status.DosError.Error)) { - return TRUE; + return true; } else { - return FALSE; /* on valid oplock brk we get "request" */ + return false; /* on valid oplock brk we get "request" */ } } if (pSMB->hdr.WordCount != 8) - return FALSE; + return false; cFYI(1, ("oplock type 0x%d level 0x%d", pSMB->LockType, pSMB->OplockLevel)); if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) - return FALSE; + return false; /* look up tcon based on tid & uid */ read_lock(&GlobalSMBSeslock); @@ -573,11 +574,11 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) ("file id match, oplock break")); pCifsInode = CIFS_I(netfile->pInode); - pCifsInode->clientCanCacheAll = FALSE; + pCifsInode->clientCanCacheAll = false; if (pSMB->OplockLevel == 0) pCifsInode->clientCanCacheRead - = FALSE; - pCifsInode->oplockPending = TRUE; + = false; + pCifsInode->oplockPending = true; AllocOplockQEntry(netfile->pInode, netfile->netfid, tcon); @@ -585,17 +586,17 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) ("about to wake up oplock thread")); if (oplockThread) wake_up_process(oplockThread); - return TRUE; + return true; } } read_unlock(&GlobalSMBSeslock); cFYI(1, ("No matching file for oplock break")); - return TRUE; + return true; } } read_unlock(&GlobalSMBSeslock); cFYI(1, ("Can not process oplock break for non-existent connection")); - return TRUE; + return true; } void diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 32b445edc88..34ec32100c7 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -447,8 +447,8 @@ static int initiate_cifs_search(const int xid, struct file *file) if (file->private_data == NULL) return -ENOMEM; cifsFile = file->private_data; - cifsFile->invalidHandle = TRUE; - cifsFile->srch_inf.endOfSearch = FALSE; + cifsFile->invalidHandle = true; + cifsFile->srch_inf.endOfSearch = false; cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); if (cifs_sb == NULL) @@ -485,7 +485,7 @@ ffirst_retry: cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); if (rc == 0) - cifsFile->invalidHandle = FALSE; + cifsFile->invalidHandle = false; if ((rc == -EOPNOTSUPP) && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; @@ -670,7 +670,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, (index_to_find < first_entry_in_buffer)) { /* close and restart search */ cFYI(1, ("search backing up - close and restart search")); - cifsFile->invalidHandle = TRUE; + cifsFile->invalidHandle = true; CIFSFindClose(xid, pTcon, cifsFile->netfid); kfree(cifsFile->search_resume_name); cifsFile->search_resume_name = NULL; @@ -692,7 +692,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, } while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && - (rc == 0) && (cifsFile->srch_inf.endOfSearch == FALSE)) { + (rc == 0) && !cifsFile->srch_inf.endOfSearch) { cFYI(1, ("calling findnext2")); rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, &cifsFile->srch_inf); @@ -1038,7 +1038,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) break; } } /* else { - cifsFile->invalidHandle = TRUE; + cifsFile->invalidHandle = true; CIFSFindClose(xid, pTcon, cifsFile->netfid); } kfree(cifsFile->search_resume_name); diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 58bbfd992cc..ff3232fa101 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -35,11 +35,11 @@ #include "cifs_debug.h" #include "cifsencrypt.h" -#ifndef FALSE -#define FALSE 0 +#ifndef false +#define false 0 #endif -#ifndef TRUE -#define TRUE 1 +#ifndef true +#define true 1 #endif /* following came from the other byteorder.h to avoid include conflicts */ diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 8cd6a445b01..e9527eedc63 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -264,7 +264,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, #ifdef CONFIG_CIFS_EXPERIMENTAL else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { __u16 fid; - int oplock = FALSE; + int oplock = 0; struct cifs_ntsd *pacl = NULL; __u32 buflen = 0; if (experimEnabled) -- cgit v1.2.3 From c3270e577c18b3d0e984c3371493205a4807db9d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 24 Apr 2008 12:52:20 +0200 Subject: relay: fix splice problem Splice isn't always incrementing the ppos correctly, which broke relay splice. Signed-off-by: Tom Zanussi Signed-off-by: Jens Axboe --- fs/splice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index eeb1a86a701..633f58ebfb7 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1075,7 +1075,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, ret = splice_direct_to_actor(in, &sd, direct_splice_actor); if (ret > 0) - *ppos += ret; + *ppos = sd.pos; return ret; } -- cgit v1.2.3 From 68154e90c9d1492d570671ae181d9a8f8530da55 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 25 Apr 2008 12:47:50 +0200 Subject: block: add dma alignment and padding support to blk_rq_map_kern This patch adds bio_copy_kern similar to bio_copy_user. blk_rq_map_kern uses bio_copy_kern instead of bio_map_kern if necessary. bio_copy_kern uses temporary pages and the bi_end_io callback frees these pages. bio_copy_kern saves the original kernel buffer at bio->bi_private it doesn't use something like struct bio_map_data to store the information about the caller. Signed-off-by: FUJITA Tomonori Cc: Tejun Heo Signed-off-by: Jens Axboe --- fs/bio.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 6e0b6f66df0..799f86deff2 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -937,6 +937,95 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, return ERR_PTR(-EINVAL); } +static void bio_copy_kern_endio(struct bio *bio, int err) +{ + struct bio_vec *bvec; + const int read = bio_data_dir(bio) == READ; + char *p = bio->bi_private; + int i; + + __bio_for_each_segment(bvec, bio, i, 0) { + char *addr = page_address(bvec->bv_page); + + if (read && !err) + memcpy(p, addr, bvec->bv_len); + + __free_page(bvec->bv_page); + p += bvec->bv_len; + } + + bio_put(bio); +} + +/** + * bio_copy_kern - copy kernel address into bio + * @q: the struct request_queue for the bio + * @data: pointer to buffer to copy + * @len: length in bytes + * @gfp_mask: allocation flags for bio and page allocation + * + * copy the kernel address into a bio suitable for io to a block + * device. Returns an error pointer in case of error. + */ +struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, + gfp_t gfp_mask, int reading) +{ + unsigned long kaddr = (unsigned long)data; + unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = kaddr >> PAGE_SHIFT; + const int nr_pages = end - start; + struct bio *bio; + struct bio_vec *bvec; + int i, ret; + + bio = bio_alloc(gfp_mask, nr_pages); + if (!bio) + return ERR_PTR(-ENOMEM); + + while (len) { + struct page *page; + unsigned int bytes = PAGE_SIZE; + + if (bytes > len) + bytes = len; + + page = alloc_page(q->bounce_gfp | gfp_mask); + if (!page) { + ret = -ENOMEM; + goto cleanup; + } + + if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) { + ret = -EINVAL; + goto cleanup; + } + + len -= bytes; + } + + if (!reading) { + void *p = data; + + bio_for_each_segment(bvec, bio, i) { + char *addr = page_address(bvec->bv_page); + + memcpy(addr, p, bvec->bv_len); + p += bvec->bv_len; + } + } + + bio->bi_private = data; + bio->bi_end_io = bio_copy_kern_endio; + return bio; +cleanup: + bio_for_each_segment(bvec, bio, i) + __free_page(bvec->bv_page); + + bio_put(bio); + + return ERR_PTR(ret); +} + /* * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions * for performing direct-IO in BIOs. @@ -1273,6 +1362,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs); EXPORT_SYMBOL(bio_map_user); EXPORT_SYMBOL(bio_unmap_user); EXPORT_SYMBOL(bio_map_kern); +EXPORT_SYMBOL(bio_copy_kern); EXPORT_SYMBOL(bio_pair_release); EXPORT_SYMBOL(bio_split); EXPORT_SYMBOL(bio_split_pool); -- cgit v1.2.3 From cdac75e6f2fec9abc21d0abb4e5d80720eeebb10 Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Tue, 29 Apr 2008 00:58:34 -0700 Subject: epoll: avoid kmemcheck warning Epoll calls rb_set_parent(n, n) to initialize the rb-tree node, but rb_set_parent() accesses node's pointer in its code. This creates a warning in kmemcheck (reported by Vegard Nossum) about an uninitialized memory access. The warning is harmless since the following rb-tree node insert is going to overwrite the node data. In any case I think it's better to not have that happening at all, and fix it by simplifying the code to get rid of a few lines that became superfluous after the previous epoll changes. Signed-off-by: Davide Libenzi Cc: Vegard Nossum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index a415f42d32c..0d237182d72 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -257,25 +257,6 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1, (p1->file < p2->file ? -1 : p1->fd - p2->fd)); } -/* Special initialization for the RB tree node to detect linkage */ -static inline void ep_rb_initnode(struct rb_node *n) -{ - rb_set_parent(n, n); -} - -/* Removes a node from the RB tree and marks it for a fast is-linked check */ -static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r) -{ - rb_erase(n, r); - rb_set_parent(n, n); -} - -/* Fast check to verify that the item is linked to the main RB tree */ -static inline int ep_rb_linked(struct rb_node *n) -{ - return rb_parent(n) != n; -} - /* Tells us if the item is currently linked */ static inline int ep_is_linked(struct list_head *p) { @@ -283,13 +264,13 @@ static inline int ep_is_linked(struct list_head *p) } /* Get the "struct epitem" from a wait queue pointer */ -static inline struct epitem * ep_item_from_wait(wait_queue_t *p) +static inline struct epitem *ep_item_from_wait(wait_queue_t *p) { return container_of(p, struct eppoll_entry, wait)->base; } /* Get the "struct epitem" from an epoll queue wrapper */ -static inline struct epitem * ep_item_from_epqueue(poll_table *p) +static inline struct epitem *ep_item_from_epqueue(poll_table *p) { return container_of(p, struct ep_pqueue, pt)->epi; } @@ -411,8 +392,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) list_del_init(&epi->fllink); spin_unlock(&file->f_ep_lock); - if (ep_rb_linked(&epi->rbn)) - ep_rb_erase(&epi->rbn, &ep->rbr); + rb_erase(&epi->rbn, &ep->rbr); spin_lock_irqsave(&ep->lock, flags); if (ep_is_linked(&epi->rdllink)) @@ -728,7 +708,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, goto error_return; /* Item initialization follow here ... */ - ep_rb_initnode(&epi->rbn); INIT_LIST_HEAD(&epi->rdllink); INIT_LIST_HEAD(&epi->fllink); INIT_LIST_HEAD(&epi->pwqlist); -- cgit v1.2.3 From e5949050f2610fa526b154e0d8379218e54f49d1 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 29 Apr 2008 00:58:41 -0700 Subject: adfs: work around bogus sparse warning fs/adfs/dir_f.c:126:4: warning: do-while statement is not a compound statement Signed-off-by: Harvey Harrison Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/adfs/dir_f.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c index b9b2b27b68c..ea7df214692 100644 --- a/fs/adfs/dir_f.c +++ b/fs/adfs/dir_f.c @@ -122,9 +122,9 @@ adfs_dir_checkbyte(const struct adfs_dir *dir) ptr.ptr8 = bufoff(bh, i); end.ptr8 = ptr.ptr8 + last - i; - do + do { dircheck = *ptr.ptr8++ ^ ror13(dircheck); - while (ptr.ptr8 < end.ptr8); + } while (ptr.ptr8 < end.ptr8); } /* -- cgit v1.2.3 From 9fe76c763f0e18582bcb670c386978e83a755d05 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 29 Apr 2008 00:58:44 -0700 Subject: coda: add static to functions in dir.c coda_unlink, coda_rmdir, coda_readdir can all be static, the forward declarations already were. Signed-off-by: Harvey Harrison Cc: Jan Harkes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/coda/dir.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/coda/dir.c b/fs/coda/dir.c index f89ff083079..3d2580e00a3 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -345,7 +345,7 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de, } /* destruction routines: unlink, rmdir */ -int coda_unlink(struct inode *dir, struct dentry *de) +static int coda_unlink(struct inode *dir, struct dentry *de) { int error; const char *name = de->d_name.name; @@ -365,7 +365,7 @@ int coda_unlink(struct inode *dir, struct dentry *de) return 0; } -int coda_rmdir(struct inode *dir, struct dentry *de) +static int coda_rmdir(struct inode *dir, struct dentry *de) { const char *name = de->d_name.name; int len = de->d_name.len; @@ -424,7 +424,7 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, /* file operations for directories */ -int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) +static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) { struct coda_file_info *cfi; struct file *host_file; -- cgit v1.2.3 From 63e3453e547b20321381b212cb1ee11537dc843d Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 29 Apr 2008 00:58:44 -0700 Subject: befs: fix sparse warning in linuxvfs.c Use link as the variable name to avoid shadowing the arg. fs/befs/linuxvfs.c:492:8: warning: symbol 'p' shadows an earlier one fs/befs/linuxvfs.c:488:77: originally declared here Signed-off-by: Harvey Harrison Cc: "Sergey S. Kostyliov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/befs/linuxvfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 82123ff3e1d..e8717de3bab 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -489,9 +489,9 @@ static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) { befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { - char *p = nd_get_link(nd); - if (!IS_ERR(p)) - kfree(p); + char *link = nd_get_link(nd); + if (!IS_ERR(link)) + kfree(link); } } -- cgit v1.2.3 From 4488c59c942bd6004fc97f0c2a7603a2f5dd80e0 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:58:51 -0700 Subject: fs/ramfs/ extern cleanup - internal.h shouldn't duplicate the extern declaration for ramfs_file_operations already in include/linux/ramfs.h - file-mmu.c needs two #include's for seeing the extern declarations of it's global struct's Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ramfs/file-mmu.c | 3 +++ fs/ramfs/internal.h | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index b41a514b097..9590b902430 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -26,6 +26,9 @@ #include #include +#include + +#include "internal.h" const struct address_space_operations ramfs_aops = { .readpage = simple_readpage, diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h index af7cc074a47..6b330639b51 100644 --- a/fs/ramfs/internal.h +++ b/fs/ramfs/internal.h @@ -11,5 +11,4 @@ extern const struct address_space_operations ramfs_aops; -extern const struct file_operations ramfs_file_operations; extern const struct inode_operations ramfs_file_inode_operations; -- cgit v1.2.3 From 4b0a8da7a7bbe7f84c7bd16a5e965a129f461881 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:58:52 -0700 Subject: fs/hfsplus/: proper externs Add proper extern declarations for two structs in fs/hfsplus/hfsplus_fs.h Signed-off-by: Adrian Bunk Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/hfsplus_fs.h | 4 ++++ fs/hfsplus/inode.c | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index d72d0a8b25a..9e59537b43d 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -311,6 +311,10 @@ int hfsplus_delete_cat(u32, struct inode *, struct qstr *); int hfsplus_rename_cat(u32, struct inode *, struct qstr *, struct inode *, struct qstr *); +/* dir.c */ +extern const struct inode_operations hfsplus_dir_inode_operations; +extern const struct file_operations hfsplus_dir_operations; + /* extents.c */ int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); void hfsplus_ext_write_extent(struct inode *); diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 37744cf3706..d53b2af91c2 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -278,9 +278,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) return 0; } -extern const struct inode_operations hfsplus_dir_inode_operations; -extern struct file_operations hfsplus_dir_operations; - static const struct inode_operations hfsplus_file_inode_operations = { .lookup = hfsplus_file_lookup, .truncate = hfsplus_file_truncate, -- cgit v1.2.3 From 8b1919a1e8b8968e0ac9030a4f14f0d2cd69e7cf Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:58:54 -0700 Subject: fs/freevxfs/: proper externs Move the extern declarations of several structs to vxfs_extern.h Signed-off-by: Adrian Bunk Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/freevxfs/vxfs_extern.h | 5 +++++ fs/freevxfs/vxfs_immed.c | 1 + fs/freevxfs/vxfs_inode.c | 5 ----- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h index 2b46064f66b..50ab5eecb99 100644 --- a/fs/freevxfs/vxfs_extern.h +++ b/fs/freevxfs/vxfs_extern.h @@ -50,7 +50,11 @@ extern daddr_t vxfs_bmap1(struct inode *, long); /* vxfs_fshead.c */ extern int vxfs_read_fshead(struct super_block *); +/* vxfs_immed.c */ +extern const struct inode_operations vxfs_immed_symlink_iops; + /* vxfs_inode.c */ +extern const struct address_space_operations vxfs_immed_aops; extern struct kmem_cache *vxfs_inode_cachep; extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t); extern struct inode * vxfs_get_fake_inode(struct super_block *, @@ -69,6 +73,7 @@ extern const struct file_operations vxfs_dir_operations; extern int vxfs_read_olt(struct super_block *, u_long); /* vxfs_subr.c */ +extern const struct address_space_operations vxfs_aops; extern struct page * vxfs_get_page(struct address_space *, u_long); extern void vxfs_put_page(struct page *); extern struct buffer_head * vxfs_bread(struct inode *, int); diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c index 8a5959a61ba..c36aeaf92e4 100644 --- a/fs/freevxfs/vxfs_immed.c +++ b/fs/freevxfs/vxfs_immed.c @@ -35,6 +35,7 @@ #include #include "vxfs.h" +#include "vxfs_extern.h" #include "vxfs_inode.h" diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index ad88d2364bc..9f3f2ceb73f 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -41,11 +41,6 @@ #include "vxfs_extern.h" -extern const struct address_space_operations vxfs_aops; -extern const struct address_space_operations vxfs_immed_aops; - -extern const struct inode_operations vxfs_immed_symlink_iops; - struct kmem_cache *vxfs_inode_cachep; -- cgit v1.2.3 From 6b09ae66922ca198e5830c0a4d74400a507a9170 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:58:54 -0700 Subject: make __put_super() static Make the needlessly global __put_super() static. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index a5a4aca7e22..453877c5697 100644 --- a/fs/super.c +++ b/fs/super.c @@ -117,7 +117,7 @@ static inline void destroy_super(struct super_block *s) * Drop a superblock's refcount. Returns non-zero if the superblock was * destroyed. The caller must hold sb_lock. */ -int __put_super(struct super_block *sb) +static int __put_super(struct super_block *sb) { int ret = 0; -- cgit v1.2.3 From 67cde595374dd0e4e4a537dbf9dff70fd3d7bd7b Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:58:55 -0700 Subject: make vfs_ioctl() static Make the needlessly global vfs_ioctl() static. Signed-off-by: Adrian Bunk Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ioctl.c b/fs/ioctl.c index f32fbde2175..7db32b3382d 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -28,8 +28,8 @@ * * Returns 0 on success, -errno on error. */ -long vfs_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) +static long vfs_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) { int error = -ENOTTY; -- cgit v1.2.3 From f11b00f3bd89c91c684d56b2082d1b0241ff20ae Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:58:56 -0700 Subject: fs/fs-writeback.c: make 2 functions static Make the following needlessly global functions static: - writeback_acquire() - writeback_release() Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 78 +++++++++++++++++++++++++++---------------------------- 1 file changed, 39 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 06557679ca4..ae45f77765c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -25,6 +25,45 @@ #include #include "internal.h" + +/** + * writeback_acquire - attempt to get exclusive writeback access to a device + * @bdi: the device's backing_dev_info structure + * + * It is a waste of resources to have more than one pdflush thread blocked on + * a single request queue. Exclusion at the request_queue level is obtained + * via a flag in the request_queue's backing_dev_info.state. + * + * Non-request_queue-backed address_spaces will share default_backing_dev_info, + * unless they implement their own. Which is somewhat inefficient, as this + * may prevent concurrent writeback against multiple devices. + */ +static int writeback_acquire(struct backing_dev_info *bdi) +{ + return !test_and_set_bit(BDI_pdflush, &bdi->state); +} + +/** + * writeback_in_progress - determine whether there is writeback in progress + * @bdi: the device's backing_dev_info structure. + * + * Determine whether there is writeback in progress against a backing device. + */ +int writeback_in_progress(struct backing_dev_info *bdi) +{ + return test_bit(BDI_pdflush, &bdi->state); +} + +/** + * writeback_release - relinquish exclusive writeback access against a device. + * @bdi: the device's backing_dev_info structure + */ +static void writeback_release(struct backing_dev_info *bdi) +{ + BUG_ON(!writeback_in_progress(bdi)); + clear_bit(BDI_pdflush, &bdi->state); +} + /** * __mark_inode_dirty - internal function * @inode: inode to mark @@ -747,43 +786,4 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int return err; } - EXPORT_SYMBOL(generic_osync_inode); - -/** - * writeback_acquire - attempt to get exclusive writeback access to a device - * @bdi: the device's backing_dev_info structure - * - * It is a waste of resources to have more than one pdflush thread blocked on - * a single request queue. Exclusion at the request_queue level is obtained - * via a flag in the request_queue's backing_dev_info.state. - * - * Non-request_queue-backed address_spaces will share default_backing_dev_info, - * unless they implement their own. Which is somewhat inefficient, as this - * may prevent concurrent writeback against multiple devices. - */ -int writeback_acquire(struct backing_dev_info *bdi) -{ - return !test_and_set_bit(BDI_pdflush, &bdi->state); -} - -/** - * writeback_in_progress - determine whether there is writeback in progress - * @bdi: the device's backing_dev_info structure. - * - * Determine whether there is writeback in progress against a backing device. - */ -int writeback_in_progress(struct backing_dev_info *bdi) -{ - return test_bit(BDI_pdflush, &bdi->state); -} - -/** - * writeback_release - relinquish exclusive writeback access against a device. - * @bdi: the device's backing_dev_info structure - */ -void writeback_release(struct backing_dev_info *bdi) -{ - BUG_ON(!writeback_in_progress(bdi)); - clear_bit(BDI_pdflush, &bdi->state); -} -- cgit v1.2.3 From 07d45da616f8514651360b502314fc9554223a03 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:58:57 -0700 Subject: fs/drop_caches.c: make 2 functions static Make the following needlessly global functions static: - drop_pagecache() - drop_slab() Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/drop_caches.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 59375efcf39..e2c6b6500c7 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -25,7 +25,7 @@ static void drop_pagecache_sb(struct super_block *sb) spin_unlock(&inode_lock); } -void drop_pagecache(void) +static void drop_pagecache(void) { struct super_block *sb; @@ -45,7 +45,7 @@ restart: spin_unlock(&sb_lock); } -void drop_slab(void) +static void drop_slab(void) { int nr_objects; -- cgit v1.2.3 From d5470b596abdd566339b2417e807b1198be64b97 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:58:57 -0700 Subject: fs/aio.c: make 3 functions static Make the following needlessly global functions static: - __put_ioctx() - lookup_ioctx() - io_submit_one() Signed-off-by: Adrian Bunk Cc: Zach Brown Cc: Benjamin LaHaise Cc: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 67 +++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 39 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index ae94e1dea26..81c01290939 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -191,6 +191,43 @@ static int aio_setup_ring(struct kioctx *ctx) kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ } while(0) + +/* __put_ioctx + * Called when the last user of an aio context has gone away, + * and the struct needs to be freed. + */ +static void __put_ioctx(struct kioctx *ctx) +{ + unsigned nr_events = ctx->max_reqs; + + BUG_ON(ctx->reqs_active); + + cancel_delayed_work(&ctx->wq); + cancel_work_sync(&ctx->wq.work); + aio_free_ring(ctx); + mmdrop(ctx->mm); + ctx->mm = NULL; + pr_debug("__put_ioctx: freeing %p\n", ctx); + kmem_cache_free(kioctx_cachep, ctx); + + if (nr_events) { + spin_lock(&aio_nr_lock); + BUG_ON(aio_nr - nr_events > aio_nr); + aio_nr -= nr_events; + spin_unlock(&aio_nr_lock); + } +} + +#define get_ioctx(kioctx) do { \ + BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ + atomic_inc(&(kioctx)->users); \ +} while (0) +#define put_ioctx(kioctx) do { \ + BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ + if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \ + __put_ioctx(kioctx); \ +} while (0) + /* ioctx_alloc * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. */ @@ -361,32 +398,6 @@ void exit_aio(struct mm_struct *mm) } } -/* __put_ioctx - * Called when the last user of an aio context has gone away, - * and the struct needs to be freed. - */ -void __put_ioctx(struct kioctx *ctx) -{ - unsigned nr_events = ctx->max_reqs; - - BUG_ON(ctx->reqs_active); - - cancel_delayed_work(&ctx->wq); - cancel_work_sync(&ctx->wq.work); - aio_free_ring(ctx); - mmdrop(ctx->mm); - ctx->mm = NULL; - pr_debug("__put_ioctx: freeing %p\n", ctx); - kmem_cache_free(kioctx_cachep, ctx); - - if (nr_events) { - spin_lock(&aio_nr_lock); - BUG_ON(aio_nr - nr_events > aio_nr); - aio_nr -= nr_events; - spin_unlock(&aio_nr_lock); - } -} - /* aio_get_req * Allocate a slot for an aio request. Increments the users count * of the kioctx so that the kioctx stays around until all requests are @@ -545,7 +556,7 @@ int aio_put_req(struct kiocb *req) /* Lookup an ioctx id. ioctx_list is lockless for reads. * FIXME: this is O(n) and is only suitable for development. */ -struct kioctx *lookup_ioctx(unsigned long ctx_id) +static struct kioctx *lookup_ioctx(unsigned long ctx_id) { struct kioctx *ioctx; struct mm_struct *mm; @@ -1552,7 +1563,7 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode, return 1; } -int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, +static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, struct iocb *iocb) { struct kiocb *req; -- cgit v1.2.3 From 45cc2b96f20fa27088a650587e5d9dc5fa5e32c0 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:58:59 -0700 Subject: fs/timerfd.c should #include Every file should include the headers containing the prototypes for its global functions (in this case for sys_timerfd_*()). Signed-off-by: Adrian Bunk Cc: Davide Libenzi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/timerfd.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/timerfd.c b/fs/timerfd.c index 10c80b59ec4..5400524e9cb 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -20,6 +20,7 @@ #include #include #include +#include struct timerfd_ctx { struct hrtimer tmr; -- cgit v1.2.3 From 946a57b526a16e5662235cb8f573337bc8ecdc48 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:59:00 -0700 Subject: remove generic_commit_write() Remove the obsolete and no longer used generic_commit_write(). Signed-off-by: Adrian Bunk Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 3db4a26adc4..22ed55198f3 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2328,23 +2328,6 @@ int block_commit_write(struct page *page, unsigned from, unsigned to) return 0; } -int generic_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct inode *inode = page->mapping->host; - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - __block_commit_write(inode,page,from,to); - /* - * No need to use i_size_read() here, the i_size - * cannot change under us because we hold i_mutex. - */ - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; -} - /* * block_page_mkwrite() is not allowed to change the file size as it gets * called from a page fault handler when a page is first dirtied. Hence we must @@ -3315,7 +3298,6 @@ EXPORT_SYMBOL(end_buffer_write_sync); EXPORT_SYMBOL(file_fsync); EXPORT_SYMBOL(fsync_bdev); EXPORT_SYMBOL(generic_block_bmap); -EXPORT_SYMBOL(generic_commit_write); EXPORT_SYMBOL(generic_cont_expand_simple); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL(invalidate_bdev); -- cgit v1.2.3 From f1e3af72c10ba74fb15864c354515ec1bd8bf2a5 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:59:01 -0700 Subject: make fs/buffer.c:cont_expand_zero() static cont_expand_zero() can become static. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 22ed55198f3..189efa4efc6 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2211,8 +2211,8 @@ out: return err; } -int cont_expand_zero(struct file *file, struct address_space *mapping, - loff_t pos, loff_t *bytes) +static int cont_expand_zero(struct file *file, struct address_space *mapping, + loff_t pos, loff_t *bytes) { struct inode *inode = mapping->host; unsigned blocksize = 1 << inode->i_blkbits; -- cgit v1.2.3 From 3202e1811fd312f3f32ddc8f526aa2691b64ec55 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:59:02 -0700 Subject: make BINFMT_FLAT a bool I have not yet seen anyone saying he has a reasonable use case for using BINFMT_FLAT modular on his embedded device. Considering that fs/binfmt_flat.c even lacks a MODULE_LICENSE() I really doubt there is any, and this patch therefore makes BINFMT_FLAT a bool. Signed-off-by: Adrian Bunk Acked-by: Bryan Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig.binfmt | 2 +- fs/binfmt_flat.c | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 853845abcca..55e8ee1900a 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -41,7 +41,7 @@ config BINFMT_ELF_FDPIC It is also possible to run FDPIC ELF binaries on MMU linux also. config BINFMT_FLAT - tristate "Kernel support for flat binaries" + bool "Kernel support for flat binaries" depends on !MMU help Support uClinux FLAT format binaries. diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 0498b181dd5..c12cc362fd3 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -932,14 +932,8 @@ static int __init init_flat_binfmt(void) return register_binfmt(&flat_format); } -static void __exit exit_flat_binfmt(void) -{ - unregister_binfmt(&flat_format); -} - /****************************************************************************/ core_initcall(init_flat_binfmt); -module_exit(exit_flat_binfmt); /****************************************************************************/ -- cgit v1.2.3 From f249fdd8c19ff65825c0be67212cdf22e556668e Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 29 Apr 2008 00:59:03 -0700 Subject: autofs4: fix sparse warning in root.c fs/autofs4/root.c:536:23: warning: symbol 'ino' shadows an earlier one fs/autofs4/root.c:510:22: originally declared here There is no need to redeclare, we are at the end of the loop and in the next iteration of the loop, ino will be reset. Signed-off-by: Harvey Harrison Acked-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index a54a946a50a..aa4c5ff8a40 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -533,9 +533,9 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct goto next; if (d_unhashed(dentry)) { - struct autofs_info *ino = autofs4_dentry_ino(dentry); struct inode *inode = dentry->d_inode; + ino = autofs4_dentry_ino(dentry); list_del_init(&ino->rehash); dget(dentry); /* -- cgit v1.2.3 From 61d64576a21275114d6bffff3c1cac6c8e2f7cf2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 29 Apr 2008 00:59:05 -0700 Subject: fs: remove unused fops from struct char_device_struct struct char_device_struct::fops is no longer used: remove it. Signed-off-by: Jiri Olsa Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/char_dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/char_dev.c b/fs/char_dev.c index 038674aa88a..68e510b8845 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -55,7 +55,6 @@ static struct char_device_struct { unsigned int baseminor; int minorct; char name[64]; - struct file_operations *fops; struct cdev *cdev; /* will die */ } *chrdevs[CHRDEV_MAJOR_HASH_SIZE]; -- cgit v1.2.3 From 6db27dd9d26fb270adaa4c265df65ccb49638bd0 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Tue, 29 Apr 2008 00:59:06 -0700 Subject: affs: handle match_strdup failure fs/affs/super.c (parse_options): Remove useless initialization. Handle match_strdup failure. Signed-off-by: Jim Meyering Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/affs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/affs/super.c b/fs/affs/super.c index d2dc047cb47..01d25d53254 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -199,7 +199,6 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s case Opt_prefix: /* Free any previous prefix */ kfree(*prefix); - *prefix = NULL; *prefix = match_strdup(&args[0]); if (!*prefix) return 0; @@ -233,6 +232,8 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s break; case Opt_volume: { char *vol = match_strdup(&args[0]); + if (!vol) + return 0; strlcpy(volume, vol, 32); kfree(vol); break; -- cgit v1.2.3 From 3fbe5c31009d26c7b6b73d5c69fe930a5e9d2e26 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Tue, 29 Apr 2008 00:59:07 -0700 Subject: hfs: handle match_strdup failure fs/hfs/super.c (parse_options): Handle match_strdup failure, twice. Signed-off-by: Jim Meyering Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfs/super.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 32de44ed002..8cf67974adf 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -297,7 +297,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) return 0; } p = match_strdup(&args[0]); - hsb->nls_disk = load_nls(p); + if (p) + hsb->nls_disk = load_nls(p); if (!hsb->nls_disk) { printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p); kfree(p); @@ -311,7 +312,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) return 0; } p = match_strdup(&args[0]); - hsb->nls_io = load_nls(p); + if (p) + hsb->nls_io = load_nls(p); if (!hsb->nls_io) { printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p); kfree(p); -- cgit v1.2.3 From cd6fda36089cf3b450821228c2f575a3b5d0e7a7 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Tue, 29 Apr 2008 00:59:08 -0700 Subject: hfsplus: handle match_strdup failure fs/hfsplus/options.c (hfsplus_parse_options): Handle match_strdup failure. Signed-off-by: Jim Meyering Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/options.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index dc64fac0083..9997cbf8beb 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -132,7 +132,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) return 0; } p = match_strdup(&args[0]); - sbi->nls = load_nls(p); + if (p) + sbi->nls = load_nls(p); if (!sbi->nls) { printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p); kfree(p); -- cgit v1.2.3 From 8d4b69002e56e93f1cfe8bb863846ecde3990032 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 29 Apr 2008 00:59:12 -0700 Subject: fs/affs/file.c: use BUG_ON if (...) BUG(); should be replaced with BUG_ON(...) when the test has no side-effects to allow a definition of BUG_ON that drops the code completely. The semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @ disable unlikely @ expression E,f; @@ ( if (<... f(...) ...>) { BUG(); } | - if (unlikely(E)) { BUG(); } + BUG_ON(E); ) @@ expression E,f; @@ ( if (<... f(...) ...>) { BUG(); } | - if (E) { BUG(); } + BUG_ON(E); ) // Signed-off-by: Julia Lawall Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/affs/file.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/affs/file.c b/fs/affs/file.c index 6e0c9399200..e87ede608f7 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -325,8 +325,7 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block); - if (block > (sector_t)0x7fffffffUL) - BUG(); + BUG_ON(block > (sector_t)0x7fffffffUL); if (block >= AFFS_I(inode)->i_blkcnt) { if (block > AFFS_I(inode)->i_blkcnt || !create) @@ -493,8 +492,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign u32 tmp; pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to); - if (from > to || to > PAGE_CACHE_SIZE) - BUG(); + BUG_ON(from > to || to > PAGE_CACHE_SIZE); kmap(page); data = page_address(page); bsize = AFFS_SB(sb)->s_data_blksize; @@ -507,8 +505,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign if (IS_ERR(bh)) return PTR_ERR(bh); tmp = min(bsize - boff, to - from); - if (from + tmp > to || tmp > bsize) - BUG(); + BUG_ON(from + tmp > to || tmp > bsize); memcpy(data + from, AFFS_DATA(bh) + boff, tmp); affs_brelse(bh); bidx++; @@ -540,8 +537,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) if (IS_ERR(bh)) return PTR_ERR(bh); tmp = min(bsize - boff, newsize - size); - if (boff + tmp > bsize || tmp > bsize) - BUG(); + BUG_ON(boff + tmp > bsize || tmp > bsize); memset(AFFS_DATA(bh) + boff, 0, tmp); AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); affs_fix_checksum(sb, bh); @@ -560,8 +556,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) if (IS_ERR(bh)) goto out; tmp = min(bsize, newsize - size); - if (tmp > bsize) - BUG(); + BUG_ON(tmp > bsize); AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); @@ -683,8 +678,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, if (IS_ERR(bh)) return PTR_ERR(bh); tmp = min(bsize - boff, to - from); - if (boff + tmp > bsize || tmp > bsize) - BUG(); + BUG_ON(boff + tmp > bsize || tmp > bsize); memcpy(AFFS_DATA(bh) + boff, data + from, tmp); AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); affs_fix_checksum(sb, bh); @@ -732,8 +726,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, if (IS_ERR(bh)) goto out; tmp = min(bsize, to - from); - if (tmp > bsize) - BUG(); + BUG_ON(tmp > bsize); memcpy(AFFS_DATA(bh), data + from, tmp); if (buffer_new(bh)) { AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); -- cgit v1.2.3 From 175a06ae300188af8a61db68a78e1af44dc7d44f Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 29 Apr 2008 00:59:17 -0700 Subject: exec: remove argv_len from struct linux_binprm I noticed that 2.6.24.2 calculates bprm->argv_len at do_execve(). But it doesn't update bprm->argv_len after "remove_arg_zero() + copy_strings_kernel()" at load_script() etc. audit_bprm() is called from search_binary_handler() and search_binary_handler() is called from load_script() etc. Thus, I think the condition check if (bprm->argv_len > (audit_argv_kb << 10)) return -E2BIG; in audit_bprm() might return wrong result when strlen(removed_arg) != strlen(spliced_args). Why not update bprm->argv_len at load_script() etc. ? By the way, 2.6.25-rc3 seems to not doing the condition check. Is the field bprm->argv_len no longer needed? Signed-off-by: Tetsuo Handa Cc: Ollie Wild Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index b152029f18f..7768453dc98 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1268,7 +1268,6 @@ int do_execve(char * filename, { struct linux_binprm *bprm; struct file *file; - unsigned long env_p; struct files_struct *displaced; int retval; @@ -1321,11 +1320,9 @@ int do_execve(char * filename, if (retval < 0) goto out; - env_p = bprm->p; retval = copy_strings(bprm->argc, argv, bprm); if (retval < 0) goto out; - bprm->argv_len = env_p - bprm->p; retval = search_binary_handler(bprm,regs); if (retval >= 0) { -- cgit v1.2.3 From a2d416dcc92e576d0e339efd641bd3d8ee2bfb4d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 29 Apr 2008 00:59:22 -0700 Subject: codafs: fix build warning powerpc: fs/coda/coda_linux.c: In function 'coda_iattr_to_vattr': fs/coda/coda_linux.c:137: warning: large integer implicitly truncated to unsigned type Cc: Jan Harkes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/coda/coda_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index 95a54253c04..e1c854890f9 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -134,7 +134,7 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr) unsigned int valid; /* clean out */ - vattr->va_mode = (umode_t) -1; + vattr->va_mode = -1; vattr->va_uid = (vuid_t) -1; vattr->va_gid = (vgid_t) -1; vattr->va_size = (off_t) -1; -- cgit v1.2.3 From 3a2e7f47d71e1df86acc1dda6826890b6546a4e1 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 29 Apr 2008 00:59:24 -0700 Subject: binfmt_misc.c: avoid potential kernel stack overflow This can be triggered with root help only, but... Register the ":text:E::txt::/root/cat.txt:' rule in binfmt_misc (by root) and try launching the cat.txt file (by anyone) :) The result is - the endless recursion in the load_misc_binary -> open_exec -> load_misc_binary chain and stack overflow. There's a similar problem with binfmt_script, and there's a sh_bang memner on linux_binprm structure to handle this, but simply raising this in binfmt_misc may break some setups when the interpreter of some misc binaries is a script. So the proposal is to turn sh_bang into a bit, add a new one (the misc_bang) and raise it in load_misc_binary. After this, even if we set up the misc -> script -> misc loop for binfmts one of them will step on its own bang and exit. Signed-off-by: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_em86.c | 2 +- fs/binfmt_misc.c | 6 ++++++ fs/binfmt_script.c | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index f95ae9789c9..f9c88d0c8ce 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -43,7 +43,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) return -ENOEXEC; } - bprm->sh_bang++; /* Well, the bang-shell is implicit... */ + bprm->sh_bang = 1; /* Well, the bang-shell is implicit... */ allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index dbf0ac0523d..7191306367c 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -115,6 +115,12 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!enabled) goto _ret; + retval = -ENOEXEC; + if (bprm->misc_bang) + goto _ret; + + bprm->misc_bang = 1; + /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index ab33939b12a..9e3963f7ebf 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -29,7 +29,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) * Sorta complicated, but hopefully it will work. -TYT */ - bprm->sh_bang++; + bprm->sh_bang = 1; allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; -- cgit v1.2.3 From 2e50b6ccdaaf0d933bb9d8409cac4b2f088f5a2f Mon Sep 17 00:00:00 2001 From: "S.Caglar Onur" Date: Tue, 29 Apr 2008 00:59:26 -0700 Subject: fs/binfmt_aout.c: use printk_ratelimit() Use printk_ratelimit() instead of jiffies based arithmetic, suggested by Geert Uytterhoeven Signed-off-by: S.Caglar Onur Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_aout.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index a1bb2244cac..ba4cddb92f1 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -372,21 +372,17 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); } else { - static unsigned long error_time, error_time2; if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && - (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) + (N_MAGIC(ex) != NMAGIC) && printk_ratelimit()) { printk(KERN_NOTICE "executable not page aligned\n"); - error_time2 = jiffies; } - if ((fd_offset & ~PAGE_MASK) != 0 && - (jiffies-error_time) > 5*HZ) + if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit()) { printk(KERN_WARNING "fd_offset is not page aligned. Please convert program: %s\n", bprm->file->f_path.dentry->d_name.name); - error_time = jiffies; } if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { @@ -495,15 +491,13 @@ static int load_aout_library(struct file *file) start_addr = ex.a_entry & 0xfffff000; if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { - static unsigned long error_time; loff_t pos = N_TXTOFF(ex); - if ((jiffies-error_time) > 5*HZ) + if (printk_ratelimit()) { printk(KERN_WARNING "N_TXTOFF is not page aligned. Please convert library: %s\n", file->f_path.dentry->d_name.name); - error_time = jiffies; } down_write(¤t->mm->mmap_sem); do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); -- cgit v1.2.3 From e1d2c8b69ad81ea103b1e87809eba51931e16874 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Apr 2008 00:59:34 -0700 Subject: fdpic: check that the size returned by kernel_read() is what we asked for Check that the size of the read returned by kernel_read() is what we asked for. If it isn't, then reject the binary as being a badly formatted. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf_fdpic.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 32649f2a165..ddd35d87339 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -136,8 +136,8 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, retval = kernel_read(file, params->hdr.e_phoff, (char *) params->phdrs, size); - if (retval < 0) - return retval; + if (unlikely(retval != size)) + return retval < 0 ? retval : -ENOEXEC; /* determine stack size for this binary */ phdr = params->phdrs; @@ -218,8 +218,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, phdr->p_offset, interpreter_name, phdr->p_filesz); - if (retval < 0) + if (unlikely(retval != phdr->p_filesz)) { + if (retval >= 0) + retval = -ENOEXEC; goto error; + } retval = -ENOENT; if (interpreter_name[phdr->p_filesz - 1] != '\0') @@ -245,8 +248,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE); - if (retval < 0) + if (unlikely(retval != BINPRM_BUF_SIZE)) { + if (retval >= 0) + retval = -ENOEXEC; goto error; + } interp_params.hdr = *((struct elfhdr *) bprm->buf); break; -- cgit v1.2.3 From eccb95cee4f0d56faa46ef22fb94dd4a3578d3eb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 29 Apr 2008 00:59:37 -0700 Subject: vfs: fix lock inversion in drop_pagecache_sb() Fix longstanding lock inversion in drop_pagecache_sb by dropping inode_lock before calling __invalidate_mapping_pages(). We just have to make sure inode won't go away from under us by keeping reference to it and putting the reference only after we have safely resumed the scan of the inode list. A bit tricky but not too bad... Signed-off-by: Jan Kara Cc: Fengguang Wu Cc: David Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/drop_caches.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/drop_caches.c b/fs/drop_caches.c index e2c6b6500c7..50f9087635d 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -14,15 +14,21 @@ int sysctl_drop_caches; static void drop_pagecache_sb(struct super_block *sb) { - struct inode *inode; + struct inode *inode, *toput_inode = NULL; spin_lock(&inode_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { if (inode->i_state & (I_FREEING|I_WILL_FREE)) continue; + __iget(inode); + spin_unlock(&inode_lock); __invalidate_mapping_pages(inode->i_mapping, 0, -1, true); + iput(toput_inode); + toput_inode = inode; + spin_lock(&inode_lock); } spin_unlock(&inode_lock); + iput(toput_inode); } static void drop_pagecache(void) -- cgit v1.2.3 From af065b8a19041554196971d8b6ae1459798d3b14 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 29 Apr 2008 00:59:39 -0700 Subject: vfs: skip inodes without pages to free in drop_pagecache_sb() Many inodes have no pagecache, so we can avoid lots of lock-takings. Signed-off-by: Jan Kara Cc: Fengguang Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/drop_caches.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 50f9087635d..3e5637fc377 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -20,6 +20,8 @@ static void drop_pagecache_sb(struct super_block *sb) list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { if (inode->i_state & (I_FREEING|I_WILL_FREE)) continue; + if (inode->i_mapping->nrpages == 0) + continue; __iget(inode); spin_unlock(&inode_lock); __invalidate_mapping_pages(inode->i_mapping, 0, -1, true); -- cgit v1.2.3 From c5c8be3ce59dc59baf20b33dae3f8eb70af7b1f1 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 29 Apr 2008 00:59:40 -0700 Subject: fs/inode.c: use hlist_for_each_entry() fs/inode.c: use hlist_for_each_entry() in find_inode() and find_inode_fast() [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Matthias Kaehlcke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index 27ee1af50d0..bf647813042 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -495,8 +495,7 @@ static struct inode * find_inode(struct super_block * sb, struct hlist_head *hea struct inode * inode = NULL; repeat: - hlist_for_each (node, head) { - inode = hlist_entry(node, struct inode, i_hash); + hlist_for_each_entry(inode, node, head, i_hash) { if (inode->i_sb != sb) continue; if (!test(inode, data)) @@ -520,8 +519,7 @@ static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head struct inode * inode = NULL; repeat: - hlist_for_each (node, head) { - inode = hlist_entry(node, struct inode, i_hash); + hlist_for_each_entry(inode, node, head, i_hash) { if (inode->i_ino != ino) continue; if (inode->i_sb != sb) -- cgit v1.2.3 From 7ec02ef1596bb3c829a7e8b65ebf13b87faf1819 Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Tue, 29 Apr 2008 00:59:40 -0700 Subject: vfs: remove lives_below_in_same_fs() Remove lives_below_in_same_fs() since is_subdir() from fs/dcache.c is providing the same functionality. Signed-off-by: Jan Blunck Acked-by: Miklos Szeredi Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index fe376805cf5..061e5edb4d2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1176,17 +1176,6 @@ static int mount_is_safe(struct nameidata *nd) #endif } -static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry) -{ - while (1) { - if (d == dentry) - return 1; - if (d == NULL || d == d->d_parent) - return 0; - d = d->d_parent; - } -} - struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, int flag) { @@ -1203,7 +1192,7 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, p = mnt; list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { - if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry)) + if (!is_subdir(r->mnt_mountpoint, dentry)) continue; for (s = r; s; s = next_mnt(s, r)) { -- cgit v1.2.3 From 8f0cfa52a1d4ffacd8e7de906d19662f5da58d58 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Apr 2008 00:59:41 -0700 Subject: xattr: add missing consts to function arguments Add missing consts to xattr function arguments. Signed-off-by: David Howells Cc: Andreas Gruenbacher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/xattr.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/xattr.c b/fs/xattr.c index 89a942f07e1..4706a8b1f49 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -67,7 +67,7 @@ xattr_permission(struct inode *inode, const char *name, int mask) } int -vfs_setxattr(struct dentry *dentry, char *name, void *value, +vfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; @@ -131,7 +131,7 @@ out_noalloc: EXPORT_SYMBOL_GPL(xattr_getsecurity); ssize_t -vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) +vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { struct inode *inode = dentry->d_inode; int error; @@ -187,7 +187,7 @@ vfs_listxattr(struct dentry *d, char *list, size_t size) EXPORT_SYMBOL_GPL(vfs_listxattr); int -vfs_removexattr(struct dentry *dentry, char *name) +vfs_removexattr(struct dentry *dentry, const char *name) { struct inode *inode = dentry->d_inode; int error; @@ -218,7 +218,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); * Extended attribute SET operations */ static long -setxattr(struct dentry *d, char __user *name, void __user *value, +setxattr(struct dentry *d, const char __user *name, const void __user *value, size_t size, int flags) { int error; @@ -252,8 +252,8 @@ setxattr(struct dentry *d, char __user *name, void __user *value, } asmlinkage long -sys_setxattr(char __user *path, char __user *name, void __user *value, - size_t size, int flags) +sys_setxattr(const char __user *path, const char __user *name, + const void __user *value, size_t size, int flags) { struct nameidata nd; int error; @@ -271,8 +271,8 @@ sys_setxattr(char __user *path, char __user *name, void __user *value, } asmlinkage long -sys_lsetxattr(char __user *path, char __user *name, void __user *value, - size_t size, int flags) +sys_lsetxattr(const char __user *path, const char __user *name, + const void __user *value, size_t size, int flags) { struct nameidata nd; int error; @@ -290,7 +290,7 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value, } asmlinkage long -sys_fsetxattr(int fd, char __user *name, void __user *value, +sys_fsetxattr(int fd, const char __user *name, const void __user *value, size_t size, int flags) { struct file *f; @@ -315,7 +315,8 @@ sys_fsetxattr(int fd, char __user *name, void __user *value, * Extended attribute GET operations */ static ssize_t -getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) +getxattr(struct dentry *d, const char __user *name, void __user *value, + size_t size) { ssize_t error; void *kvalue = NULL; @@ -349,8 +350,8 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) } asmlinkage ssize_t -sys_getxattr(char __user *path, char __user *name, void __user *value, - size_t size) +sys_getxattr(const char __user *path, const char __user *name, + void __user *value, size_t size) { struct nameidata nd; ssize_t error; @@ -364,7 +365,7 @@ sys_getxattr(char __user *path, char __user *name, void __user *value, } asmlinkage ssize_t -sys_lgetxattr(char __user *path, char __user *name, void __user *value, +sys_lgetxattr(const char __user *path, const char __user *name, void __user *value, size_t size) { struct nameidata nd; @@ -379,7 +380,7 @@ sys_lgetxattr(char __user *path, char __user *name, void __user *value, } asmlinkage ssize_t -sys_fgetxattr(int fd, char __user *name, void __user *value, size_t size) +sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size) { struct file *f; ssize_t error = -EBADF; @@ -424,7 +425,7 @@ listxattr(struct dentry *d, char __user *list, size_t size) } asmlinkage ssize_t -sys_listxattr(char __user *path, char __user *list, size_t size) +sys_listxattr(const char __user *path, char __user *list, size_t size) { struct nameidata nd; ssize_t error; @@ -438,7 +439,7 @@ sys_listxattr(char __user *path, char __user *list, size_t size) } asmlinkage ssize_t -sys_llistxattr(char __user *path, char __user *list, size_t size) +sys_llistxattr(const char __user *path, char __user *list, size_t size) { struct nameidata nd; ssize_t error; @@ -470,7 +471,7 @@ sys_flistxattr(int fd, char __user *list, size_t size) * Extended attribute REMOVE operations */ static long -removexattr(struct dentry *d, char __user *name) +removexattr(struct dentry *d, const char __user *name) { int error; char kname[XATTR_NAME_MAX + 1]; @@ -485,7 +486,7 @@ removexattr(struct dentry *d, char __user *name) } asmlinkage long -sys_removexattr(char __user *path, char __user *name) +sys_removexattr(const char __user *path, const char __user *name) { struct nameidata nd; int error; @@ -503,7 +504,7 @@ sys_removexattr(char __user *path, char __user *name) } asmlinkage long -sys_lremovexattr(char __user *path, char __user *name) +sys_lremovexattr(const char __user *path, const char __user *name) { struct nameidata nd; int error; @@ -521,7 +522,7 @@ sys_lremovexattr(char __user *path, char __user *name) } asmlinkage long -sys_fremovexattr(int fd, char __user *name) +sys_fremovexattr(int fd, const char __user *name) { struct file *f; struct dentry *dentry; -- cgit v1.2.3 From 762873c251b056c6c1b29e83a4dabafb064e5421 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Tue, 29 Apr 2008 00:59:42 -0700 Subject: vfs: fix unconditional write_super() call in file_fsync() We need to check ->s_dirt before calling write_super(). It became the cause of an unneeded write. This bug was noticed by Sudhanshu Saxena. Signed-off-by: OGAWA Hirofumi Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/sync.c b/fs/sync.c index 7cd005ea763..228e17b5e9e 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -64,7 +64,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync) /* sync the superblock to buffers */ sb = inode->i_sb; lock_super(sb); - if (sb->s_op->write_super) + if (sb->s_dirt && sb->s_op->write_super) sb->s_op->write_super(sb); unlock_super(sb); -- cgit v1.2.3 From 05db67a4f2c14dab5bcaa46c7d4e9237bd11b37c Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:59:47 -0700 Subject: remove ecryptfs_header_cache_0 Remove the no longer used ecryptfs_header_cache_0. Signed-off-by: Adrian Bunk Cc: Michael Halcrow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/crypto.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index a066e109ad9..dcbbb2b4455 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1246,7 +1246,6 @@ ecryptfs_write_header_metadata(char *virt, (*written) = 6; } -struct kmem_cache *ecryptfs_header_cache_0; struct kmem_cache *ecryptfs_header_cache_1; struct kmem_cache *ecryptfs_header_cache_2; -- cgit v1.2.3 From 18d1dbf1d401e8f9d74cf1cf799fdb19cff150c6 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 29 Apr 2008 00:59:48 -0700 Subject: ecryptfs: replace remaining __FUNCTION__ occurrences __FUNCTION__ is gcc-specific, use __func__ Signed-off-by: Harvey Harrison Cc: Michael Halcrow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/crypto.c | 32 ++++++++++++++++---------------- fs/ecryptfs/ecryptfs_kernel.h | 2 +- fs/ecryptfs/inode.c | 2 +- fs/ecryptfs/main.c | 2 +- fs/ecryptfs/mmap.c | 18 +++++++++--------- fs/ecryptfs/read_write.c | 16 ++++++++-------- 6 files changed, 36 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index dcbbb2b4455..cd62d75b2cc 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -119,21 +119,21 @@ static int ecryptfs_calculate_md5(char *dst, if (rc) { printk(KERN_ERR "%s: Error initializing crypto hash; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); goto out; } rc = crypto_hash_update(&desc, &sg, len); if (rc) { printk(KERN_ERR "%s: Error updating crypto hash; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); goto out; } rc = crypto_hash_final(&desc, dst); if (rc) { printk(KERN_ERR "%s: Error finalizing crypto hash; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); goto out; } out: @@ -437,7 +437,7 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, if (rc < 0) { printk(KERN_ERR "%s: Error attempting to encrypt page with " "page->index = [%ld], extent_offset = [%ld]; " - "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, + "rc = [%d]\n", __func__, page->index, extent_offset, rc); goto out; } @@ -487,7 +487,7 @@ int ecryptfs_encrypt_page(struct page *page) 0, PAGE_CACHE_SIZE); if (rc) printk(KERN_ERR "%s: Error attempting to copy " - "page at index [%ld]\n", __FUNCTION__, + "page at index [%ld]\n", __func__, page->index); goto out; } @@ -508,7 +508,7 @@ int ecryptfs_encrypt_page(struct page *page) extent_offset); if (rc) { printk(KERN_ERR "%s: Error encrypting extent; " - "rc = [%d]\n", __FUNCTION__, rc); + "rc = [%d]\n", __func__, rc); goto out; } ecryptfs_lower_offset_for_extent( @@ -569,7 +569,7 @@ static int ecryptfs_decrypt_extent(struct page *page, if (rc < 0) { printk(KERN_ERR "%s: Error attempting to decrypt to page with " "page->index = [%ld], extent_offset = [%ld]; " - "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, + "rc = [%d]\n", __func__, page->index, extent_offset, rc); goto out; } @@ -622,7 +622,7 @@ int ecryptfs_decrypt_page(struct page *page) ecryptfs_inode); if (rc) printk(KERN_ERR "%s: Error attempting to copy " - "page at index [%ld]\n", __FUNCTION__, + "page at index [%ld]\n", __func__, page->index); goto out; } @@ -656,7 +656,7 @@ int ecryptfs_decrypt_page(struct page *page) extent_offset); if (rc) { printk(KERN_ERR "%s: Error encrypting extent; " - "rc = [%d]\n", __FUNCTION__, rc); + "rc = [%d]\n", __func__, rc); goto out; } } @@ -1215,7 +1215,7 @@ int ecryptfs_read_and_validate_header_region(char *data, ecryptfs_inode); if (rc) { printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); goto out; } if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { @@ -1319,7 +1319,7 @@ ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat, 0, crypt_stat->num_header_bytes_at_front); if (rc) printk(KERN_ERR "%s: Error attempting to write header " - "information to lower file; rc = [%d]\n", __FUNCTION__, + "information to lower file; rc = [%d]\n", __func__, rc); return rc; } @@ -1364,14 +1364,14 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) } } else { printk(KERN_WARNING "%s: Encrypted flag not set\n", - __FUNCTION__); + __func__); rc = -EINVAL; goto out; } /* Released in this function */ virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL); if (!virt) { - printk(KERN_ERR "%s: Out of memory\n", __FUNCTION__); + printk(KERN_ERR "%s: Out of memory\n", __func__); rc = -ENOMEM; goto out; } @@ -1379,7 +1379,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) ecryptfs_dentry); if (unlikely(rc)) { printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); goto out_free; } if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) @@ -1390,7 +1390,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) ecryptfs_dentry, virt); if (rc) { printk(KERN_ERR "%s: Error writing metadata out to lower file; " - "rc = [%d]\n", __FUNCTION__, rc); + "rc = [%d]\n", __func__, rc); goto out_free; } out_free: @@ -1584,7 +1584,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) if (!page_virt) { rc = -ENOMEM; printk(KERN_ERR "%s: Unable to allocate page_virt\n", - __FUNCTION__); + __func__); goto out; } rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size, diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 5007f788da0..342e8d37b42 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -500,7 +500,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt) } #define ecryptfs_printk(type, fmt, arg...) \ - __ecryptfs_printk(type "%s: " fmt, __FUNCTION__, ## arg); + __ecryptfs_printk(type "%s: " fmt, __func__, ## arg); void __ecryptfs_printk(const char *fmt, ...); extern const struct file_operations ecryptfs_main_fops; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index e2386115210..a7d5d7d2b2a 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -121,7 +121,7 @@ ecryptfs_do_create(struct inode *directory_inode, ecryptfs_dentry, mode, nd); if (rc) { printk(KERN_ERR "%s: Failure to create dentry in lower fs; " - "rc = [%d]\n", __FUNCTION__, rc); + "rc = [%d]\n", __func__, rc); goto out_lock; } rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index d25ac9500a9..d603631601e 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -219,7 +219,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, if (rc) { printk(KERN_ERR "%s: Error attempting to initialize the " "persistent file for the dentry with name [%s]; " - "rc = [%d]\n", __FUNCTION__, dentry->d_name.name, rc); + "rc = [%d]\n", __func__, dentry->d_name.name, rc); goto out; } out: diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 6df1debdccc..2b6fe1e6e8b 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -153,7 +153,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, flush_dcache_page(page); if (rc) { printk(KERN_ERR "%s: Error reading xattr " - "region; rc = [%d]\n", __FUNCTION__, rc); + "region; rc = [%d]\n", __func__, rc); goto out; } } else { @@ -169,7 +169,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, if (rc) { printk(KERN_ERR "%s: Error attempting to read " "extent at offset [%lld] in the lower " - "file; rc = [%d]\n", __FUNCTION__, + "file; rc = [%d]\n", __func__, lower_offset, rc); goto out; } @@ -212,7 +212,7 @@ static int ecryptfs_readpage(struct file *file, struct page *page) "the encrypted content from the lower " "file whilst inserting the metadata " "from the xattr into the header; rc = " - "[%d]\n", __FUNCTION__, rc); + "[%d]\n", __func__, rc); goto out; } @@ -293,7 +293,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, if (rc) { printk(KERN_ERR "%s: Error attemping to read " "lower page segment; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); ClearPageUptodate(page); goto out; } else @@ -308,7 +308,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, "from the lower file whilst " "inserting the metadata from " "the xattr into the header; rc " - "= [%d]\n", __FUNCTION__, rc); + "= [%d]\n", __func__, rc); ClearPageUptodate(page); goto out; } @@ -320,7 +320,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, if (rc) { printk(KERN_ERR "%s: Error reading " "page; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); ClearPageUptodate(page); goto out; } @@ -331,7 +331,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, if (rc) { printk(KERN_ERR "%s: Error decrypting page " "at index [%ld]; rc = [%d]\n", - __FUNCTION__, page->index, rc); + __func__, page->index, rc); ClearPageUptodate(page); goto out; } @@ -348,7 +348,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, if (rc) { printk(KERN_ERR "%s: Error on attempt to " "truncate to (higher) offset [%lld];" - " rc = [%d]\n", __FUNCTION__, + " rc = [%d]\n", __func__, prev_page_end_size, rc); goto out; } @@ -389,7 +389,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode) kfree(file_size_virt); if (rc) printk(KERN_ERR "%s: Error writing file size to header; " - "rc = [%d]\n", __FUNCTION__, rc); + "rc = [%d]\n", __func__, rc); out: return rc; } diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 0c4928623bb..ebf55150be5 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -55,7 +55,7 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, set_fs(fs_save); if (octets_written < 0) { printk(KERN_ERR "%s: octets_written = [%td]; " - "expected [%td]\n", __FUNCTION__, octets_written, size); + "expected [%td]\n", __func__, octets_written, size); rc = -EINVAL; } mutex_unlock(&inode_info->lower_file_mutex); @@ -153,7 +153,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, rc = PTR_ERR(ecryptfs_page); printk(KERN_ERR "%s: Error getting page at " "index [%ld] from eCryptfs inode " - "mapping; rc = [%d]\n", __FUNCTION__, + "mapping; rc = [%d]\n", __func__, ecryptfs_page_idx, rc); goto out; } @@ -165,7 +165,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, if (rc) { printk(KERN_ERR "%s: Error decrypting " "page; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); ClearPageUptodate(ecryptfs_page); page_cache_release(ecryptfs_page); goto out; @@ -202,7 +202,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, page_cache_release(ecryptfs_page); if (rc) { printk(KERN_ERR "%s: Error encrypting " - "page; rc = [%d]\n", __FUNCTION__, rc); + "page; rc = [%d]\n", __func__, rc); goto out; } pos += num_bytes; @@ -254,7 +254,7 @@ int ecryptfs_read_lower(char *data, loff_t offset, size_t size, set_fs(fs_save); if (octets_read < 0) { printk(KERN_ERR "%s: octets_read = [%td]; " - "expected [%td]\n", __FUNCTION__, octets_read, size); + "expected [%td]\n", __func__, octets_read, size); rc = -EINVAL; } mutex_unlock(&inode_info->lower_file_mutex); @@ -327,7 +327,7 @@ int ecryptfs_read(char *data, loff_t offset, size_t size, printk(KERN_ERR "%s: Attempt to read data past the end of the " "file; offset = [%lld]; size = [%td]; " "ecryptfs_file_size = [%lld]\n", - __FUNCTION__, offset, size, ecryptfs_file_size); + __func__, offset, size, ecryptfs_file_size); goto out; } pos = offset; @@ -345,14 +345,14 @@ int ecryptfs_read(char *data, loff_t offset, size_t size, rc = PTR_ERR(ecryptfs_page); printk(KERN_ERR "%s: Error getting page at " "index [%ld] from eCryptfs inode " - "mapping; rc = [%d]\n", __FUNCTION__, + "mapping; rc = [%d]\n", __func__, ecryptfs_page_idx, rc); goto out; } rc = ecryptfs_decrypt_page(ecryptfs_page); if (rc) { printk(KERN_ERR "%s: Error decrypting " - "page; rc = [%d]\n", __FUNCTION__, rc); + "page; rc = [%d]\n", __func__, rc); ClearPageUptodate(ecryptfs_page); page_cache_release(ecryptfs_page); goto out; -- cgit v1.2.3 From 9c3580aa52195699065bc2d7242b1c7e3e6903fa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 29 Apr 2008 00:59:48 -0700 Subject: ecryptfs: add missing lock around notify_change Callers of notify_change() need to hold i_mutex. Signed-off-by: Miklos Szeredi Cc: Michael Halcrow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index a7d5d7d2b2a..1623ebf25e5 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -908,7 +908,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) ia->ia_valid &= ~ATTR_MODE; + mutex_lock(&lower_dentry->d_inode->i_mutex); rc = notify_change(lower_dentry, ia); + mutex_unlock(&lower_dentry->d_inode->i_mutex); out: fsstack_copy_attr_all(inode, lower_inode, NULL); return rc; -- cgit v1.2.3 From 8bf2debd5f7bf12d122124e34fec14af5b1e8ecf Mon Sep 17 00:00:00 2001 From: Michael Halcrow Date: Tue, 29 Apr 2008 00:59:50 -0700 Subject: eCryptfs: introduce device handle for userspace daemon communications A regular device file was my real preference from the get-go, but I went with netlink at the time because I thought it would be less complex for managing send queues (i.e., just do a unicast and move on). It turns out that we do not really get that much complexity reduction with netlink, and netlink is more heavyweight than a device handle. In addition, the netlink interface to eCryptfs has been broken since 2.6.24. I am assuming this is a bug in how eCryptfs uses netlink, since the other in-kernel users of netlink do not seem to be having any problems. I have had one report of a user successfully using eCryptfs with netlink on 2.6.24, but for my own systems, when starting the userspace daemon, the initial helo message sent to the eCryptfs kernel module results in an oops right off the bat. I spent some time looking at it, but I have not yet found the cause. The netlink interface breaking gave me the motivation to just finish my patch to migrate to a regular device handle. If I cannot find out soon why the netlink interface in eCryptfs broke, I am likely to just send a patch to disable it in 2.6.24 and 2.6.25. I would like the device handle to be the preferred means of communicating with the userspace daemon from 2.6.26 on forward. This patch: Functions to facilitate reading and writing to the eCryptfs miscellaneous device handle. This will replace the netlink interface as the preferred mechanism for communicating with the userspace eCryptfs daemon. Each user has his own daemon, which registers itself by opening the eCryptfs device handle. Only one daemon per euid may be registered at any given time. The eCryptfs module sends a message to a daemon by adding its message to the daemon's outgoing message queue. The daemon reads the device handle to get the oldest message off the queue. Incoming messages from the userspace daemon are immediately handled. If the message is a response, then the corresponding process that is blocked waiting for the response is awakened. Signed-off-by: Michael Halcrow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/miscdev.c | 580 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 580 insertions(+) create mode 100644 fs/ecryptfs/miscdev.c (limited to 'fs') diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c new file mode 100644 index 00000000000..72dfec48ea2 --- /dev/null +++ b/fs/ecryptfs/miscdev.c @@ -0,0 +1,580 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * + * Copyright (C) 2008 International Business Machines Corp. + * Author(s): Michael A. Halcrow + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "ecryptfs_kernel.h" + +static atomic_t ecryptfs_num_miscdev_opens; + +/** + * ecryptfs_miscdev_poll + * @file: dev file (ignored) + * @pt: dev poll table (ignored) + * + * Returns the poll mask + */ +static unsigned int +ecryptfs_miscdev_poll(struct file *file, poll_table *pt) +{ + struct ecryptfs_daemon *daemon; + unsigned int mask = 0; + int rc; + + mutex_lock(&ecryptfs_daemon_hash_mux); + /* TODO: Just use file->private_data? */ + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid); + BUG_ON(rc || !daemon); + mutex_lock(&daemon->mux); + mutex_unlock(&ecryptfs_daemon_hash_mux); + if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { + printk(KERN_WARNING "%s: Attempt to poll on zombified " + "daemon\n", __func__); + goto out_unlock_daemon; + } + if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) + goto out_unlock_daemon; + if (daemon->flags & ECRYPTFS_DAEMON_IN_POLL) + goto out_unlock_daemon; + daemon->flags |= ECRYPTFS_DAEMON_IN_POLL; + mutex_unlock(&daemon->mux); + poll_wait(file, &daemon->wait, pt); + mutex_lock(&daemon->mux); + if (!list_empty(&daemon->msg_ctx_out_queue)) + mask |= POLLIN | POLLRDNORM; +out_unlock_daemon: + daemon->flags &= ~ECRYPTFS_DAEMON_IN_POLL; + mutex_unlock(&daemon->mux); + return mask; +} + +/** + * ecryptfs_miscdev_open + * @inode: inode of miscdev handle (ignored) + * @file: file for miscdev handle (ignored) + * + * Returns zero on success; non-zero otherwise + */ +static int +ecryptfs_miscdev_open(struct inode *inode, struct file *file) +{ + struct ecryptfs_daemon *daemon = NULL; + int rc; + + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = try_module_get(THIS_MODULE); + if (rc == 0) { + rc = -EIO; + printk(KERN_ERR "%s: Error attempting to increment module use " + "count; rc = [%d]\n", __func__, rc); + goto out_unlock_daemon_list; + } + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid); + if (rc || !daemon) { + rc = ecryptfs_spawn_daemon(&daemon, current->euid, + current->pid); + if (rc) { + printk(KERN_ERR "%s: Error attempting to spawn daemon; " + "rc = [%d]\n", __func__, rc); + goto out_module_put_unlock_daemon_list; + } + } + mutex_lock(&daemon->mux); + if (daemon->pid != current->pid) { + rc = -EINVAL; + printk(KERN_ERR "%s: pid [%d] has registered with euid [%d], " + "but pid [%d] has attempted to open the handle " + "instead\n", __func__, daemon->pid, daemon->euid, + current->pid); + goto out_unlock_daemon; + } + if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) { + rc = -EBUSY; + printk(KERN_ERR "%s: Miscellaneous device handle may only be " + "opened once per daemon; pid [%d] already has this " + "handle open\n", __func__, daemon->pid); + goto out_unlock_daemon; + } + daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN; + atomic_inc(&ecryptfs_num_miscdev_opens); +out_unlock_daemon: + mutex_unlock(&daemon->mux); +out_module_put_unlock_daemon_list: + if (rc) + module_put(THIS_MODULE); +out_unlock_daemon_list: + mutex_unlock(&ecryptfs_daemon_hash_mux); + return rc; +} + +/** + * ecryptfs_miscdev_release + * @inode: inode of fs/ecryptfs/euid handle (ignored) + * @file: file for fs/ecryptfs/euid handle (ignored) + * + * This keeps the daemon registered until the daemon sends another + * ioctl to fs/ecryptfs/ctl or until the kernel module unregisters. + * + * Returns zero on success; non-zero otherwise + */ +static int +ecryptfs_miscdev_release(struct inode *inode, struct file *file) +{ + struct ecryptfs_daemon *daemon = NULL; + int rc; + + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid); + BUG_ON(rc || !daemon); + mutex_lock(&daemon->mux); + BUG_ON(daemon->pid != current->pid); + BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN)); + daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN; + atomic_dec(&ecryptfs_num_miscdev_opens); + mutex_unlock(&daemon->mux); + rc = ecryptfs_exorcise_daemon(daemon); + if (rc) { + printk(KERN_CRIT "%s: Fatal error whilst attempting to " + "shut down daemon; rc = [%d]. Please report this " + "bug.\n", __func__, rc); + BUG(); + } + module_put(THIS_MODULE); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return rc; +} + +/** + * ecryptfs_send_miscdev + * @data: Data to send to daemon; may be NULL + * @data_size: Amount of data to send to daemon + * @msg_ctx: Message context, which is used to handle the reply. If + * this is NULL, then we do not expect a reply. + * @msg_type: Type of message + * @msg_flags: Flags for message + * @daemon: eCryptfs daemon object + * + * Add msg_ctx to queue and then, if it exists, notify the blocked + * miscdevess about the data being available. Must be called with + * ecryptfs_daemon_hash_mux held. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_send_miscdev(char *data, size_t data_size, + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, + u16 msg_flags, struct ecryptfs_daemon *daemon) +{ + int rc = 0; + + mutex_lock(&msg_ctx->mux); + if (data) { + msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), + GFP_KERNEL); + if (!msg_ctx->msg) { + rc = -ENOMEM; + printk(KERN_ERR "%s: Out of memory whilst attempting " + "to kmalloc(%d, GFP_KERNEL)\n", __func__, + (sizeof(*msg_ctx->msg) + data_size)); + goto out_unlock; + } + } else + msg_ctx->msg = NULL; + msg_ctx->msg->index = msg_ctx->index; + msg_ctx->msg->data_len = data_size; + msg_ctx->type = msg_type; + if (data) { + memcpy(msg_ctx->msg->data, data, data_size); + msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size); + } else + msg_ctx->msg_size = 0; + mutex_lock(&daemon->mux); + list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); + daemon->num_queued_msg_ctx++; + wake_up_interruptible(&daemon->wait); + mutex_unlock(&daemon->mux); +out_unlock: + mutex_unlock(&msg_ctx->mux); + return rc; +} + +/** + * ecryptfs_miscdev_read - format and send message from queue + * @file: fs/ecryptfs/euid miscdevfs handle (ignored) + * @buf: User buffer into which to copy the next message on the daemon queue + * @count: Amount of space available in @buf + * @ppos: Offset in file (ignored) + * + * Pulls the most recent message from the daemon queue, formats it for + * being sent via a miscdevfs handle, and copies it into @buf + * + * Returns the number of bytes copied into the user buffer + */ +static int +ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct ecryptfs_daemon *daemon; + struct ecryptfs_msg_ctx *msg_ctx; + size_t packet_length_size; + u32 counter_nbo; + char packet_length[3]; + size_t i; + size_t total_length; + int rc; + + mutex_lock(&ecryptfs_daemon_hash_mux); + /* TODO: Just use file->private_data? */ + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid); + BUG_ON(rc || !daemon); + mutex_lock(&daemon->mux); + if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { + rc = 0; + printk(KERN_WARNING "%s: Attempt to read from zombified " + "daemon\n", __func__); + goto out_unlock_daemon; + } + if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) { + rc = 0; + goto out_unlock_daemon; + } + /* This daemon will not go away so long as this flag is set */ + daemon->flags |= ECRYPTFS_DAEMON_IN_READ; + mutex_unlock(&ecryptfs_daemon_hash_mux); +check_list: + if (list_empty(&daemon->msg_ctx_out_queue)) { + mutex_unlock(&daemon->mux); + rc = wait_event_interruptible( + daemon->wait, !list_empty(&daemon->msg_ctx_out_queue)); + mutex_lock(&daemon->mux); + if (rc < 0) { + rc = 0; + goto out_unlock_daemon; + } + } + if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { + rc = 0; + goto out_unlock_daemon; + } + if (list_empty(&daemon->msg_ctx_out_queue)) { + /* Something else jumped in since the + * wait_event_interruptable() and removed the + * message from the queue; try again */ + goto check_list; + } + BUG_ON(current->euid != daemon->euid); + BUG_ON(current->pid != daemon->pid); + msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, + struct ecryptfs_msg_ctx, daemon_out_list); + BUG_ON(!msg_ctx); + mutex_lock(&msg_ctx->mux); + if (msg_ctx->msg) { + rc = ecryptfs_write_packet_length(packet_length, + msg_ctx->msg_size, + &packet_length_size); + if (rc) { + rc = 0; + printk(KERN_WARNING "%s: Error writing packet length; " + "rc = [%d]\n", __func__, rc); + goto out_unlock_msg_ctx; + } + } else { + packet_length_size = 0; + msg_ctx->msg_size = 0; + } + /* miscdevfs packet format: + * Octet 0: Type + * Octets 1-4: network byte order msg_ctx->counter + * Octets 5-N0: Size of struct ecryptfs_message to follow + * Octets N0-N1: struct ecryptfs_message (including data) + * + * Octets 5-N1 not written if the packet type does not + * include a message */ + total_length = (1 + 4 + packet_length_size + msg_ctx->msg_size); + if (count < total_length) { + rc = 0; + printk(KERN_WARNING "%s: Only given user buffer of " + "size [%Zd], but we need [%Zd] to read the " + "pending message\n", __func__, count, total_length); + goto out_unlock_msg_ctx; + } + i = 0; + buf[i++] = msg_ctx->type; + counter_nbo = cpu_to_be32(msg_ctx->counter); + memcpy(&buf[i], (char *)&counter_nbo, 4); + i += 4; + if (msg_ctx->msg) { + memcpy(&buf[i], packet_length, packet_length_size); + i += packet_length_size; + rc = copy_to_user(&buf[i], msg_ctx->msg, msg_ctx->msg_size); + if (rc) { + printk(KERN_ERR "%s: copy_to_user returned error " + "[%d]\n", __func__, rc); + goto out_unlock_msg_ctx; + } + i += msg_ctx->msg_size; + } + rc = i; + list_del(&msg_ctx->daemon_out_list); + kfree(msg_ctx->msg); + msg_ctx->msg = NULL; + /* We do not expect a reply from the userspace daemon for any + * message type other than ECRYPTFS_MSG_REQUEST */ + if (msg_ctx->type != ECRYPTFS_MSG_REQUEST) + ecryptfs_msg_ctx_alloc_to_free(msg_ctx); +out_unlock_msg_ctx: + mutex_unlock(&msg_ctx->mux); +out_unlock_daemon: + daemon->flags &= ~ECRYPTFS_DAEMON_IN_READ; + mutex_unlock(&daemon->mux); + return rc; +} + +/** + * ecryptfs_miscdev_helo + * @euid: effective user id of miscdevess sending helo packet + * @pid: miscdevess id of miscdevess sending helo packet + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_miscdev_helo(uid_t uid, pid_t pid) +{ + int rc; + + rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, uid, pid); + if (rc) + printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); + return rc; +} + +/** + * ecryptfs_miscdev_quit + * @euid: effective user id of miscdevess sending quit packet + * @pid: miscdevess id of miscdevess sending quit packet + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_miscdev_quit(uid_t euid, pid_t pid) +{ + int rc; + + rc = ecryptfs_process_quit(euid, pid); + if (rc) + printk(KERN_WARNING + "Error processing QUIT message; rc = [%d]\n", rc); + return rc; +} + +/** + * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon + * @data: Bytes comprising struct ecryptfs_message + * @data_size: sizeof(struct ecryptfs_message) + data len + * @euid: Effective user id of miscdevess sending the miscdev response + * @pid: Miscdevess id of miscdevess sending the miscdev response + * @seq: Sequence number for miscdev response packet + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_miscdev_response(char *data, size_t data_size, + uid_t euid, pid_t pid, u32 seq) +{ + struct ecryptfs_message *msg = (struct ecryptfs_message *)data; + int rc; + + if ((sizeof(*msg) + msg->data_len) != data_size) { + printk(KERN_WARNING "%s: (sizeof(*msg) + msg->data_len) = " + "[%Zd]; data_size = [%Zd]. Invalid packet.\n", __func__, + (sizeof(*msg) + msg->data_len), data_size); + rc = -EINVAL; + goto out; + } + rc = ecryptfs_process_response(msg, euid, pid, seq); + if (rc) + printk(KERN_ERR + "Error processing response message; rc = [%d]\n", rc); +out: + return rc; +} + +/** + * ecryptfs_miscdev_write - handle write to daemon miscdev handle + * @file: File for misc dev handle (ignored) + * @buf: Buffer containing user data + * @count: Amount of data in @buf + * @ppos: Pointer to offset in file (ignored) + * + * miscdevfs packet format: + * Octet 0: Type + * Octets 1-4: network byte order msg_ctx->counter (0's for non-response) + * Octets 5-N0: Size of struct ecryptfs_message to follow + * Octets N0-N1: struct ecryptfs_message (including data) + * + * Returns the number of bytes read from @buf + */ +static ssize_t +ecryptfs_miscdev_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + u32 counter_nbo, seq; + size_t packet_size, packet_size_length, i; + ssize_t sz = 0; + char *data; + int rc; + + if (count == 0) + goto out; + data = kmalloc(count, GFP_KERNEL); + if (!data) { + printk(KERN_ERR "%s: Out of memory whilst attempting to " + "kmalloc([%Zd], GFP_KERNEL)\n", __func__, count); + goto out; + } + rc = copy_from_user(data, buf, count); + if (rc) { + printk(KERN_ERR "%s: copy_from_user returned error [%d]\n", + __func__, rc); + goto out_free; + } + sz = count; + i = 0; + switch (data[i++]) { + case ECRYPTFS_MSG_RESPONSE: + if (count < (1 + 4 + 1 + sizeof(struct ecryptfs_message))) { + printk(KERN_WARNING "%s: Minimum acceptable packet " + "size is [%Zd], but amount of data written is " + "only [%Zd]. Discarding response packet.\n", + __func__, + (1 + 4 + 1 + sizeof(struct ecryptfs_message)), + count); + goto out_free; + } + memcpy((char *)&counter_nbo, &data[i], 4); + seq = be32_to_cpu(counter_nbo); + i += 4; + rc = ecryptfs_parse_packet_length(&data[i], &packet_size, + &packet_size_length); + if (rc) { + printk(KERN_WARNING "%s: Error parsing packet length; " + "rc = [%d]\n", __func__, rc); + goto out_free; + } + i += packet_size_length; + if ((1 + 4 + packet_size_length + packet_size) != count) { + printk(KERN_WARNING "%s: (1 + packet_size_length([%Zd])" + " + packet_size([%Zd]))([%Zd]) != " + "count([%Zd]). Invalid packet format.\n", + __func__, packet_size_length, packet_size, + (1 + packet_size_length + packet_size), count); + goto out_free; + } + rc = ecryptfs_miscdev_response(&data[i], packet_size, + current->euid, + current->pid, seq); + if (rc) + printk(KERN_WARNING "%s: Failed to deliver miscdev " + "response to requesting operation; rc = [%d]\n", + __func__, rc); + break; + case ECRYPTFS_MSG_HELO: + rc = ecryptfs_miscdev_helo(current->euid, current->pid); + if (rc) { + printk(KERN_ERR "%s: Error attempting to process " + "helo from pid [%d]; rc = [%d]\n", __func__, + current->pid, rc); + goto out_free; + } + break; + case ECRYPTFS_MSG_QUIT: + rc = ecryptfs_miscdev_quit(current->euid, current->pid); + if (rc) { + printk(KERN_ERR "%s: Error attempting to process " + "quit from pid [%d]; rc = [%d]\n", __func__, + current->pid, rc); + goto out_free; + } + break; + default: + ecryptfs_printk(KERN_WARNING, "Dropping miscdev " + "message of unrecognized type [%d]\n", + data[0]); + break; + } +out_free: + kfree(data); +out: + return sz; +} + + +static const struct file_operations ecryptfs_miscdev_fops = { + .open = ecryptfs_miscdev_open, + .poll = ecryptfs_miscdev_poll, + .read = ecryptfs_miscdev_read, + .write = ecryptfs_miscdev_write, + .release = ecryptfs_miscdev_release, +}; + +static struct miscdevice ecryptfs_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "ecryptfs", + .fops = &ecryptfs_miscdev_fops +}; + +/** + * ecryptfs_init_ecryptfs_miscdev + * + * Messages sent to the userspace daemon from the kernel are placed on + * a queue associated with the daemon. The next read against the + * miscdev handle by that daemon will return the oldest message placed + * on the message queue for the daemon. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_init_ecryptfs_miscdev(void) +{ + int rc; + + atomic_set(&ecryptfs_num_miscdev_opens, 0); + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = misc_register(&ecryptfs_miscdev); + if (rc) + printk(KERN_ERR "%s: Failed to register miscellaneous device " + "for communications with userspace daemons; rc = [%d]\n", + __func__, rc); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return rc; +} + +/** + * ecryptfs_destroy_ecryptfs_miscdev + * + * All of the daemons must be exorcised prior to calling this + * function. + */ +void ecryptfs_destroy_ecryptfs_miscdev(void) +{ + BUG_ON(atomic_read(&ecryptfs_num_miscdev_opens) != 0); + misc_deregister(&ecryptfs_miscdev); +} -- cgit v1.2.3 From f66e883eb6186bc43a79581b67aff7d1a69d0ff1 Mon Sep 17 00:00:00 2001 From: Michael Halcrow Date: Tue, 29 Apr 2008 00:59:51 -0700 Subject: eCryptfs: integrate eCryptfs device handle into the module. Update the versioning information. Make the message types generic. Add an outgoing message queue to the daemon struct. Make the functions to parse and write the packet lengths available to the rest of the module. Add functions to create and destroy the daemon structs. Clean up some of the comments and make the code a little more consistent with itself. [akpm@linux-foundation.org: printk fixes] Signed-off-by: Michael Halcrow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/Makefile | 2 +- fs/ecryptfs/ecryptfs_kernel.h | 79 +++++-- fs/ecryptfs/keystore.c | 89 ++++---- fs/ecryptfs/messaging.c | 479 ++++++++++++++++++++++++++++-------------- fs/ecryptfs/miscdev.c | 4 +- fs/ecryptfs/netlink.c | 8 +- 6 files changed, 435 insertions(+), 226 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile index 76885701551..1e34a7fd488 100644 --- a/fs/ecryptfs/Makefile +++ b/fs/ecryptfs/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o -ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o debug.o +ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 342e8d37b42..72e117706a6 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -4,7 +4,7 @@ * * Copyright (C) 1997-2003 Erez Zadok * Copyright (C) 2001-2003 Stony Brook University - * Copyright (C) 2004-2007 International Business Machines Corp. + * Copyright (C) 2004-2008 International Business Machines Corp. * Author(s): Michael A. Halcrow * Trevor S. Highland * Tyler Hicks @@ -49,11 +49,13 @@ #define ECRYPTFS_VERSIONING_POLICY 0x00000008 #define ECRYPTFS_VERSIONING_XATTR 0x00000010 #define ECRYPTFS_VERSIONING_MULTKEY 0x00000020 +#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040 #define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ | ECRYPTFS_VERSIONING_PUBKEY \ | ECRYPTFS_VERSIONING_XATTR \ - | ECRYPTFS_VERSIONING_MULTKEY) + | ECRYPTFS_VERSIONING_MULTKEY \ + | ECRYPTFS_VERSIONING_DEVMISC) #define ECRYPTFS_MAX_PASSWORD_LENGTH 64 #define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH #define ECRYPTFS_SALT_SIZE 8 @@ -73,17 +75,14 @@ #define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32 #define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ #define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3) -#define ECRYPTFS_NLMSG_HELO 100 -#define ECRYPTFS_NLMSG_QUIT 101 -#define ECRYPTFS_NLMSG_REQUEST 102 -#define ECRYPTFS_NLMSG_RESPONSE 103 #define ECRYPTFS_MAX_PKI_NAME_BYTES 16 #define ECRYPTFS_DEFAULT_NUM_USERS 4 #define ECRYPTFS_MAX_NUM_USERS 32768 #define ECRYPTFS_TRANSPORT_NETLINK 0 #define ECRYPTFS_TRANSPORT_CONNECTOR 1 #define ECRYPTFS_TRANSPORT_RELAYFS 2 -#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_NETLINK +#define ECRYPTFS_TRANSPORT_MISCDEV 3 +#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_MISCDEV #define ECRYPTFS_XATTR_NAME "user.ecryptfs" #define RFC2440_CIPHER_DES3_EDE 0x02 @@ -366,32 +365,62 @@ struct ecryptfs_auth_tok_list_item { }; struct ecryptfs_message { + /* Can never be greater than ecryptfs_message_buf_len */ + /* Used to find the parent msg_ctx */ + /* Inherits from msg_ctx->index */ u32 index; u32 data_len; u8 data[]; }; struct ecryptfs_msg_ctx { -#define ECRYPTFS_MSG_CTX_STATE_FREE 0x0001 -#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x0002 -#define ECRYPTFS_MSG_CTX_STATE_DONE 0x0003 - u32 state; - unsigned int index; - unsigned int counter; +#define ECRYPTFS_MSG_CTX_STATE_FREE 0x01 +#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x02 +#define ECRYPTFS_MSG_CTX_STATE_DONE 0x03 +#define ECRYPTFS_MSG_CTX_STATE_NO_REPLY 0x04 + u8 state; +#define ECRYPTFS_MSG_HELO 100 +#define ECRYPTFS_MSG_QUIT 101 +#define ECRYPTFS_MSG_REQUEST 102 +#define ECRYPTFS_MSG_RESPONSE 103 + u8 type; + u32 index; + /* Counter converts to a sequence number. Each message sent + * out for which we expect a response has an associated + * sequence number. The response must have the same sequence + * number as the counter for the msg_stc for the message to be + * valid. */ + u32 counter; + size_t msg_size; struct ecryptfs_message *msg; struct task_struct *task; struct list_head node; + struct list_head daemon_out_list; struct mutex mux; }; extern unsigned int ecryptfs_transport; -struct ecryptfs_daemon_id { +struct ecryptfs_daemon; + +struct ecryptfs_daemon { +#define ECRYPTFS_DAEMON_IN_READ 0x00000001 +#define ECRYPTFS_DAEMON_IN_POLL 0x00000002 +#define ECRYPTFS_DAEMON_ZOMBIE 0x00000004 +#define ECRYPTFS_DAEMON_MISCDEV_OPEN 0x00000008 + u32 flags; + u32 num_queued_msg_ctx; pid_t pid; - uid_t uid; - struct hlist_node id_chain; + uid_t euid; + struct task_struct *task; + struct mutex mux; + struct list_head msg_ctx_out_queue; + wait_queue_head_t wait; + struct hlist_node euid_chain; }; +extern struct mutex ecryptfs_daemon_hash_mux; + static inline struct ecryptfs_file_info * ecryptfs_file_to_private(struct file *file) { @@ -593,13 +622,13 @@ int ecryptfs_init_messaging(unsigned int transport); void ecryptfs_release_messaging(unsigned int transport); int ecryptfs_send_netlink(char *data, int data_len, - struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, u16 msg_flags, pid_t daemon_pid); int ecryptfs_init_netlink(void); void ecryptfs_release_netlink(void); int ecryptfs_send_connector(char *data, int data_len, - struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, u16 msg_flags, pid_t daemon_pid); int ecryptfs_init_connector(void); void ecryptfs_release_connector(void); @@ -642,5 +671,19 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, size_t offset_in_page, size_t size, struct inode *ecryptfs_inode); struct page *ecryptfs_get_locked_page(struct file *file, loff_t index); +int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); +int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid); +int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, + size_t *length_size); +int ecryptfs_write_packet_length(char *dest, size_t size, + size_t *packet_size_length); +int ecryptfs_init_ecryptfs_miscdev(void); +void ecryptfs_destroy_ecryptfs_miscdev(void); +int ecryptfs_send_miscdev(char *data, size_t data_size, + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, + u16 msg_flags, struct ecryptfs_daemon *daemon); +void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx); +int +ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, pid_t pid); #endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 682b1b2482c..e82b457180b 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -65,7 +65,7 @@ static int process_request_key_err(long err_code) } /** - * parse_packet_length + * ecryptfs_parse_packet_length * @data: Pointer to memory containing length at offset * @size: This function writes the decoded size to this memory * address; zero on error @@ -73,8 +73,8 @@ static int process_request_key_err(long err_code) * * Returns zero on success; non-zero on error */ -static int parse_packet_length(unsigned char *data, size_t *size, - size_t *length_size) +int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, + size_t *length_size) { int rc = 0; @@ -105,7 +105,7 @@ out: } /** - * write_packet_length + * ecryptfs_write_packet_length * @dest: The byte array target into which to write the length. Must * have at least 5 bytes allocated. * @size: The length to write. @@ -114,8 +114,8 @@ out: * * Returns zero on success; non-zero on error. */ -static int write_packet_length(char *dest, size_t size, - size_t *packet_size_length) +int ecryptfs_write_packet_length(char *dest, size_t size, + size_t *packet_size_length) { int rc = 0; @@ -162,8 +162,8 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key, goto out; } message[i++] = ECRYPTFS_TAG_64_PACKET_TYPE; - rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, - &packet_size_len); + rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, + &packet_size_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " "header; cannot generate packet length\n"); @@ -172,8 +172,9 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key, i += packet_size_len; memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); i += ECRYPTFS_SIG_SIZE_HEX; - rc = write_packet_length(&message[i], session_key->encrypted_key_size, - &packet_size_len); + rc = ecryptfs_write_packet_length(&message[i], + session_key->encrypted_key_size, + &packet_size_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " "header; cannot generate packet length\n"); @@ -225,7 +226,7 @@ parse_tag_65_packet(struct ecryptfs_session_key *session_key, u8 *cipher_code, rc = -EIO; goto out; } - rc = parse_packet_length(&data[i], &m_size, &data_len); + rc = ecryptfs_parse_packet_length(&data[i], &m_size, &data_len); if (rc) { ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " "rc = [%d]\n", rc); @@ -304,8 +305,8 @@ write_tag_66_packet(char *signature, u8 cipher_code, goto out; } message[i++] = ECRYPTFS_TAG_66_PACKET_TYPE; - rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, - &packet_size_len); + rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, + &packet_size_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " "header; cannot generate packet length\n"); @@ -315,8 +316,8 @@ write_tag_66_packet(char *signature, u8 cipher_code, memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); i += ECRYPTFS_SIG_SIZE_HEX; /* The encrypted key includes 1 byte cipher code and 2 byte checksum */ - rc = write_packet_length(&message[i], crypt_stat->key_size + 3, - &packet_size_len); + rc = ecryptfs_write_packet_length(&message[i], crypt_stat->key_size + 3, + &packet_size_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " "header; cannot generate packet length\n"); @@ -357,20 +358,25 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec, /* verify that everything through the encrypted FEK size is present */ if (message_len < 4) { rc = -EIO; + printk(KERN_ERR "%s: message_len is [%Zd]; minimum acceptable " + "message length is [%d]\n", __func__, message_len, 4); goto out; } if (data[i++] != ECRYPTFS_TAG_67_PACKET_TYPE) { - ecryptfs_printk(KERN_ERR, "Type should be ECRYPTFS_TAG_67\n"); rc = -EIO; + printk(KERN_ERR "%s: Type should be ECRYPTFS_TAG_67\n", + __func__); goto out; } if (data[i++]) { - ecryptfs_printk(KERN_ERR, "Status indicator has non zero value" - " [%d]\n", data[i-1]); rc = -EIO; + printk(KERN_ERR "%s: Status indicator has non zero " + "value [%d]\n", __func__, data[i-1]); + goto out; } - rc = parse_packet_length(&data[i], &key_rec->enc_key_size, &data_len); + rc = ecryptfs_parse_packet_length(&data[i], &key_rec->enc_key_size, + &data_len); if (rc) { ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " "rc = [%d]\n", rc); @@ -378,17 +384,17 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec, } i += data_len; if (message_len < (i + key_rec->enc_key_size)) { - ecryptfs_printk(KERN_ERR, "message_len [%d]; max len is [%d]\n", - message_len, (i + key_rec->enc_key_size)); rc = -EIO; + printk(KERN_ERR "%s: message_len [%Zd]; max len is [%Zd]\n", + __func__, message_len, (i + key_rec->enc_key_size)); goto out; } if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) { - ecryptfs_printk(KERN_ERR, "Encrypted key_size [%d] larger than " - "the maximum key size [%d]\n", - key_rec->enc_key_size, - ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES); rc = -EIO; + printk(KERN_ERR "%s: Encrypted key_size [%Zd] larger than " + "the maximum key size [%d]\n", __func__, + key_rec->enc_key_size, + ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES); goto out; } memcpy(key_rec->enc_key, &data[i], key_rec->enc_key_size); @@ -445,7 +451,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key), &netlink_message, &netlink_message_length); if (rc) { - ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet"); + ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n"); goto out; } rc = ecryptfs_send_message(ecryptfs_transport, netlink_message, @@ -570,8 +576,8 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, goto out; } (*new_auth_tok) = &auth_tok_list_item->auth_tok; - rc = parse_packet_length(&data[(*packet_size)], &body_size, - &length_size); + rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size, + &length_size); if (rc) { printk(KERN_WARNING "Error parsing packet length; " "rc = [%d]\n", rc); @@ -704,8 +710,8 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, goto out; } (*new_auth_tok) = &auth_tok_list_item->auth_tok; - rc = parse_packet_length(&data[(*packet_size)], &body_size, - &length_size); + rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size, + &length_size); if (rc) { printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n", rc); @@ -852,8 +858,8 @@ parse_tag_11_packet(unsigned char *data, unsigned char *contents, rc = -EINVAL; goto out; } - rc = parse_packet_length(&data[(*packet_size)], &body_size, - &length_size); + rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size, + &length_size); if (rc) { printk(KERN_WARNING "Invalid tag 11 packet format\n"); goto out; @@ -1405,8 +1411,8 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes, auth_tok->token.private_key.key_size; rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec); if (rc) { - ecryptfs_printk(KERN_ERR, "Failed to encrypt session key " - "via a pki"); + printk(KERN_ERR "Failed to encrypt session key via a key " + "module; rc = [%d]\n", rc); goto out; } if (ecryptfs_verbosity > 0) { @@ -1430,8 +1436,9 @@ encrypted_session_key_set: goto out; } dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE; - rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), - &packet_size_length); + rc = ecryptfs_write_packet_length(&dest[(*packet_size)], + (max_packet_size - 4), + &packet_size_length); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet " "header; cannot generate packet length\n"); @@ -1489,8 +1496,9 @@ write_tag_11_packet(char *dest, size_t *remaining_bytes, char *contents, goto out; } dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE; - rc = write_packet_length(&dest[(*packet_length)], - (max_packet_size - 4), &packet_size_length); + rc = ecryptfs_write_packet_length(&dest[(*packet_length)], + (max_packet_size - 4), + &packet_size_length); if (rc) { printk(KERN_ERR "Error generating tag 11 packet header; cannot " "generate packet length. rc = [%d]\n", rc); @@ -1682,8 +1690,9 @@ encrypted_session_key_set: dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE; /* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3) * to get the number of octets in the actual Tag 3 packet */ - rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), - &packet_size_length); + rc = ecryptfs_write_packet_length(&dest[(*packet_size)], + (max_packet_size - 4), + &packet_size_length); if (rc) { printk(KERN_ERR "Error generating tag 3 packet header; cannot " "generate packet length. rc = [%d]\n", rc); diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 9cc2aec27b0..c6038bd6089 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -1,7 +1,7 @@ /** * eCryptfs: Linux filesystem encryption layer * - * Copyright (C) 2004-2006 International Business Machines Corp. + * Copyright (C) 2004-2008 International Business Machines Corp. * Author(s): Michael A. Halcrow * Tyler Hicks * @@ -26,13 +26,13 @@ static LIST_HEAD(ecryptfs_msg_ctx_free_list); static LIST_HEAD(ecryptfs_msg_ctx_alloc_list); static struct mutex ecryptfs_msg_ctx_lists_mux; -static struct hlist_head *ecryptfs_daemon_id_hash; -static struct mutex ecryptfs_daemon_id_hash_mux; +static struct hlist_head *ecryptfs_daemon_hash; +struct mutex ecryptfs_daemon_hash_mux; static int ecryptfs_hash_buckets; #define ecryptfs_uid_hash(uid) \ hash_long((unsigned long)uid, ecryptfs_hash_buckets) -static unsigned int ecryptfs_msg_counter; +static u32 ecryptfs_msg_counter; static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; /** @@ -40,9 +40,10 @@ static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; * @msg_ctx: The context that was acquired from the free list * * Acquires a context element from the free list and locks the mutex - * on the context. Returns zero on success; non-zero on error or upon - * failure to acquire a free context element. Be sure to lock the - * list mutex before calling. + * on the context. Sets the msg_ctx task to current. Returns zero on + * success; non-zero on error or upon failure to acquire a free + * context element. Must be called with ecryptfs_msg_ctx_lists_mux + * held. */ static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx) { @@ -50,11 +51,11 @@ static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx) int rc; if (list_empty(&ecryptfs_msg_ctx_free_list)) { - ecryptfs_printk(KERN_WARNING, "The eCryptfs free " - "context list is empty. It may be helpful to " - "specify the ecryptfs_message_buf_len " - "parameter to be greater than the current " - "value of [%d]\n", ecryptfs_message_buf_len); + printk(KERN_WARNING "%s: The eCryptfs free " + "context list is empty. It may be helpful to " + "specify the ecryptfs_message_buf_len " + "parameter to be greater than the current " + "value of [%d]\n", __func__, ecryptfs_message_buf_len); rc = -ENOMEM; goto out; } @@ -75,8 +76,7 @@ out: * ecryptfs_msg_ctx_free_to_alloc * @msg_ctx: The context to move from the free list to the alloc list * - * Be sure to lock the list mutex and the context mutex before - * calling. + * Must be called with ecryptfs_msg_ctx_lists_mux held. */ static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx) { @@ -89,36 +89,37 @@ static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx) * ecryptfs_msg_ctx_alloc_to_free * @msg_ctx: The context to move from the alloc list to the free list * - * Be sure to lock the list mutex and the context mutex before - * calling. + * Must be called with ecryptfs_msg_ctx_lists_mux held. */ -static void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) +void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) { list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list); if (msg_ctx->msg) kfree(msg_ctx->msg); + msg_ctx->msg = NULL; msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE; } /** - * ecryptfs_find_daemon_id - * @uid: The user id which maps to the desired daemon id - * @id: If return value is zero, points to the desired daemon id - * pointer + * ecryptfs_find_daemon_by_euid + * @euid: The effective user id which maps to the desired daemon id + * @daemon: If return value is zero, points to the desired daemon pointer * - * Search the hash list for the given user id. Returns zero if the - * user id exists in the list; non-zero otherwise. The daemon id hash - * mutex should be held before calling this function. + * Must be called with ecryptfs_daemon_hash_mux held. + * + * Search the hash list for the given user id. + * + * Returns zero if the user id exists in the list; non-zero otherwise. */ -static int ecryptfs_find_daemon_id(uid_t uid, struct ecryptfs_daemon_id **id) +int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid) { struct hlist_node *elem; int rc; - hlist_for_each_entry(*id, elem, - &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)], - id_chain) { - if ((*id)->uid == uid) { + hlist_for_each_entry(*daemon, elem, + &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)], + euid_chain) { + if ((*daemon)->euid == euid) { rc = 0; goto out; } @@ -128,181 +129,291 @@ out: return rc; } -static int ecryptfs_send_raw_message(unsigned int transport, u16 msg_type, - pid_t pid) +static int +ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, + u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx); + +/** + * ecryptfs_send_raw_message + * @transport: Transport type + * @msg_type: Message type + * @daemon: Daemon struct for recipient of message + * + * A raw message is one that does not include an ecryptfs_message + * struct. It simply has a type. + * + * Must be called with ecryptfs_daemon_hash_mux held. + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_send_raw_message(unsigned int transport, u8 msg_type, + struct ecryptfs_daemon *daemon) { + struct ecryptfs_msg_ctx *msg_ctx; int rc; switch(transport) { case ECRYPTFS_TRANSPORT_NETLINK: - rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, pid); + rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, + daemon->pid); + break; + case ECRYPTFS_TRANSPORT_MISCDEV: + rc = ecryptfs_send_message_locked(transport, NULL, 0, msg_type, + &msg_ctx); + if (rc) { + printk(KERN_ERR "%s: Error whilst attempting to send " + "message via procfs; rc = [%d]\n", __func__, rc); + goto out; + } + /* Raw messages are logically context-free (e.g., no + * reply is expected), so we set the state of the + * ecryptfs_msg_ctx object to indicate that it should + * be freed as soon as the transport sends out the message. */ + mutex_lock(&msg_ctx->mux); + msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY; + mutex_unlock(&msg_ctx->mux); break; case ECRYPTFS_TRANSPORT_CONNECTOR: case ECRYPTFS_TRANSPORT_RELAYFS: default: rc = -ENOSYS; } +out: + return rc; +} + +/** + * ecryptfs_spawn_daemon - Create and initialize a new daemon struct + * @daemon: Pointer to set to newly allocated daemon struct + * @euid: Effective user id for the daemon + * @pid: Process id for the daemon + * + * Must be called ceremoniously while in possession of + * ecryptfs_sacred_daemon_hash_mux + * + * Returns zero on success; non-zero otherwise + */ +int +ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, pid_t pid) +{ + int rc = 0; + + (*daemon) = kzalloc(sizeof(**daemon), GFP_KERNEL); + if (!(*daemon)) { + rc = -ENOMEM; + printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of " + "GFP_KERNEL memory\n", __func__, sizeof(**daemon)); + goto out; + } + (*daemon)->euid = euid; + (*daemon)->pid = pid; + (*daemon)->task = current; + mutex_init(&(*daemon)->mux); + INIT_LIST_HEAD(&(*daemon)->msg_ctx_out_queue); + init_waitqueue_head(&(*daemon)->wait); + (*daemon)->num_queued_msg_ctx = 0; + hlist_add_head(&(*daemon)->euid_chain, + &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)]); +out: return rc; } /** * ecryptfs_process_helo * @transport: The underlying transport (netlink, etc.) - * @uid: The user ID owner of the message + * @euid: The user ID owner of the message * @pid: The process ID for the userspace program that sent the * message * - * Adds the uid and pid values to the daemon id hash. If a uid + * Adds the euid and pid values to the daemon euid hash. If an euid * already has a daemon pid registered, the daemon will be - * unregistered before the new daemon id is put into the hash list. - * Returns zero after adding a new daemon id to the hash list; + * unregistered before the new daemon is put into the hash list. + * Returns zero after adding a new daemon to the hash list; * non-zero otherwise. */ -int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid) +int ecryptfs_process_helo(unsigned int transport, uid_t euid, pid_t pid) { - struct ecryptfs_daemon_id *new_id; - struct ecryptfs_daemon_id *old_id; + struct ecryptfs_daemon *new_daemon; + struct ecryptfs_daemon *old_daemon; int rc; - mutex_lock(&ecryptfs_daemon_id_hash_mux); - new_id = kmalloc(sizeof(*new_id), GFP_KERNEL); - if (!new_id) { - rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Failed to allocate memory; unable " - "to register daemon [%d] for user [%d]\n", - pid, uid); - goto unlock; - } - if (!ecryptfs_find_daemon_id(uid, &old_id)) { + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid); + if (rc != 0) { printk(KERN_WARNING "Received request from user [%d] " "to register daemon [%d]; unregistering daemon " - "[%d]\n", uid, pid, old_id->pid); - hlist_del(&old_id->id_chain); - rc = ecryptfs_send_raw_message(transport, ECRYPTFS_NLMSG_QUIT, - old_id->pid); + "[%d]\n", euid, pid, old_daemon->pid); + rc = ecryptfs_send_raw_message(transport, ECRYPTFS_MSG_QUIT, + old_daemon); if (rc) printk(KERN_WARNING "Failed to send QUIT " "message to daemon [%d]; rc = [%d]\n", - old_id->pid, rc); - kfree(old_id); + old_daemon->pid, rc); + hlist_del(&old_daemon->euid_chain); + kfree(old_daemon); } - new_id->uid = uid; - new_id->pid = pid; - hlist_add_head(&new_id->id_chain, - &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)]); - rc = 0; -unlock: - mutex_unlock(&ecryptfs_daemon_id_hash_mux); + rc = ecryptfs_spawn_daemon(&new_daemon, euid, pid); + if (rc) + printk(KERN_ERR "%s: The gods are displeased with this attempt " + "to create a new daemon object for euid [%d]; pid [%d]; " + "rc = [%d]\n", __func__, euid, pid, rc); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return rc; +} + +/** + * ecryptfs_exorcise_daemon - Destroy the daemon struct + * + * Must be called ceremoniously while in possession of + * ecryptfs_daemon_hash_mux and the daemon's own mux. + */ +int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon) +{ + struct ecryptfs_msg_ctx *msg_ctx, *msg_ctx_tmp; + int rc = 0; + + mutex_lock(&daemon->mux); + if ((daemon->flags & ECRYPTFS_DAEMON_IN_READ) + || (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)) { + rc = -EBUSY; + printk(KERN_WARNING "%s: Attempt to destroy daemon with pid " + "[%d], but it is in the midst of a read or a poll\n", + __func__, daemon->pid); + mutex_unlock(&daemon->mux); + goto out; + } + list_for_each_entry_safe(msg_ctx, msg_ctx_tmp, + &daemon->msg_ctx_out_queue, daemon_out_list) { + list_del(&msg_ctx->daemon_out_list); + daemon->num_queued_msg_ctx--; + printk(KERN_WARNING "%s: Warning: dropping message that is in " + "the out queue of a dying daemon\n", __func__); + ecryptfs_msg_ctx_alloc_to_free(msg_ctx); + } + hlist_del(&daemon->euid_chain); + if (daemon->task) + wake_up_process(daemon->task); + mutex_unlock(&daemon->mux); + memset(daemon, 0, sizeof(*daemon)); + kfree(daemon); +out: return rc; } /** * ecryptfs_process_quit - * @uid: The user ID owner of the message + * @euid: The user ID owner of the message * @pid: The process ID for the userspace program that sent the * message * - * Deletes the corresponding daemon id for the given uid and pid, if + * Deletes the corresponding daemon for the given euid and pid, if * it is the registered that is requesting the deletion. Returns zero - * after deleting the desired daemon id; non-zero otherwise. + * after deleting the desired daemon; non-zero otherwise. */ -int ecryptfs_process_quit(uid_t uid, pid_t pid) +int ecryptfs_process_quit(uid_t euid, pid_t pid) { - struct ecryptfs_daemon_id *id; + struct ecryptfs_daemon *daemon; int rc; - mutex_lock(&ecryptfs_daemon_id_hash_mux); - if (ecryptfs_find_daemon_id(uid, &id)) { - rc = -EINVAL; - ecryptfs_printk(KERN_ERR, "Received request from user [%d] to " - "unregister unrecognized daemon [%d]\n", uid, - pid); - goto unlock; - } - if (id->pid != pid) { + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid); + if (rc || !daemon) { rc = -EINVAL; - ecryptfs_printk(KERN_WARNING, "Received request from user [%d] " - "with pid [%d] to unregister daemon [%d]\n", - uid, pid, id->pid); - goto unlock; + printk(KERN_ERR "Received request from user [%d] to " + "unregister unrecognized daemon [%d]\n", euid, pid); + goto out_unlock; } - hlist_del(&id->id_chain); - kfree(id); - rc = 0; -unlock: - mutex_unlock(&ecryptfs_daemon_id_hash_mux); + rc = ecryptfs_exorcise_daemon(daemon); +out_unlock: + mutex_unlock(&ecryptfs_daemon_hash_mux); return rc; } /** * ecryptfs_process_reponse * @msg: The ecryptfs message received; the caller should sanity check - * msg->data_len + * msg->data_len and free the memory * @pid: The process ID of the userspace application that sent the * message - * @seq: The sequence number of the message + * @seq: The sequence number of the message; must match the sequence + * number for the existing message context waiting for this + * response + * + * Processes a response message after sending an operation request to + * userspace. Some other process is awaiting this response. Before + * sending out its first communications, the other process allocated a + * msg_ctx from the ecryptfs_msg_ctx_arr at a particular index. The + * response message contains this index so that we can copy over the + * response message into the msg_ctx that the process holds a + * reference to. The other process is going to wake up, check to see + * that msg_ctx->state == ECRYPTFS_MSG_CTX_STATE_DONE, and then + * proceed to read off and process the response message. Returns zero + * upon delivery to desired context element; non-zero upon delivery + * failure or error. * - * Processes a response message after sending a operation request to - * userspace. Returns zero upon delivery to desired context element; - * non-zero upon delivery failure or error. + * Returns zero on success; non-zero otherwise */ -int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, +int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, pid_t pid, u32 seq) { - struct ecryptfs_daemon_id *id; + struct ecryptfs_daemon *daemon; struct ecryptfs_msg_ctx *msg_ctx; - int msg_size; + size_t msg_size; int rc; if (msg->index >= ecryptfs_message_buf_len) { rc = -EINVAL; - ecryptfs_printk(KERN_ERR, "Attempt to reference " - "context buffer at index [%d]; maximum " - "allowable is [%d]\n", msg->index, - (ecryptfs_message_buf_len - 1)); + printk(KERN_ERR "%s: Attempt to reference " + "context buffer at index [%d]; maximum " + "allowable is [%d]\n", __func__, msg->index, + (ecryptfs_message_buf_len - 1)); goto out; } msg_ctx = &ecryptfs_msg_ctx_arr[msg->index]; mutex_lock(&msg_ctx->mux); - if (ecryptfs_find_daemon_id(msg_ctx->task->euid, &id)) { + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = ecryptfs_find_daemon_by_euid(&daemon, msg_ctx->task->euid); + mutex_unlock(&ecryptfs_daemon_hash_mux); + if (rc) { rc = -EBADMSG; - ecryptfs_printk(KERN_WARNING, "User [%d] received a " - "message response from process [%d] but does " - "not have a registered daemon\n", - msg_ctx->task->euid, pid); + printk(KERN_WARNING "%s: User [%d] received a " + "message response from process [%d] but does " + "not have a registered daemon\n", __func__, + msg_ctx->task->euid, pid); goto wake_up; } - if (msg_ctx->task->euid != uid) { + if (msg_ctx->task->euid != euid) { rc = -EBADMSG; - ecryptfs_printk(KERN_WARNING, "Received message from user " - "[%d]; expected message from user [%d]\n", - uid, msg_ctx->task->euid); + printk(KERN_WARNING "%s: Received message from user " + "[%d]; expected message from user [%d]\n", __func__, + euid, msg_ctx->task->euid); goto unlock; } - if (id->pid != pid) { + if (daemon->pid != pid) { rc = -EBADMSG; - ecryptfs_printk(KERN_ERR, "User [%d] received a " - "message response from an unrecognized " - "process [%d]\n", msg_ctx->task->euid, pid); + printk(KERN_ERR "%s: User [%d] sent a message response " + "from an unrecognized process [%d]\n", + __func__, msg_ctx->task->euid, pid); goto unlock; } if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) { rc = -EINVAL; - ecryptfs_printk(KERN_WARNING, "Desired context element is not " - "pending a response\n"); + printk(KERN_WARNING "%s: Desired context element is not " + "pending a response\n", __func__); goto unlock; } else if (msg_ctx->counter != seq) { rc = -EINVAL; - ecryptfs_printk(KERN_WARNING, "Invalid message sequence; " - "expected [%d]; received [%d]\n", - msg_ctx->counter, seq); + printk(KERN_WARNING "%s: Invalid message sequence; " + "expected [%d]; received [%d]\n", __func__, + msg_ctx->counter, seq); goto unlock; } - msg_size = sizeof(*msg) + msg->data_len; + msg_size = (sizeof(*msg) + msg->data_len); msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL); if (!msg_ctx->msg) { rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); + printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of " + "GFP_KERNEL memory\n", __func__, msg_size); goto unlock; } memcpy(msg_ctx->msg, msg, msg_size); @@ -317,34 +428,37 @@ out: } /** - * ecryptfs_send_message + * ecryptfs_send_message_locked * @transport: The transport over which to send the message (i.e., * netlink) * @data: The data to send * @data_len: The length of data * @msg_ctx: The message context allocated for the send + * + * Must be called with ecryptfs_daemon_hash_mux held. + * + * Returns zero on success; non-zero otherwise */ -int ecryptfs_send_message(unsigned int transport, char *data, int data_len, - struct ecryptfs_msg_ctx **msg_ctx) +static int +ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, + u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx) { - struct ecryptfs_daemon_id *id; + struct ecryptfs_daemon *daemon; int rc; - mutex_lock(&ecryptfs_daemon_id_hash_mux); - if (ecryptfs_find_daemon_id(current->euid, &id)) { - mutex_unlock(&ecryptfs_daemon_id_hash_mux); + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid); + if (rc || !daemon) { rc = -ENOTCONN; - ecryptfs_printk(KERN_ERR, "User [%d] does not have a daemon " - "registered\n", current->euid); + printk(KERN_ERR "%s: User [%d] does not have a daemon " + "registered\n", __func__, current->euid); goto out; } - mutex_unlock(&ecryptfs_daemon_id_hash_mux); mutex_lock(&ecryptfs_msg_ctx_lists_mux); rc = ecryptfs_acquire_free_msg_ctx(msg_ctx); if (rc) { mutex_unlock(&ecryptfs_msg_ctx_lists_mux); - ecryptfs_printk(KERN_WARNING, "Could not claim a free " - "context element\n"); + printk(KERN_WARNING "%s: Could not claim a free " + "context element\n", __func__); goto out; } ecryptfs_msg_ctx_free_to_alloc(*msg_ctx); @@ -352,22 +466,49 @@ int ecryptfs_send_message(unsigned int transport, char *data, int data_len, mutex_unlock(&ecryptfs_msg_ctx_lists_mux); switch (transport) { case ECRYPTFS_TRANSPORT_NETLINK: - rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, - ECRYPTFS_NLMSG_REQUEST, 0, id->pid); + rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, msg_type, + 0, daemon->pid); + break; + case ECRYPTFS_TRANSPORT_MISCDEV: + rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type, + 0, daemon); break; case ECRYPTFS_TRANSPORT_CONNECTOR: case ECRYPTFS_TRANSPORT_RELAYFS: default: rc = -ENOSYS; } - if (rc) { - printk(KERN_ERR "Error attempting to send message to userspace " - "daemon; rc = [%d]\n", rc); - } + if (rc) + printk(KERN_ERR "%s: Error attempting to send message to " + "userspace daemon; rc = [%d]\n", __func__, rc); out: return rc; } +/** + * ecryptfs_send_message + * @transport: The transport over which to send the message (i.e., + * netlink) + * @data: The data to send + * @data_len: The length of data + * @msg_ctx: The message context allocated for the send + * + * Grabs ecryptfs_daemon_hash_mux. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_send_message(unsigned int transport, char *data, int data_len, + struct ecryptfs_msg_ctx **msg_ctx) +{ + int rc; + + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = ecryptfs_send_message_locked(transport, data, data_len, + ECRYPTFS_MSG_REQUEST, msg_ctx); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return rc; +} + /** * ecryptfs_wait_for_response * @msg_ctx: The context that was assigned when sending a message @@ -377,7 +518,7 @@ out: * of time exceeds ecryptfs_message_wait_timeout. If zero is * returned, msg will point to a valid message from userspace; a * non-zero value is returned upon failure to receive a message or an - * error occurs. + * error occurs. Callee must free @msg on success. */ int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, struct ecryptfs_message **msg) @@ -413,32 +554,32 @@ int ecryptfs_init_messaging(unsigned int transport) if (ecryptfs_number_of_users > ECRYPTFS_MAX_NUM_USERS) { ecryptfs_number_of_users = ECRYPTFS_MAX_NUM_USERS; - ecryptfs_printk(KERN_WARNING, "Specified number of users is " - "too large, defaulting to [%d] users\n", - ecryptfs_number_of_users); + printk(KERN_WARNING "%s: Specified number of users is " + "too large, defaulting to [%d] users\n", __func__, + ecryptfs_number_of_users); } - mutex_init(&ecryptfs_daemon_id_hash_mux); - mutex_lock(&ecryptfs_daemon_id_hash_mux); + mutex_init(&ecryptfs_daemon_hash_mux); + mutex_lock(&ecryptfs_daemon_hash_mux); ecryptfs_hash_buckets = 1; while (ecryptfs_number_of_users >> ecryptfs_hash_buckets) ecryptfs_hash_buckets++; - ecryptfs_daemon_id_hash = kmalloc(sizeof(struct hlist_head) - * ecryptfs_hash_buckets, GFP_KERNEL); - if (!ecryptfs_daemon_id_hash) { + ecryptfs_daemon_hash = kmalloc((sizeof(struct hlist_head) + * ecryptfs_hash_buckets), GFP_KERNEL); + if (!ecryptfs_daemon_hash) { rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); - mutex_unlock(&ecryptfs_daemon_id_hash_mux); + printk(KERN_ERR "%s: Failed to allocate memory\n", __func__); + mutex_unlock(&ecryptfs_daemon_hash_mux); goto out; } for (i = 0; i < ecryptfs_hash_buckets; i++) - INIT_HLIST_HEAD(&ecryptfs_daemon_id_hash[i]); - mutex_unlock(&ecryptfs_daemon_id_hash_mux); - + INIT_HLIST_HEAD(&ecryptfs_daemon_hash[i]); + mutex_unlock(&ecryptfs_daemon_hash_mux); ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx) - * ecryptfs_message_buf_len), GFP_KERNEL); + * ecryptfs_message_buf_len), + GFP_KERNEL); if (!ecryptfs_msg_ctx_arr) { rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); + printk(KERN_ERR "%s: Failed to allocate memory\n", __func__); goto out; } mutex_init(&ecryptfs_msg_ctx_lists_mux); @@ -446,6 +587,7 @@ int ecryptfs_init_messaging(unsigned int transport) ecryptfs_msg_counter = 0; for (i = 0; i < ecryptfs_message_buf_len; i++) { INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].node); + INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].daemon_out_list); mutex_init(&ecryptfs_msg_ctx_arr[i].mux); mutex_lock(&ecryptfs_msg_ctx_arr[i].mux); ecryptfs_msg_ctx_arr[i].index = i; @@ -464,6 +606,11 @@ int ecryptfs_init_messaging(unsigned int transport) if (rc) ecryptfs_release_messaging(transport); break; + case ECRYPTFS_TRANSPORT_MISCDEV: + rc = ecryptfs_init_ecryptfs_miscdev(); + if (rc) + ecryptfs_release_messaging(transport); + break; case ECRYPTFS_TRANSPORT_CONNECTOR: case ECRYPTFS_TRANSPORT_RELAYFS: default: @@ -488,27 +635,37 @@ void ecryptfs_release_messaging(unsigned int transport) kfree(ecryptfs_msg_ctx_arr); mutex_unlock(&ecryptfs_msg_ctx_lists_mux); } - if (ecryptfs_daemon_id_hash) { + if (ecryptfs_daemon_hash) { struct hlist_node *elem; - struct ecryptfs_daemon_id *id; + struct ecryptfs_daemon *daemon; int i; - mutex_lock(&ecryptfs_daemon_id_hash_mux); + mutex_lock(&ecryptfs_daemon_hash_mux); for (i = 0; i < ecryptfs_hash_buckets; i++) { - hlist_for_each_entry(id, elem, - &ecryptfs_daemon_id_hash[i], - id_chain) { - hlist_del(elem); - kfree(id); + int rc; + + hlist_for_each_entry(daemon, elem, + &ecryptfs_daemon_hash[i], + euid_chain) { + rc = ecryptfs_exorcise_daemon(daemon); + if (rc) + printk(KERN_ERR "%s: Error whilst " + "attempting to destroy daemon; " + "rc = [%d]. Dazed and confused, " + "but trying to continue.\n", + __func__, rc); } } - kfree(ecryptfs_daemon_id_hash); - mutex_unlock(&ecryptfs_daemon_id_hash_mux); + kfree(ecryptfs_daemon_hash); + mutex_unlock(&ecryptfs_daemon_hash_mux); } switch(transport) { case ECRYPTFS_TRANSPORT_NETLINK: ecryptfs_release_netlink(); break; + case ECRYPTFS_TRANSPORT_MISCDEV: + ecryptfs_destroy_ecryptfs_miscdev(); + break; case ECRYPTFS_TRANSPORT_CONNECTOR: case ECRYPTFS_TRANSPORT_RELAYFS: default: diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 72dfec48ea2..0c559731ae3 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -196,7 +196,7 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, if (!msg_ctx->msg) { rc = -ENOMEM; printk(KERN_ERR "%s: Out of memory whilst attempting " - "to kmalloc(%d, GFP_KERNEL)\n", __func__, + "to kmalloc(%Zd, GFP_KERNEL)\n", __func__, (sizeof(*msg_ctx->msg) + data_size)); goto out_unlock; } @@ -232,7 +232,7 @@ out_unlock: * * Returns the number of bytes copied into the user buffer */ -static int +static ssize_t ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c index f638a698dc5..eb70f69d705 100644 --- a/fs/ecryptfs/netlink.c +++ b/fs/ecryptfs/netlink.c @@ -44,7 +44,7 @@ static struct sock *ecryptfs_nl_sock; * upon sending the message; non-zero upon error. */ int ecryptfs_send_netlink(char *data, int data_len, - struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, u16 msg_flags, pid_t daemon_pid) { struct sk_buff *skb; @@ -176,20 +176,20 @@ static void ecryptfs_receive_nl_message(struct sk_buff *skb) goto free; } switch (nlh->nlmsg_type) { - case ECRYPTFS_NLMSG_RESPONSE: + case ECRYPTFS_MSG_RESPONSE: if (ecryptfs_process_nl_response(skb)) { ecryptfs_printk(KERN_WARNING, "Failed to " "deliver netlink response to " "requesting operation\n"); } break; - case ECRYPTFS_NLMSG_HELO: + case ECRYPTFS_MSG_HELO: if (ecryptfs_process_nl_helo(skb)) { ecryptfs_printk(KERN_WARNING, "Failed to " "fulfill HELO request\n"); } break; - case ECRYPTFS_NLMSG_QUIT: + case ECRYPTFS_MSG_QUIT: if (ecryptfs_process_nl_quit(skb)) { ecryptfs_printk(KERN_WARNING, "Failed to " "fulfill QUIT request\n"); -- cgit v1.2.3 From 6a3fd92e73fffd9e583650c56ad9558afe51dc5c Mon Sep 17 00:00:00 2001 From: Michael Halcrow Date: Tue, 29 Apr 2008 00:59:52 -0700 Subject: eCryptfs: make key module subsystem respect namespaces Make eCryptfs key module subsystem respect namespaces. Since I will be removing the netlink interface in a future patch, I just made changes to the netlink.c code so that it will not break the build. With my recent patches, the kernel module currently defaults to the device handle interface rather than the netlink interface. [akpm@linux-foundation.org: export free_user_ns()] Signed-off-by: Michael Halcrow Acked-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/ecryptfs_kernel.h | 25 ++++++++----- fs/ecryptfs/messaging.c | 81 ++++++++++++++++++++++++++++++++----------- fs/ecryptfs/miscdev.c | 68 +++++++++++++++++++++++------------- fs/ecryptfs/netlink.c | 25 ++++++++----- 4 files changed, 135 insertions(+), 64 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 72e117706a6..951ee33a022 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -34,6 +34,7 @@ #include #include #include +#include /* Version verification for shared data structures w/ userspace */ #define ECRYPTFS_VERSION_MAJOR 0x00 @@ -410,8 +411,9 @@ struct ecryptfs_daemon { #define ECRYPTFS_DAEMON_MISCDEV_OPEN 0x00000008 u32 flags; u32 num_queued_msg_ctx; - pid_t pid; + struct pid *pid; uid_t euid; + struct user_namespace *user_ns; struct task_struct *task; struct mutex mux; struct list_head msg_ctx_out_queue; @@ -610,10 +612,13 @@ int ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); -int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid); -int ecryptfs_process_quit(uid_t uid, pid_t pid); -int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, - pid_t pid, u32 seq); +int ecryptfs_process_helo(unsigned int transport, uid_t euid, + struct user_namespace *user_ns, struct pid *pid); +int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, + struct pid *pid); +int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, + struct user_namespace *user_ns, struct pid *pid, + u32 seq); int ecryptfs_send_message(unsigned int transport, char *data, int data_len, struct ecryptfs_msg_ctx **msg_ctx); int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, @@ -623,13 +628,13 @@ void ecryptfs_release_messaging(unsigned int transport); int ecryptfs_send_netlink(char *data, int data_len, struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, - u16 msg_flags, pid_t daemon_pid); + u16 msg_flags, struct pid *daemon_pid); int ecryptfs_init_netlink(void); void ecryptfs_release_netlink(void); int ecryptfs_send_connector(char *data, int data_len, struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, - u16 msg_flags, pid_t daemon_pid); + u16 msg_flags, struct pid *daemon_pid); int ecryptfs_init_connector(void); void ecryptfs_release_connector(void); void @@ -672,7 +677,8 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, struct inode *ecryptfs_inode); struct page *ecryptfs_get_locked_page(struct file *file, loff_t index); int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); -int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid); +int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid, + struct user_namespace *user_ns); int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, size_t *length_size); int ecryptfs_write_packet_length(char *dest, size_t size, @@ -684,6 +690,7 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, u16 msg_flags, struct ecryptfs_daemon *daemon); void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx); int -ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, pid_t pid); +ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, + struct user_namespace *user_ns, struct pid *pid); #endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index c6038bd6089..1b5c20058ac 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -20,6 +20,8 @@ * 02111-1307, USA. */ #include +#include +#include #include "ecryptfs_kernel.h" static LIST_HEAD(ecryptfs_msg_ctx_free_list); @@ -103,6 +105,7 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) /** * ecryptfs_find_daemon_by_euid * @euid: The effective user id which maps to the desired daemon id + * @user_ns: The namespace in which @euid applies * @daemon: If return value is zero, points to the desired daemon pointer * * Must be called with ecryptfs_daemon_hash_mux held. @@ -111,7 +114,8 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) * * Returns zero if the user id exists in the list; non-zero otherwise. */ -int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid) +int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid, + struct user_namespace *user_ns) { struct hlist_node *elem; int rc; @@ -119,7 +123,7 @@ int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid) hlist_for_each_entry(*daemon, elem, &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)], euid_chain) { - if ((*daemon)->euid == euid) { + if ((*daemon)->euid == euid && (*daemon)->user_ns == user_ns) { rc = 0; goto out; } @@ -186,6 +190,7 @@ out: * ecryptfs_spawn_daemon - Create and initialize a new daemon struct * @daemon: Pointer to set to newly allocated daemon struct * @euid: Effective user id for the daemon + * @user_ns: The namespace in which @euid applies * @pid: Process id for the daemon * * Must be called ceremoniously while in possession of @@ -194,7 +199,8 @@ out: * Returns zero on success; non-zero otherwise */ int -ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, pid_t pid) +ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, + struct user_namespace *user_ns, struct pid *pid) { int rc = 0; @@ -206,7 +212,8 @@ ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, pid_t pid) goto out; } (*daemon)->euid = euid; - (*daemon)->pid = pid; + (*daemon)->user_ns = get_user_ns(user_ns); + (*daemon)->pid = get_pid(pid); (*daemon)->task = current; mutex_init(&(*daemon)->mux); INIT_LIST_HEAD(&(*daemon)->msg_ctx_out_queue); @@ -222,6 +229,7 @@ out: * ecryptfs_process_helo * @transport: The underlying transport (netlink, etc.) * @euid: The user ID owner of the message + * @user_ns: The namespace in which @euid applies * @pid: The process ID for the userspace program that sent the * message * @@ -231,32 +239,33 @@ out: * Returns zero after adding a new daemon to the hash list; * non-zero otherwise. */ -int ecryptfs_process_helo(unsigned int transport, uid_t euid, pid_t pid) +int ecryptfs_process_helo(unsigned int transport, uid_t euid, + struct user_namespace *user_ns, struct pid *pid) { struct ecryptfs_daemon *new_daemon; struct ecryptfs_daemon *old_daemon; int rc; mutex_lock(&ecryptfs_daemon_hash_mux); - rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid); + rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid, user_ns); if (rc != 0) { printk(KERN_WARNING "Received request from user [%d] " - "to register daemon [%d]; unregistering daemon " - "[%d]\n", euid, pid, old_daemon->pid); + "to register daemon [0x%p]; unregistering daemon " + "[0x%p]\n", euid, pid, old_daemon->pid); rc = ecryptfs_send_raw_message(transport, ECRYPTFS_MSG_QUIT, old_daemon); if (rc) printk(KERN_WARNING "Failed to send QUIT " - "message to daemon [%d]; rc = [%d]\n", + "message to daemon [0x%p]; rc = [%d]\n", old_daemon->pid, rc); hlist_del(&old_daemon->euid_chain); kfree(old_daemon); } - rc = ecryptfs_spawn_daemon(&new_daemon, euid, pid); + rc = ecryptfs_spawn_daemon(&new_daemon, euid, user_ns, pid); if (rc) printk(KERN_ERR "%s: The gods are displeased with this attempt " - "to create a new daemon object for euid [%d]; pid [%d]; " - "rc = [%d]\n", __func__, euid, pid, rc); + "to create a new daemon object for euid [%d]; pid " + "[0x%p]; rc = [%d]\n", __func__, euid, pid, rc); mutex_unlock(&ecryptfs_daemon_hash_mux); return rc; } @@ -277,7 +286,7 @@ int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon) || (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)) { rc = -EBUSY; printk(KERN_WARNING "%s: Attempt to destroy daemon with pid " - "[%d], but it is in the midst of a read or a poll\n", + "[0x%p], but it is in the midst of a read or a poll\n", __func__, daemon->pid); mutex_unlock(&daemon->mux); goto out; @@ -293,6 +302,10 @@ int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon) hlist_del(&daemon->euid_chain); if (daemon->task) wake_up_process(daemon->task); + if (daemon->pid) + put_pid(daemon->pid); + if (daemon->user_ns) + put_user_ns(daemon->user_ns); mutex_unlock(&daemon->mux); memset(daemon, 0, sizeof(*daemon)); kfree(daemon); @@ -303,6 +316,7 @@ out: /** * ecryptfs_process_quit * @euid: The user ID owner of the message + * @user_ns: The namespace in which @euid applies * @pid: The process ID for the userspace program that sent the * message * @@ -310,17 +324,18 @@ out: * it is the registered that is requesting the deletion. Returns zero * after deleting the desired daemon; non-zero otherwise. */ -int ecryptfs_process_quit(uid_t euid, pid_t pid) +int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, + struct pid *pid) { struct ecryptfs_daemon *daemon; int rc; mutex_lock(&ecryptfs_daemon_hash_mux); - rc = ecryptfs_find_daemon_by_euid(&daemon, euid); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, user_ns); if (rc || !daemon) { rc = -EINVAL; printk(KERN_ERR "Received request from user [%d] to " - "unregister unrecognized daemon [%d]\n", euid, pid); + "unregister unrecognized daemon [0x%p]\n", euid, pid); goto out_unlock; } rc = ecryptfs_exorcise_daemon(daemon); @@ -354,11 +369,14 @@ out_unlock: * Returns zero on success; non-zero otherwise */ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, - pid_t pid, u32 seq) + struct user_namespace *user_ns, struct pid *pid, + u32 seq) { struct ecryptfs_daemon *daemon; struct ecryptfs_msg_ctx *msg_ctx; size_t msg_size; + struct nsproxy *nsproxy; + struct user_namespace *current_user_ns; int rc; if (msg->index >= ecryptfs_message_buf_len) { @@ -372,12 +390,25 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, msg_ctx = &ecryptfs_msg_ctx_arr[msg->index]; mutex_lock(&msg_ctx->mux); mutex_lock(&ecryptfs_daemon_hash_mux); - rc = ecryptfs_find_daemon_by_euid(&daemon, msg_ctx->task->euid); + rcu_read_lock(); + nsproxy = task_nsproxy(msg_ctx->task); + if (nsproxy == NULL) { + rc = -EBADMSG; + printk(KERN_ERR "%s: Receiving process is a zombie. Dropping " + "message.\n", __func__); + rcu_read_unlock(); + mutex_unlock(&ecryptfs_daemon_hash_mux); + goto wake_up; + } + current_user_ns = nsproxy->user_ns; + rc = ecryptfs_find_daemon_by_euid(&daemon, msg_ctx->task->euid, + current_user_ns); + rcu_read_unlock(); mutex_unlock(&ecryptfs_daemon_hash_mux); if (rc) { rc = -EBADMSG; printk(KERN_WARNING "%s: User [%d] received a " - "message response from process [%d] but does " + "message response from process [0x%p] but does " "not have a registered daemon\n", __func__, msg_ctx->task->euid, pid); goto wake_up; @@ -389,10 +420,17 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, euid, msg_ctx->task->euid); goto unlock; } + if (current_user_ns != user_ns) { + rc = -EBADMSG; + printk(KERN_WARNING "%s: Received message from user_ns " + "[0x%p]; expected message from user_ns [0x%p]\n", + __func__, user_ns, nsproxy->user_ns); + goto unlock; + } if (daemon->pid != pid) { rc = -EBADMSG; printk(KERN_ERR "%s: User [%d] sent a message response " - "from an unrecognized process [%d]\n", + "from an unrecognized process [0x%p]\n", __func__, msg_ctx->task->euid, pid); goto unlock; } @@ -446,7 +484,8 @@ ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, struct ecryptfs_daemon *daemon; int rc; - rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid); + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, + current->nsproxy->user_ns); if (rc || !daemon) { rc = -ENOTCONN; printk(KERN_ERR "%s: User [%d] does not have a daemon " diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 0c559731ae3..788995efd1d 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -46,7 +46,8 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt) mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data? */ - rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid); + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, + current->nsproxy->user_ns); BUG_ON(rc || !daemon); mutex_lock(&daemon->mux); mutex_unlock(&ecryptfs_daemon_hash_mux); @@ -92,10 +93,12 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) "count; rc = [%d]\n", __func__, rc); goto out_unlock_daemon_list; } - rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid); + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, + current->nsproxy->user_ns); if (rc || !daemon) { rc = ecryptfs_spawn_daemon(&daemon, current->euid, - current->pid); + current->nsproxy->user_ns, + task_pid(current)); if (rc) { printk(KERN_ERR "%s: Error attempting to spawn daemon; " "rc = [%d]\n", __func__, rc); @@ -103,18 +106,18 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) } } mutex_lock(&daemon->mux); - if (daemon->pid != current->pid) { + if (daemon->pid != task_pid(current)) { rc = -EINVAL; - printk(KERN_ERR "%s: pid [%d] has registered with euid [%d], " - "but pid [%d] has attempted to open the handle " + printk(KERN_ERR "%s: pid [0x%p] has registered with euid [%d], " + "but pid [0x%p] has attempted to open the handle " "instead\n", __func__, daemon->pid, daemon->euid, - current->pid); + task_pid(current)); goto out_unlock_daemon; } if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) { rc = -EBUSY; printk(KERN_ERR "%s: Miscellaneous device handle may only be " - "opened once per daemon; pid [%d] already has this " + "opened once per daemon; pid [0x%p] already has this " "handle open\n", __func__, daemon->pid); goto out_unlock_daemon; } @@ -147,10 +150,11 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file) int rc; mutex_lock(&ecryptfs_daemon_hash_mux); - rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid); + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, + current->nsproxy->user_ns); BUG_ON(rc || !daemon); mutex_lock(&daemon->mux); - BUG_ON(daemon->pid != current->pid); + BUG_ON(daemon->pid != task_pid(current)); BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN)); daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN; atomic_dec(&ecryptfs_num_miscdev_opens); @@ -247,7 +251,8 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data? */ - rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid); + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, + current->nsproxy->user_ns); BUG_ON(rc || !daemon); mutex_lock(&daemon->mux); if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { @@ -285,7 +290,8 @@ check_list: goto check_list; } BUG_ON(current->euid != daemon->euid); - BUG_ON(current->pid != daemon->pid); + BUG_ON(current->nsproxy->user_ns != daemon->user_ns); + BUG_ON(task_pid(current) != daemon->pid); msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, struct ecryptfs_msg_ctx, daemon_out_list); BUG_ON(!msg_ctx); @@ -355,15 +361,18 @@ out_unlock_daemon: /** * ecryptfs_miscdev_helo * @euid: effective user id of miscdevess sending helo packet + * @user_ns: The namespace in which @euid applies * @pid: miscdevess id of miscdevess sending helo packet * * Returns zero on success; non-zero otherwise */ -static int ecryptfs_miscdev_helo(uid_t uid, pid_t pid) +static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns, + struct pid *pid) { int rc; - rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, uid, pid); + rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns, + pid); if (rc) printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); return rc; @@ -372,15 +381,17 @@ static int ecryptfs_miscdev_helo(uid_t uid, pid_t pid) /** * ecryptfs_miscdev_quit * @euid: effective user id of miscdevess sending quit packet + * @user_ns: The namespace in which @euid applies * @pid: miscdevess id of miscdevess sending quit packet * * Returns zero on success; non-zero otherwise */ -static int ecryptfs_miscdev_quit(uid_t euid, pid_t pid) +static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns, + struct pid *pid) { int rc; - rc = ecryptfs_process_quit(euid, pid); + rc = ecryptfs_process_quit(euid, user_ns, pid); if (rc) printk(KERN_WARNING "Error processing QUIT message; rc = [%d]\n", rc); @@ -392,13 +403,15 @@ static int ecryptfs_miscdev_quit(uid_t euid, pid_t pid) * @data: Bytes comprising struct ecryptfs_message * @data_size: sizeof(struct ecryptfs_message) + data len * @euid: Effective user id of miscdevess sending the miscdev response + * @user_ns: The namespace in which @euid applies * @pid: Miscdevess id of miscdevess sending the miscdev response * @seq: Sequence number for miscdev response packet * * Returns zero on success; non-zero otherwise */ static int ecryptfs_miscdev_response(char *data, size_t data_size, - uid_t euid, pid_t pid, u32 seq) + uid_t euid, struct user_namespace *user_ns, + struct pid *pid, u32 seq) { struct ecryptfs_message *msg = (struct ecryptfs_message *)data; int rc; @@ -410,7 +423,7 @@ static int ecryptfs_miscdev_response(char *data, size_t data_size, rc = -EINVAL; goto out; } - rc = ecryptfs_process_response(msg, euid, pid, seq); + rc = ecryptfs_process_response(msg, euid, user_ns, pid, seq); if (rc) printk(KERN_ERR "Error processing response message; rc = [%d]\n", rc); @@ -491,27 +504,32 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, } rc = ecryptfs_miscdev_response(&data[i], packet_size, current->euid, - current->pid, seq); + current->nsproxy->user_ns, + task_pid(current), seq); if (rc) printk(KERN_WARNING "%s: Failed to deliver miscdev " "response to requesting operation; rc = [%d]\n", __func__, rc); break; case ECRYPTFS_MSG_HELO: - rc = ecryptfs_miscdev_helo(current->euid, current->pid); + rc = ecryptfs_miscdev_helo(current->euid, + current->nsproxy->user_ns, + task_pid(current)); if (rc) { printk(KERN_ERR "%s: Error attempting to process " - "helo from pid [%d]; rc = [%d]\n", __func__, - current->pid, rc); + "helo from pid [0x%p]; rc = [%d]\n", __func__, + task_pid(current), rc); goto out_free; } break; case ECRYPTFS_MSG_QUIT: - rc = ecryptfs_miscdev_quit(current->euid, current->pid); + rc = ecryptfs_miscdev_quit(current->euid, + current->nsproxy->user_ns, + task_pid(current)); if (rc) { printk(KERN_ERR "%s: Error attempting to process " - "quit from pid [%d]; rc = [%d]\n", __func__, - current->pid, rc); + "quit from pid [0x%p]; rc = [%d]\n", __func__, + task_pid(current), rc); goto out_free; } break; diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c index eb70f69d705..e0abad62b39 100644 --- a/fs/ecryptfs/netlink.c +++ b/fs/ecryptfs/netlink.c @@ -45,7 +45,7 @@ static struct sock *ecryptfs_nl_sock; */ int ecryptfs_send_netlink(char *data, int data_len, struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, - u16 msg_flags, pid_t daemon_pid) + u16 msg_flags, struct pid *daemon_pid) { struct sk_buff *skb; struct nlmsghdr *nlh; @@ -60,7 +60,7 @@ int ecryptfs_send_netlink(char *data, int data_len, ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n"); goto out; } - nlh = NLMSG_PUT(skb, daemon_pid, msg_ctx ? msg_ctx->counter : 0, + nlh = NLMSG_PUT(skb, pid_nr(daemon_pid), msg_ctx ? msg_ctx->counter : 0, msg_type, payload_len); nlh->nlmsg_flags = msg_flags; if (msg_ctx && payload_len) { @@ -69,7 +69,7 @@ int ecryptfs_send_netlink(char *data, int data_len, msg->data_len = data_len; memcpy(msg->data, data, data_len); } - rc = netlink_unicast(ecryptfs_nl_sock, skb, daemon_pid, 0); + rc = netlink_unicast(ecryptfs_nl_sock, skb, pid_nr(daemon_pid), 0); if (rc < 0) { ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink " "message; rc = [%d]\n", rc); @@ -99,6 +99,7 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); struct ecryptfs_message *msg = NLMSG_DATA(nlh); + struct pid *pid; int rc; if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) { @@ -107,8 +108,10 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb) "incorrectly specified data length\n"); goto out; } - rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, - NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq); + pid = find_get_pid(NETLINK_CREDS(skb)->pid); + rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, NULL, + pid, nlh->nlmsg_seq); + put_pid(pid); if (rc) printk(KERN_ERR "Error processing response message; rc = [%d]\n", rc); @@ -126,11 +129,13 @@ out: */ static int ecryptfs_process_nl_helo(struct sk_buff *skb) { + struct pid *pid; int rc; + pid = find_get_pid(NETLINK_CREDS(skb)->pid); rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK, - NETLINK_CREDS(skb)->uid, - NETLINK_CREDS(skb)->pid); + NETLINK_CREDS(skb)->uid, NULL, pid); + put_pid(pid); if (rc) printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); return rc; @@ -147,10 +152,12 @@ static int ecryptfs_process_nl_helo(struct sk_buff *skb) */ static int ecryptfs_process_nl_quit(struct sk_buff *skb) { + struct pid *pid; int rc; - rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, - NETLINK_CREDS(skb)->pid); + pid = find_get_pid(NETLINK_CREDS(skb)->pid); + rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, NULL, pid); + put_pid(pid); if (rc) printk(KERN_WARNING "Error processing QUIT message; rc = [%d]\n", rc); -- cgit v1.2.3 From 2f9b12a31fcb738ea8c9eb0d4ddf906c6f1d696c Mon Sep 17 00:00:00 2001 From: Michael Halcrow Date: Tue, 29 Apr 2008 00:59:52 -0700 Subject: eCryptfs: protect crypt_stat->flags in ecryptfs_open() Make sure crypt_stat->flags is protected with a lock in ecryptfs_open(). Signed-off-by: Michael Halcrow Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/file.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 2b8f5ed4ade..2258b8f654a 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -195,7 +195,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file) file, ecryptfs_inode_to_private(inode)->lower_file); if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); + mutex_lock(&crypt_stat->cs_mutex); crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); + mutex_unlock(&crypt_stat->cs_mutex); rc = 0; goto out; } -- cgit v1.2.3 From 08ce5f16ee466ffc5bf243800deeecd77d9eaf50 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Tue, 29 Apr 2008 01:00:10 -0700 Subject: cgroups: implement device whitelist Implement a cgroup to track and enforce open and mknod restrictions on device files. A device cgroup associates a device access whitelist with each cgroup. A whitelist entry has 4 fields. 'type' is a (all), c (char), or b (block). 'all' means it applies to all types and all major and minor numbers. Major and minor are either an integer or * for all. Access is a composition of r (read), w (write), and m (mknod). The root device cgroup starts with rwm to 'all'. A child devcg gets a copy of the parent. Admins can then remove devices from the whitelist or add new entries. A child cgroup can never receive a device access which is denied its parent. However when a device access is removed from a parent it will not also be removed from the child(ren). An entry is added using devices.allow, and removed using devices.deny. For instance echo 'c 1:3 mr' > /cgroups/1/devices.allow allows cgroup 1 to read and mknod the device usually known as /dev/null. Doing echo a > /cgroups/1/devices.deny will remove the default 'a *:* mrw' entry. CAP_SYS_ADMIN is needed to change permissions or move another task to a new cgroup. A cgroup may not be granted more permissions than the cgroup's parent has. Any task can move itself between cgroups. This won't be sufficient, but we can decide the best way to adequately restrict movement later. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix may-be-used-uninitialized warning] Signed-off-by: Serge E. Hallyn Acked-by: James Morris Looks-good-to: Pavel Emelyanov Cc: Daniel Hokka Zakrisson Cc: Li Zefan Cc: Paul Menage Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index e179f71bfcb..32fd9655485 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -281,6 +282,10 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) if (retval) return retval; + retval = devcgroup_inode_permission(inode, mask); + if (retval) + return retval; + return security_inode_permission(inode, mask, nd); } @@ -2028,6 +2033,10 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) if (!dir->i_op || !dir->i_op->mknod) return -EPERM; + error = devcgroup_inode_mknod(mode, dev); + if (error) + return error; + error = security_inode_mknod(dir, dentry, mode, dev); if (error) return error; -- cgit v1.2.3 From cf475ad28ac35cc9ba612d67158f29b73b38b05d Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Tue, 29 Apr 2008 01:00:16 -0700 Subject: cgroups: add an owner to the mm_struct Remove the mem_cgroup member from mm_struct and instead adds an owner. This approach was suggested by Paul Menage. The advantage of this approach is that, once the mm->owner is known, using the subsystem id, the cgroup can be determined. It also allows several control groups that are virtually grouped by mm_struct, to exist independent of the memory controller i.e., without adding mem_cgroup's for each controller, to mm_struct. A new config option CONFIG_MM_OWNER is added and the memory resource controller selects this config option. This patch also adds cgroup callbacks to notify subsystems when mm->owner changes. The mm_cgroup_changed callback is called with the task_lock() of the new task held and is called just prior to changing the mm->owner. I am indebted to Paul Menage for the several reviews of this patchset and helping me make it lighter and simpler. This patch was tested on a powerpc box, it was compiled with both the MM_OWNER config turned on and off. After the thread group leader exits, it's moved to init_css_state by cgroup_exit(), thus all future charges from runnings threads would be redirected to the init_css_set's subsystem. Signed-off-by: Balbir Singh Cc: Pavel Emelianov Cc: Hugh Dickins Cc: Sudhir Kumar Cc: YAMAMOTO Takashi Cc: Hirokazu Takahashi Cc: David Rientjes , Cc: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Acked-by: Pekka Enberg Reviewed-by: Paul Menage Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 7768453dc98..711bc45d789 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -735,6 +735,7 @@ static int exec_mmap(struct mm_struct *mm) tsk->active_mm = mm; activate_mm(active_mm, mm); task_unlock(tsk); + mm_update_next_owner(mm); arch_pick_mmap_layout(mm); if (old_mm) { up_read(&old_mm->mmap_sem); -- cgit v1.2.3 From 6970c8eff85dd450e7eff69dad710dcf594b1bb8 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 29 Apr 2008 01:01:18 -0700 Subject: BINFMT: fill_elf_header cleanup - use straight memset first This patch does simplify fill_elf_header function by setting to zero the whole elf header first. So we fillup the fields we really need only. before: text data bss dec hex filename 11735 80 0 11815 2e27 fs/binfmt_elf.o after: text data bss dec hex filename 11710 80 0 11790 2e0e fs/binfmt_elf.o viola, 25 bytes of text is freed Signed-off-by: Cyrill Gorcunov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 9924581df6f..6cb4efbae88 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1255,26 +1255,23 @@ static int writenote(struct memelfnote *men, struct file *file, static void fill_elf_header(struct elfhdr *elf, int segs, u16 machine, u32 flags, u8 osabi) { + memset(elf, 0, sizeof(*elf)); + memcpy(elf->e_ident, ELFMAG, SELFMAG); elf->e_ident[EI_CLASS] = ELF_CLASS; elf->e_ident[EI_DATA] = ELF_DATA; elf->e_ident[EI_VERSION] = EV_CURRENT; elf->e_ident[EI_OSABI] = ELF_OSABI; - memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); elf->e_type = ET_CORE; elf->e_machine = machine; elf->e_version = EV_CURRENT; - elf->e_entry = 0; elf->e_phoff = sizeof(struct elfhdr); - elf->e_shoff = 0; elf->e_flags = flags; elf->e_ehsize = sizeof(struct elfhdr); elf->e_phentsize = sizeof(struct elf_phdr); elf->e_phnum = segs; - elf->e_shentsize = 0; - elf->e_shnum = 0; - elf->e_shstrndx = 0; + return; } -- cgit v1.2.3 From 4220b7fe89f8c0623e09168ab81dd0da2fdadd72 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 29 Apr 2008 01:01:18 -0700 Subject: elf: fix shadowed variables in fs/binfmt_elf.c Fix these sparse warings: fs/binfmt_elf.c:1749:29: warning: symbol 'tmp' shadows an earlier one fs/binfmt_elf.c:1734:28: originally declared here fs/binfmt_elf.c:2009:26: warning: symbol 'vma' shadows an earlier one fs/binfmt_elf.c:1892:24: originally declared here [akpm@linux-foundation.org: chose better variable name] Signed-off-by: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6cb4efbae88..b25707fee2c 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1722,26 +1722,25 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, info->thread_status_size = 0; if (signr) { - struct elf_thread_status *tmp; + struct elf_thread_status *ets; rcu_read_lock(); do_each_thread(g, p) if (current->mm == p->mm && current != p) { - tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); - if (!tmp) { + ets = kzalloc(sizeof(*ets), GFP_ATOMIC); + if (!ets) { rcu_read_unlock(); return 0; } - tmp->thread = p; - list_add(&tmp->list, &info->thread_list); + ets->thread = p; + list_add(&ets->list, &info->thread_list); } while_each_thread(g, p); rcu_read_unlock(); list_for_each(t, &info->thread_list) { - struct elf_thread_status *tmp; int sz; - tmp = list_entry(t, struct elf_thread_status, list); - sz = elf_dump_thread_status(signr, tmp); + ets = list_entry(t, struct elf_thread_status, list); + sz = elf_dump_thread_status(signr, ets); info->thread_status_size += sz; } } @@ -1997,10 +1996,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { struct page *page; - struct vm_area_struct *vma; + struct vm_area_struct *tmp_vma; if (get_user_pages(current, current->mm, addr, 1, 0, 1, - &page, &vma) <= 0) { + &page, &tmp_vma) <= 0) { DUMP_SEEK(PAGE_SIZE); } else { if (page == ZERO_PAGE(0)) { @@ -2010,7 +2009,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un } } else { void *kaddr; - flush_cache_page(vma, addr, + flush_cache_page(tmp_vma, addr, page_to_pfn(page)); kaddr = kmap(page); if ((size += PAGE_SIZE) > limit || -- cgit v1.2.3 From e93b4ea20adb20f1f1f07f10ba5d7dd739d2843e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:35 -0700 Subject: proc: print more information when removing non-empty directories This usually saves one recompile to insert similar printk like below. :) Sample nastygram: remove_proc_entry: removing non-empty directory '/proc/foo', leaking at least 'bar' ------------[ cut here ]------------ WARNING: at fs/proc/generic.c:776 remove_proc_entry+0x18a/0x200() Modules linked in: foo(-) container fan battery dock sbs ac sbshc backlight ipv6 loop af_packet amd_rng sr_mod i2c_amd8111 i2c_amd756 cdrom i2c_core button thermal processor Pid: 3034, comm: rmmod Tainted: G M 2.6.25-rc1 #5 Call Trace: [] warn_on_slowpath+0x64/0x90 [] printk+0x4e/0x60 [] remove_proc_entry+0x18a/0x200 [] mutex_lock_nested+0x1c8/0x2d0 [] __try_stop_module+0x0/0x40 [] sys_delete_module+0x14d/0x200 [] lockdep_sys_exit_thunk+0x35/0x67 [] __up_read+0x27/0xa0 [] trace_hardirqs_on_thunk+0x35/0x3a [] system_call_after_swapgs+0x7b/0x80 ---[ end trace 10ef850597e89c54 ]--- Signed-off-by: Alexey Dobriyan Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index a36ad3c75cf..f501f3211ab 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -777,7 +777,12 @@ continue_removing: if (S_ISDIR(de->mode)) parent->nlink--; de->nlink = 0; - WARN_ON(de->subdir); + if (de->subdir) { + printk(KERN_WARNING "%s: removing non-empty directory " + "'%s/%s', leaking at least '%s'\n", __func__, + de->parent->name, de->name, de->subdir->name); + WARN_ON(1); + } if (atomic_dec_and_test(&de->count)) free_proc_entry(de); break; -- cgit v1.2.3 From 925d1c401fa6cfd0df5d2e37da8981494ccdec07 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Tue, 29 Apr 2008 01:01:36 -0700 Subject: procfs task exe symlink The kernel implements readlink of /proc/pid/exe by getting the file from the first executable VMA. Then the path to the file is reconstructed and reported as the result. Because of the VMA walk the code is slightly different on nommu systems. This patch avoids separate /proc/pid/exe code on nommu systems. Instead of walking the VMAs to find the first executable file-backed VMA we store a reference to the exec'd file in the mm_struct. That reference would prevent the filesystem holding the executable file from being unmounted even after unmapping the VMAs. So we track the number of VM_EXECUTABLE VMAs and drop the new reference when the last one is unmapped. This avoids pinning the mounted filesystem. [akpm@linux-foundation.org: improve comments] [yamamoto@valinux.co.jp: fix dup_mmap] Signed-off-by: Matt Helsley Cc: Oleg Nesterov Cc: David Howells Cc:"Eric W. Biederman" Cc: Christoph Hellwig Cc: Al Viro Cc: Hugh Dickins Signed-off-by: YAMAMOTO Takashi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_flat.c | 3 ++- fs/exec.c | 2 ++ fs/proc/base.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/proc/internal.h | 1 - fs/proc/task_mmu.c | 34 ------------------------ fs/proc/task_nommu.c | 34 ------------------------ 6 files changed, 79 insertions(+), 70 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index c12cc362fd3..3b40d45a3a1 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -531,7 +531,8 @@ static int load_flat_file(struct linux_binprm * bprm, DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); down_write(¤t->mm->mmap_sem); - textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE, 0); + textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, + MAP_PRIVATE|MAP_EXECUTABLE, 0); up_write(¤t->mm->mmap_sem); if (!textpos || textpos >= (unsigned long) -4096) { if (!textpos) diff --git a/fs/exec.c b/fs/exec.c index 711bc45d789..a13883903ee 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -964,6 +964,8 @@ int flush_old_exec(struct linux_binprm * bprm) if (retval) goto out; + set_mm_exe_file(bprm->mm, bprm->file); + /* * Release all of the old mmap stuff */ diff --git a/fs/proc/base.c b/fs/proc/base.c index c5e412a00b1..b48ddb11994 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1181,6 +1181,81 @@ static const struct file_operations proc_pid_sched_operations = { #endif +/* + * We added or removed a vma mapping the executable. The vmas are only mapped + * during exec and are not mapped with the mmap system call. + * Callers must hold down_write() on the mm's mmap_sem for these + */ +void added_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas++; +} + +void removed_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas--; + if ((mm->num_exe_file_vmas == 0) && mm->exe_file){ + fput(mm->exe_file); + mm->exe_file = NULL; + } + +} + +void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) +{ + if (new_exe_file) + get_file(new_exe_file); + if (mm->exe_file) + fput(mm->exe_file); + mm->exe_file = new_exe_file; + mm->num_exe_file_vmas = 0; +} + +struct file *get_mm_exe_file(struct mm_struct *mm) +{ + struct file *exe_file; + + /* We need mmap_sem to protect against races with removal of + * VM_EXECUTABLE vmas */ + down_read(&mm->mmap_sem); + exe_file = mm->exe_file; + if (exe_file) + get_file(exe_file); + up_read(&mm->mmap_sem); + return exe_file; +} + +void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) +{ + /* It's safe to write the exe_file pointer without exe_file_lock because + * this is called during fork when the task is not yet in /proc */ + newmm->exe_file = get_mm_exe_file(oldmm); +} + +static int proc_exe_link(struct inode *inode, struct path *exe_path) +{ + struct task_struct *task; + struct mm_struct *mm; + struct file *exe_file; + + task = get_proc_task(inode); + if (!task) + return -ENOENT; + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + return -ENOENT; + exe_file = get_mm_exe_file(mm); + mmput(mm); + if (exe_file) { + *exe_path = exe_file->f_path; + path_get(&exe_file->f_path); + fput(exe_file); + return 0; + } else + return -ENOENT; +} + static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index bc72f5c8c47..45abb980398 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -48,7 +48,6 @@ extern int maps_protect; extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f); -extern int proc_exe_link(struct inode *, struct path *); extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7415eeb7cc3..e2b8e769f51 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -75,40 +75,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, return mm->total_vm; } -int proc_exe_link(struct inode *inode, struct path *path) -{ - struct vm_area_struct * vma; - int result = -ENOENT; - struct task_struct *task = get_proc_task(inode); - struct mm_struct * mm = NULL; - - if (task) { - mm = get_task_mm(task); - put_task_struct(task); - } - if (!mm) - goto out; - down_read(&mm->mmap_sem); - - vma = mm->mmap; - while (vma) { - if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) - break; - vma = vma->vm_next; - } - - if (vma) { - *path = vma->vm_file->f_path; - path_get(&vma->vm_file->f_path); - result = 0; - } - - up_read(&mm->mmap_sem); - mmput(mm); -out: - return result; -} - static void pad_len_spaces(struct seq_file *m, int len) { len = 25 + sizeof(void*) * 6 - len; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 8011528518b..4b733f10845 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -103,40 +103,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, return size; } -int proc_exe_link(struct inode *inode, struct path *path) -{ - struct vm_list_struct *vml; - struct vm_area_struct *vma; - struct task_struct *task = get_proc_task(inode); - struct mm_struct *mm = get_task_mm(task); - int result = -ENOENT; - - if (!mm) - goto out; - down_read(&mm->mmap_sem); - - vml = mm->context.vmlist; - vma = NULL; - while (vml) { - if ((vml->vma->vm_flags & VM_EXECUTABLE) && vml->vma->vm_file) { - vma = vml->vma; - break; - } - vml = vml->next; - } - - if (vma) { - *path = vma->vm_file->f_path; - path_get(&vma->vm_file->f_path); - result = 0; - } - - up_read(&mm->mmap_sem); - mmput(mm); -out: - return result; -} - /* * display mapping lines for a particular process's /proc/pid/maps */ -- cgit v1.2.3 From 0d5c9f5f59a61cf8e98e2925cb5d81cbe7694305 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:37 -0700 Subject: proc: switch to proc_create() Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/internal.h | 2 -- fs/proc/nommu.c | 2 +- fs/proc/proc_misc.c | 66 ++++++++++++++++++----------------------------------- fs/proc/proc_tty.c | 5 +--- 4 files changed, 24 insertions(+), 51 deletions(-) (limited to 'fs') diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 45abb980398..b1d6df671ed 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -46,8 +46,6 @@ extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); extern int maps_protect; -extern void create_seq_entry(char *name, mode_t mode, - const struct file_operations *f); extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 941e95114b5..79ecd281d2c 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -137,7 +137,7 @@ static const struct file_operations proc_nommu_vma_list_operations = { static int __init proc_nommu_init(void) { - create_seq_entry("maps", S_IRUGO, &proc_nommu_vma_list_operations); + proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations); return 0; } diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 441a32f0e5f..c4137bb94a9 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -826,14 +826,6 @@ static struct file_operations proc_kpageflags_operations = { struct proc_dir_entry *proc_root_kcore; -void create_seq_entry(char *name, mode_t mode, const struct file_operations *f) -{ - struct proc_dir_entry *entry; - entry = create_proc_entry(name, mode, NULL); - if (entry) - entry->proc_fops = f; -} - void __init proc_misc_init(void) { static struct { @@ -862,66 +854,52 @@ void __init proc_misc_init(void) /* And now for trickier ones */ #ifdef CONFIG_PRINTK - { - struct proc_dir_entry *entry; - entry = create_proc_entry("kmsg", S_IRUSR, &proc_root); - if (entry) - entry->proc_fops = &proc_kmsg_operations; - } + proc_create("kmsg", S_IRUSR, &proc_root, &proc_kmsg_operations); #endif - create_seq_entry("locks", 0, &proc_locks_operations); - create_seq_entry("devices", 0, &proc_devinfo_operations); - create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); + proc_create("locks", 0, NULL, &proc_locks_operations); + proc_create("devices", 0, NULL, &proc_devinfo_operations); + proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); #ifdef CONFIG_BLOCK - create_seq_entry("partitions", 0, &proc_partitions_operations); + proc_create("partitions", 0, NULL, &proc_partitions_operations); #endif - create_seq_entry("stat", 0, &proc_stat_operations); - create_seq_entry("interrupts", 0, &proc_interrupts_operations); + proc_create("stat", 0, NULL, &proc_stat_operations); + proc_create("interrupts", 0, NULL, &proc_interrupts_operations); #ifdef CONFIG_SLABINFO - create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); + proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); #ifdef CONFIG_DEBUG_SLAB_LEAK - create_seq_entry("slab_allocators", 0 ,&proc_slabstats_operations); + proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations); #endif #endif #ifdef CONFIG_MMU proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations); #endif - create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); - create_seq_entry("pagetypeinfo", S_IRUGO, &pagetypeinfo_file_ops); - create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); - create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); + proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); + proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops); + proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); + proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations); #ifdef CONFIG_BLOCK - create_seq_entry("diskstats", 0, &proc_diskstats_operations); + proc_create("diskstats", 0, NULL, &proc_diskstats_operations); #endif #ifdef CONFIG_MODULES - create_seq_entry("modules", 0, &proc_modules_operations); + proc_create("modules", 0, NULL, &proc_modules_operations); #endif #ifdef CONFIG_SCHEDSTATS - create_seq_entry("schedstat", 0, &proc_schedstat_operations); + proc_create("schedstat", 0, NULL, &proc_schedstat_operations); #endif #ifdef CONFIG_PROC_KCORE - proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); - if (proc_root_kcore) { - proc_root_kcore->proc_fops = &proc_kcore_operations; + proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); + if (proc_root_kcore) proc_root_kcore->size = (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; - } #endif #ifdef CONFIG_PROC_PAGE_MONITOR - create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations); - create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations); + proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations); + proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations); #endif #ifdef CONFIG_PROC_VMCORE - proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL); - if (proc_vmcore) - proc_vmcore->proc_fops = &proc_vmcore_operations; + proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); #endif #ifdef CONFIG_MAGIC_SYSRQ - { - struct proc_dir_entry *entry; - entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL); - if (entry) - entry->proc_fops = &proc_sysrq_trigger_operations; - } + proc_create("sysrq-trigger", S_IWUSR, NULL, &proc_sysrq_trigger_operations); #endif } diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index 49816e00b51..63f45383035 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -214,7 +214,6 @@ void proc_tty_unregister_driver(struct tty_driver *driver) */ void __init proc_tty_init(void) { - struct proc_dir_entry *entry; if (!proc_mkdir("tty", NULL)) return; proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL); @@ -227,7 +226,5 @@ void __init proc_tty_init(void) proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR | S_IXUSR, NULL); create_proc_read_entry("tty/ldiscs", 0, NULL, tty_ldiscs_read_proc, NULL); - entry = create_proc_entry("tty/drivers", 0, NULL); - if (entry) - entry->proc_fops = &proc_tty_drivers_operations; + proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations); } -- cgit v1.2.3 From 638fa202cdb207083a12d6f73e313605a8fc1037 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 29 Apr 2008 01:01:38 -0700 Subject: procfs: mem permission cleanup This cleans up the permission checks done for /proc/PID/mem i/o calls. It puts all the logic in a new function, check_mem_permission(). The old code repeated the (!MAY_PTRACE(task) || !ptrace_may_attach(task)) magical expression multiple times. The new function does all that work in one place, with clear comments. The old code called security_ptrace() twice on successful checks, once in MAY_PTRACE() and once in __ptrace_may_attach(). Now it's only called once, and only if all other checks have succeeded. Signed-off-by: Roland McGrath Cc: Alexey Dobriyan Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index b48ddb11994..fcf02f2deeb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -195,12 +195,32 @@ static int proc_root_link(struct inode *inode, struct path *path) return result; } -#define MAY_PTRACE(task) \ - (task == current || \ - (task->parent == current && \ - (task->ptrace & PT_PTRACED) && \ - (task_is_stopped_or_traced(task)) && \ - security_ptrace(current,task) == 0)) +/* + * Return zero if current may access user memory in @task, -error if not. + */ +static int check_mem_permission(struct task_struct *task) +{ + /* + * A task can always look at itself, in case it chooses + * to use system calls instead of load instructions. + */ + if (task == current) + return 0; + + /* + * If current is actively ptrace'ing, and would also be + * permitted to freshly attach with ptrace now, permit it. + */ + if (task->parent == current && (task->ptrace & PT_PTRACED) && + task_is_stopped_or_traced(task) && + ptrace_may_attach(task)) + return 0; + + /* + * Noone else is allowed. + */ + return -EPERM; +} struct mm_struct *mm_for_maps(struct task_struct *task) { @@ -722,7 +742,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, if (!task) goto out_no_task; - if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) + if (check_mem_permission(task)) goto out; ret = -ENOMEM; @@ -748,7 +768,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; retval = access_process_vm(task, src, page, this_len, 0); - if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { + if (!retval || check_mem_permission(task)) { if (!ret) ret = -EIO; break; @@ -792,7 +812,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf, if (!task) goto out_no_task; - if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) + if (check_mem_permission(task)) goto out; copied = -ENOMEM; -- cgit v1.2.3 From f649d6d32605c7573884613289fb3b9fbd4f99a1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:39 -0700 Subject: proc: simplify locking in remove_proc_entry() proc_subdir_lock protects only modifying and walking through PDE lists, so after we've found PDE to remove and actually removed it from lists, there is no need to hold proc_subdir_lock for the rest of operation. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 82 +++++++++++++++++++++++++++---------------------------- 1 file changed, 40 insertions(+), 42 deletions(-) (limited to 'fs') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index f501f3211ab..45d0076bc08 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -734,60 +734,58 @@ void free_proc_entry(struct proc_dir_entry *de) void remove_proc_entry(const char *name, struct proc_dir_entry *parent) { struct proc_dir_entry **p; - struct proc_dir_entry *de; + struct proc_dir_entry *de = NULL; const char *fn = name; int len; if (!parent && xlate_proc_name(name, &parent, &fn) != 0) - goto out; + return; len = strlen(fn); spin_lock(&proc_subdir_lock); for (p = &parent->subdir; *p; p=&(*p)->next ) { - if (!proc_match(len, fn, *p)) - continue; - de = *p; - *p = de->next; - de->next = NULL; - - spin_lock(&de->pde_unload_lock); - /* - * Stop accepting new callers into module. If you're - * dynamically allocating ->proc_fops, save a pointer somewhere. - */ - de->proc_fops = NULL; - /* Wait until all existing callers into module are done. */ - if (de->pde_users > 0) { - DECLARE_COMPLETION_ONSTACK(c); - - if (!de->pde_unload_completion) - de->pde_unload_completion = &c; - - spin_unlock(&de->pde_unload_lock); - spin_unlock(&proc_subdir_lock); + if (proc_match(len, fn, *p)) { + de = *p; + *p = de->next; + de->next = NULL; + break; + } + } + spin_unlock(&proc_subdir_lock); + if (!de) + return; - wait_for_completion(de->pde_unload_completion); + spin_lock(&de->pde_unload_lock); + /* + * Stop accepting new callers into module. If you're + * dynamically allocating ->proc_fops, save a pointer somewhere. + */ + de->proc_fops = NULL; + /* Wait until all existing callers into module are done. */ + if (de->pde_users > 0) { + DECLARE_COMPLETION_ONSTACK(c); + + if (!de->pde_unload_completion) + de->pde_unload_completion = &c; - spin_lock(&proc_subdir_lock); - goto continue_removing; - } spin_unlock(&de->pde_unload_lock); + wait_for_completion(de->pde_unload_completion); + + goto continue_removing; + } + spin_unlock(&de->pde_unload_lock); + continue_removing: - if (S_ISDIR(de->mode)) - parent->nlink--; - de->nlink = 0; - if (de->subdir) { - printk(KERN_WARNING "%s: removing non-empty directory " - "'%s/%s', leaking at least '%s'\n", __func__, - de->parent->name, de->name, de->subdir->name); - WARN_ON(1); - } - if (atomic_dec_and_test(&de->count)) - free_proc_entry(de); - break; + if (S_ISDIR(de->mode)) + parent->nlink--; + de->nlink = 0; + if (de->subdir) { + printk(KERN_WARNING "%s: removing non-empty directory " + "'%s/%s', leaking at least '%s'\n", __func__, + de->parent->name, de->name, de->subdir->name); + WARN_ON(1); } - spin_unlock(&proc_subdir_lock); -out: - return; + if (atomic_dec_and_test(&de->count)) + free_proc_entry(de); } -- cgit v1.2.3 From 7cee4e00e0f8aa7290266382ea903a5a1b92c9a1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:40 -0700 Subject: proc: less special case in xlate code If valid "parent" is passed to proc_create/remove_proc_entry(), then name of PDE should consist of only one path component, otherwise creation or or removal will fail. However, if NULL is passed as parent then create/remove accept full path as a argument. This is arbitrary restriction -- all infrastructure is in place. So, patch allows the following to succeed: create_proc_entry("foo/bar", 0, pde_baz); remove_proc_entry("baz/foo/bar", &proc_root); Also makes the following to behave identically: create_proc_entry("foo/bar", 0, NULL); create_proc_entry("foo/bar", 0, &proc_root); Discrepancy noticed by Den Lunev (IIRC). Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 45d0076bc08..3c6f5669523 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -277,8 +277,11 @@ static int xlate_proc_name(const char *name, int len; int rtn = 0; + de = *ret; + if (!de) + de = &proc_root; + spin_lock(&proc_subdir_lock); - de = &proc_root; while (1) { next = strchr(cp, '/'); if (!next) @@ -582,7 +585,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, /* make sure name is valid */ if (!name || !strlen(name)) goto out; - if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) + if (xlate_proc_name(name, parent, &fn) != 0) goto out; /* At this point there must not be any '/' characters beyond *fn */ @@ -738,7 +741,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) const char *fn = name; int len; - if (!parent && xlate_proc_name(name, &parent, &fn) != 0) + if (xlate_proc_name(name, &parent, &fn) != 0) return; len = strlen(fn); -- cgit v1.2.3 From 5e971dce0b2f6896e02372512df0d1fb0bfe2d55 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:41 -0700 Subject: proc: drop several "PDE valid/invalid" checks proc-misc code is noticeably full of "if (de)" checks when PDE passed is always valid. Remove them. Addition of such check in proc_lookup_de() is for failed lookup case. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 33 +++++++++++--------------- fs/proc/inode.c | 69 ++++++++++++++++++++++++++----------------------------- 2 files changed, 46 insertions(+), 56 deletions(-) (limited to 'fs') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 3c6f5669523..8b406e21a25 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -388,20 +388,18 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, lock_kernel(); spin_lock(&proc_subdir_lock); - if (de) { - for (de = de->subdir; de ; de = de->next) { - if (de->namelen != dentry->d_name.len) - continue; - if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { - unsigned int ino; - - ino = de->low_ino; - de_get(de); - spin_unlock(&proc_subdir_lock); - error = -EINVAL; - inode = proc_get_inode(dir->i_sb, ino, de); - goto out_unlock; - } + for (de = de->subdir; de ; de = de->next) { + if (de->namelen != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { + unsigned int ino; + + ino = de->low_ino; + de_get(de); + spin_unlock(&proc_subdir_lock); + error = -EINVAL; + inode = proc_get_inode(dir->i_sb, ino, de); + goto out_unlock; } } spin_unlock(&proc_subdir_lock); @@ -413,7 +411,8 @@ out_unlock: d_add(dentry, inode); return NULL; } - de_put(de); + if (de) + de_put(de); return ERR_PTR(error); } @@ -443,10 +442,6 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, lock_kernel(); ino = inode->i_ino; - if (!de) { - ret = -EINVAL; - goto out; - } i = filp->f_pos; switch (i) { case 0: diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 82b3a1b5a70..6f4e8dc97da 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -25,8 +25,7 @@ struct proc_dir_entry *de_get(struct proc_dir_entry *de) { - if (de) - atomic_inc(&de->count); + atomic_inc(&de->count); return de; } @@ -35,18 +34,16 @@ struct proc_dir_entry *de_get(struct proc_dir_entry *de) */ void de_put(struct proc_dir_entry *de) { - if (de) { - lock_kernel(); - if (!atomic_read(&de->count)) { - printk("de_put: entry %s already free!\n", de->name); - unlock_kernel(); - return; - } - - if (atomic_dec_and_test(&de->count)) - free_proc_entry(de); + lock_kernel(); + if (!atomic_read(&de->count)) { + printk("de_put: entry %s already free!\n", de->name); unlock_kernel(); + return; } + + if (atomic_dec_and_test(&de->count)) + free_proc_entry(de); + unlock_kernel(); } /* @@ -392,7 +389,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, { struct inode * inode; - if (de != NULL && !try_module_get(de->owner)) + if (!try_module_get(de->owner)) goto out_mod; inode = iget_locked(sb, ino); @@ -402,30 +399,29 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; PROC_I(inode)->fd = 0; PROC_I(inode)->pde = de; - if (de) { - if (de->mode) { - inode->i_mode = de->mode; - inode->i_uid = de->uid; - inode->i_gid = de->gid; - } - if (de->size) - inode->i_size = de->size; - if (de->nlink) - inode->i_nlink = de->nlink; - if (de->proc_iops) - inode->i_op = de->proc_iops; - if (de->proc_fops) { - if (S_ISREG(inode->i_mode)) { + + if (de->mode) { + inode->i_mode = de->mode; + inode->i_uid = de->uid; + inode->i_gid = de->gid; + } + if (de->size) + inode->i_size = de->size; + if (de->nlink) + inode->i_nlink = de->nlink; + if (de->proc_iops) + inode->i_op = de->proc_iops; + if (de->proc_fops) { + if (S_ISREG(inode->i_mode)) { #ifdef CONFIG_COMPAT - if (!de->proc_fops->compat_ioctl) - inode->i_fop = - &proc_reg_file_ops_no_compat; - else + if (!de->proc_fops->compat_ioctl) + inode->i_fop = + &proc_reg_file_ops_no_compat; + else #endif - inode->i_fop = &proc_reg_file_ops; - } else { - inode->i_fop = de->proc_fops; - } + inode->i_fop = &proc_reg_file_ops; + } else { + inode->i_fop = de->proc_fops; } } unlock_new_inode(inode); @@ -433,8 +429,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, return inode; out_ino: - if (de != NULL) - module_put(de->owner); + module_put(de->owner); out_mod: return NULL; } -- cgit v1.2.3 From 9c37066d888bf6e1b96ad12304971b3ddeabbad0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:41 -0700 Subject: proc: remove proc_bus Remove proc_bus export and variable itself. Using pathnames works fine and is slightly more understandable and greppable. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/root.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/proc/root.c b/fs/proc/root.c index ef0fb57fc9e..cc46fcba802 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -22,7 +22,7 @@ #include "internal.h" -struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; +struct proc_dir_entry *proc_root_fs, *proc_root_driver; static int proc_test_super(struct super_block *sb, void *data) { @@ -137,7 +137,7 @@ void __init proc_root_init(void) #ifdef CONFIG_PROC_DEVICETREE proc_device_tree_init(); #endif - proc_bus = proc_mkdir("bus", NULL); + proc_mkdir("bus", NULL); proc_sys_init(); } @@ -236,5 +236,4 @@ EXPORT_SYMBOL(proc_create); EXPORT_SYMBOL(remove_proc_entry); EXPORT_SYMBOL(proc_root); EXPORT_SYMBOL(proc_root_fs); -EXPORT_SYMBOL(proc_bus); EXPORT_SYMBOL(proc_root_driver); -- cgit v1.2.3 From 36a5aeb8787fbf92510ed20d806e229c55726f93 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:42 -0700 Subject: proc: remove proc_root_fs Use creation by full path instead: "fs/foo". Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cifs/cifs_debug.c | 4 ++-- fs/ext4/mballoc.c | 7 +++---- fs/jfs/jfs_debug.c | 4 ++-- fs/nfs/client.c | 6 +++--- fs/proc/root.c | 5 ++--- 5 files changed, 12 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 0228ed06069..cc950f69e51 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -468,7 +468,7 @@ cifs_proc_init(void) { struct proc_dir_entry *pde; - proc_fs_cifs = proc_mkdir("cifs", proc_root_fs); + proc_fs_cifs = proc_mkdir("fs/cifs", NULL); if (proc_fs_cifs == NULL) return; @@ -559,7 +559,7 @@ cifs_proc_clean(void) remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); remove_proc_entry("Experimental", proc_fs_cifs); remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); - remove_proc_entry("cifs", proc_root_fs); + remove_proc_entry("fs/cifs", NULL); } static int diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index ef97f19c2f9..1efcb934c2d 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2867,7 +2867,6 @@ static void ext4_mb_free_committed_blocks(struct super_block *sb) mb_debug("freed %u blocks in %u structures\n", count, count2); } -#define EXT4_ROOT "ext4" #define EXT4_MB_STATS_NAME "stats" #define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan" #define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan" @@ -3007,9 +3006,9 @@ int __init init_ext4_mballoc(void) return -ENOMEM; } #ifdef CONFIG_PROC_FS - proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs); + proc_root_ext4 = proc_mkdir("fs/ext4", NULL); if (proc_root_ext4 == NULL) - printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT); + printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n"); #endif return 0; } @@ -3020,7 +3019,7 @@ void exit_ext4_mballoc(void) kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_ac_cachep); #ifdef CONFIG_PROC_FS - remove_proc_entry(EXT4_ROOT, proc_root_fs); + remove_proc_entry("fs/ext4", NULL); #endif } diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index 887f5759e53..bf6ab19b86e 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c @@ -89,7 +89,7 @@ void jfs_proc_init(void) { int i; - if (!(base = proc_mkdir("jfs", proc_root_fs))) + if (!(base = proc_mkdir("fs/jfs", NULL))) return; base->owner = THIS_MODULE; @@ -109,7 +109,7 @@ void jfs_proc_clean(void) if (base) { for (i = 0; i < NPROCENT; i++) remove_proc_entry(Entries[i].name, base); - remove_proc_entry("jfs", proc_root_fs); + remove_proc_entry("fs/jfs", NULL); } } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f2f3b284e6d..0e066dcd470 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1500,7 +1500,7 @@ int __init nfs_fs_proc_init(void) { struct proc_dir_entry *p; - proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs); + proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL); if (!proc_fs_nfs) goto error_0; @@ -1526,7 +1526,7 @@ int __init nfs_fs_proc_init(void) error_2: remove_proc_entry("servers", proc_fs_nfs); error_1: - remove_proc_entry("nfsfs", proc_root_fs); + remove_proc_entry("fs/nfsfs", NULL); error_0: return -ENOMEM; } @@ -1538,7 +1538,7 @@ void nfs_fs_proc_exit(void) { remove_proc_entry("volumes", proc_fs_nfs); remove_proc_entry("servers", proc_fs_nfs); - remove_proc_entry("nfsfs", proc_root_fs); + remove_proc_entry("fs/nfsfs", NULL); } #endif /* CONFIG_PROC_FS */ diff --git a/fs/proc/root.c b/fs/proc/root.c index cc46fcba802..596abb690af 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -22,7 +22,7 @@ #include "internal.h" -struct proc_dir_entry *proc_root_fs, *proc_root_driver; +struct proc_dir_entry *proc_root_driver; static int proc_test_super(struct super_block *sb, void *data) { @@ -126,7 +126,7 @@ void __init proc_root_init(void) #ifdef CONFIG_SYSVIPC proc_mkdir("sysvipc", NULL); #endif - proc_root_fs = proc_mkdir("fs", NULL); + proc_mkdir("fs", NULL); proc_root_driver = proc_mkdir("driver", NULL); proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) @@ -235,5 +235,4 @@ EXPORT_SYMBOL(create_proc_entry); EXPORT_SYMBOL(proc_create); EXPORT_SYMBOL(remove_proc_entry); EXPORT_SYMBOL(proc_root); -EXPORT_SYMBOL(proc_root_fs); EXPORT_SYMBOL(proc_root_driver); -- cgit v1.2.3 From 928b4d8c8963e75bdb133f562b03b07f9aa4844a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:44 -0700 Subject: proc: remove proc_root_driver Use creation by full path: "driver/foo". Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/root.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/proc/root.c b/fs/proc/root.c index 596abb690af..5e93e9b0124 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -22,8 +22,6 @@ #include "internal.h" -struct proc_dir_entry *proc_root_driver; - static int proc_test_super(struct super_block *sb, void *data) { return sb->s_fs_info == data; @@ -127,7 +125,7 @@ void __init proc_root_init(void) proc_mkdir("sysvipc", NULL); #endif proc_mkdir("fs", NULL); - proc_root_driver = proc_mkdir("driver", NULL); + proc_mkdir("driver", NULL); proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) /* just give it a mountpoint */ @@ -235,4 +233,3 @@ EXPORT_SYMBOL(create_proc_entry); EXPORT_SYMBOL(proc_create); EXPORT_SYMBOL(remove_proc_entry); EXPORT_SYMBOL(proc_root); -EXPORT_SYMBOL(proc_root_driver); -- cgit v1.2.3 From c74c120a21d87b0b6925ada5830d8cac21e852d9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:44 -0700 Subject: proc: remove proc_root from drivers Remove proc_root export. Creation and removal works well if parent PDE is supplied as NULL -- it worked always that way. So, one useless export removed and consistency added, some drivers created PDEs with &proc_root as parent but removed them as NULL and so on. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/internal.h | 1 + fs/proc/proc_misc.c | 2 +- fs/proc/root.c | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/internal.h b/fs/proc/internal.h index b1d6df671ed..28cbca80590 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -11,6 +11,7 @@ #include +extern struct proc_dir_entry proc_root; #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); #else diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index c4137bb94a9..48bcf20cec2 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -854,7 +854,7 @@ void __init proc_misc_init(void) /* And now for trickier ones */ #ifdef CONFIG_PRINTK - proc_create("kmsg", S_IRUSR, &proc_root, &proc_kmsg_operations); + proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); #endif proc_create("locks", 0, NULL, &proc_locks_operations); proc_create("devices", 0, NULL, &proc_devinfo_operations); diff --git a/fs/proc/root.c b/fs/proc/root.c index 5e93e9b0124..c741b45a550 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -232,4 +232,3 @@ EXPORT_SYMBOL(proc_mkdir); EXPORT_SYMBOL(create_proc_entry); EXPORT_SYMBOL(proc_create); EXPORT_SYMBOL(remove_proc_entry); -EXPORT_SYMBOL(proc_root); -- cgit v1.2.3 From 8731f14d37825b54ad0c4c309cba2bc8fdf13a86 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:58 -0700 Subject: proc: remove ->get_info infrastructure Now that last dozen or so users of ->get_info were removed, ditch it too. Everyone sane shouldd have switched to seq_file interface long ago. P.S.: Co-existing 3 interfaces (->get_info/->read_proc/->proc_fops) for proc is long-standing crap, BTW, thus a) put ->read_proc/->write_proc/read_proc_entry() users on death row, b) new such users should be rejected, c) everyone is encouraged to convert his favourite ->read_proc user or I'll do it, lazy bastards. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 8b406e21a25..0f3d97d41b0 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -69,12 +69,7 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes, count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); start = NULL; - if (dp->get_info) { - /* Handle old net routines */ - n = dp->get_info(page, &start, *ppos, count); - if (n < count) - eof = 1; - } else if (dp->read_proc) { + if (dp->read_proc) { /* * How to be a proc read function * ------------------------------ -- cgit v1.2.3 From b640a89ddd742782bd2d83873da30d4776d1b9c6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:58 -0700 Subject: proc: convert /proc/tty/ldiscs to seq_file interface Note: THIS_MODULE and header addition aren't technically needed because this code is not modular, but let's keep it anyway because people can copy this code into modular code. Signed-off-by: Alexey Dobriyan Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_tty.c | 76 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index 63f45383035..ac26ccc25f4 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -5,7 +5,7 @@ */ #include - +#include #include #include #include @@ -136,39 +136,54 @@ static const struct file_operations proc_tty_drivers_operations = { .release = seq_release, }; -/* - * This is the handler for /proc/tty/ldiscs - */ -static int tty_ldiscs_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) { - int i; - int len = 0; - off_t begin = 0; + return (*pos < NR_LDISCS) ? pos : NULL; +} + +static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return (*pos < NR_LDISCS) ? pos : NULL; +} + +static void tty_ldiscs_seq_stop(struct seq_file *m, void *v) +{ +} + +static int tty_ldiscs_seq_show(struct seq_file *m, void *v) +{ + int i = *(loff_t *)v; struct tty_ldisc *ld; - for (i=0; i < NR_LDISCS; i++) { - ld = tty_ldisc_get(i); - if (ld == NULL) - continue; - len += sprintf(page+len, "%-10s %2d\n", - ld->name ? ld->name : "???", i); - tty_ldisc_put(i); - if (len+begin > off+count) - break; - if (len+begin < off) { - begin += len; - len = 0; - } - } - if (i >= NR_LDISCS) - *eof = 1; - if (off >= len+begin) + ld = tty_ldisc_get(i); + if (ld == NULL) return 0; - *start = page + (off-begin); - return ((count < begin+len-off) ? count : begin+len-off); + seq_printf(m, "%-10s %2d\n", ld->name ? ld->name : "???", i); + tty_ldisc_put(i); + return 0; +} + +static const struct seq_operations tty_ldiscs_seq_ops = { + .start = tty_ldiscs_seq_start, + .next = tty_ldiscs_seq_next, + .stop = tty_ldiscs_seq_stop, + .show = tty_ldiscs_seq_show, +}; + +static int proc_tty_ldiscs_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &tty_ldiscs_seq_ops); } +static const struct file_operations tty_ldiscs_proc_fops = { + .owner = THIS_MODULE, + .open = proc_tty_ldiscs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + /* * This function is called by tty_register_driver() to handle * registering the driver's /proc handler into /proc/tty/driver/ @@ -223,8 +238,7 @@ void __init proc_tty_init(void) * password lengths and inter-keystroke timings during password * entry. */ - proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR | S_IXUSR, NULL); - - create_proc_read_entry("tty/ldiscs", 0, NULL, tty_ldiscs_read_proc, NULL); + proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL); + proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops); proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations); } -- cgit v1.2.3 From 59b7435149eab2dd06dd678742faff6049cb655f Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 29 Apr 2008 01:02:00 -0700 Subject: proc: introduce proc_create_data to setup de->data This set of patches fixes an proc ->open'less usage due to ->proc_fops flip in the most part of the kernel code. The original OOPS is described in the commit 2d3a4e3666325a9709cc8ea2e88151394e8f20fc: Typical PDE creation code looks like: pde = create_proc_entry("foo", 0, NULL); if (pde) pde->proc_fops = &foo_proc_fops; Notice that PDE is first created, only then ->proc_fops is set up to final value. This is a problem because right after creation a) PDE is fully visible in /proc , and b) ->proc_fops are proc_file_operations which do not have ->open callback. So, it's possible to ->read without ->open (see one class of oopses below). The fix is new API called proc_create() which makes sure ->proc_fops are set up before gluing PDE to main tree. Typical new code looks like: pde = proc_create("foo", 0, NULL, &foo_proc_fops); if (!pde) return -ENOMEM; Fix most networking users for a start. In the long run, create_proc_entry() for regular files will go. In addition to this, proc_create_data is introduced to fix reading from proc without PDE->data. The race is basically the same as above. create_proc_entries is replaced in the entire kernel code as new method is also simply better. This patch: The problem is the same as for de->proc_fops. Right now PDE becomes visible without data set. So, the entry could be looked up without data. This, in most cases, will simply OOPS. proc_create_data call is created to address this issue. proc_create now becomes a wrapper around it. Signed-off-by: Denis V. Lunev Cc: "Eric W. Biederman" Cc: "J. Bruce Fields" Cc: Alessandro Zummo Cc: Alexey Dobriyan Cc: Bartlomiej Zolnierkiewicz Cc: Benjamin Herrenschmidt Cc: Bjorn Helgaas Cc: Chris Mason Acked-by: David Howells Cc: Dmitry Torokhov Cc: Geert Uytterhoeven Cc: Grant Grundler Cc: Greg Kroah-Hartman Cc: Haavard Skinnemoen Cc: Heiko Carstens Cc: Ingo Molnar Cc: James Bottomley Cc: Jaroslav Kysela Cc: Jeff Garzik Cc: Jeff Mahoney Cc: Jesper Nilsson Cc: Karsten Keil Cc: Kyle McMartin Cc: Len Brown Cc: Martin Schwidefsky Cc: Mathieu Desnoyers Cc: Matthew Wilcox Cc: Mauro Carvalho Chehab Cc: Mikael Starvik Cc: Nadia Derbey Cc: Neil Brown Cc: Paul Mackerras Cc: Peter Osterlund Cc: Pierre Peiffer Cc: Russell King Cc: Takashi Iwai Cc: Tony Luck Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 8 +++++--- fs/proc/root.c | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 0f3d97d41b0..9d53b39a9cf 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -675,9 +675,10 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, return ent; } -struct proc_dir_entry *proc_create(const char *name, mode_t mode, - struct proc_dir_entry *parent, - const struct file_operations *proc_fops) +struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, + struct proc_dir_entry *parent, + const struct file_operations *proc_fops, + void *data) { struct proc_dir_entry *pde; nlink_t nlink; @@ -698,6 +699,7 @@ struct proc_dir_entry *proc_create(const char *name, mode_t mode, if (!pde) goto out; pde->proc_fops = proc_fops; + pde->data = data; if (proc_register(parent, pde) < 0) goto out_free; return pde; diff --git a/fs/proc/root.c b/fs/proc/root.c index c741b45a550..95117538a4f 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -230,5 +230,5 @@ void pid_ns_release_proc(struct pid_namespace *ns) EXPORT_SYMBOL(proc_symlink); EXPORT_SYMBOL(proc_mkdir); EXPORT_SYMBOL(create_proc_entry); -EXPORT_SYMBOL(proc_create); +EXPORT_SYMBOL(proc_create_data); EXPORT_SYMBOL(remove_proc_entry); -- cgit v1.2.3 From 9ef2db2630652d68dfd336088648adae7ef0bcd4 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 29 Apr 2008 01:02:04 -0700 Subject: nfsd: use proc_create to setup de->proc_fops Use proc_create() to make sure that ->proc_fops be setup before gluing PDE to main tree. Signed-off-by: Denis V. Lunev Cc: Neil Brown Cc: "J. Bruce Fields" Cc: Alexey Dobriyan Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfsctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 42f3820ee8f..5ac00c4fee9 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -169,6 +169,7 @@ static const struct file_operations exports_operations = { .read = seq_read, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; /*----------------------------------------------------------------------------*/ @@ -801,10 +802,9 @@ static int create_proc_exports_entry(void) entry = proc_mkdir("fs/nfs", NULL); if (!entry) return -ENOMEM; - entry = create_proc_entry("fs/nfs/exports", 0, NULL); + entry = proc_create("exports", 0, entry, &exports_operations); if (!entry) return -ENOMEM; - entry->proc_fops = &exports_operations; return 0; } #else /* CONFIG_PROC_FS */ -- cgit v1.2.3 From 34b37235c60fd23e4075da475c7bb22e6c7a466e Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 29 Apr 2008 01:02:07 -0700 Subject: nfs: use proc_create to setup de->proc_fops Use proc_create() to make sure that ->proc_fops be setup before gluing PDE to main tree. Signed-off-by: Denis V. Lunev Cc: "J. Bruce Fields" Cc: Trond Myklebust Cc: Alexey Dobriyan Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/client.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 0e066dcd470..89ac5bb0401 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1321,6 +1321,7 @@ static const struct file_operations nfs_server_list_fops = { .read = seq_read, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; static int nfs_volume_list_open(struct inode *inode, struct file *file); @@ -1341,6 +1342,7 @@ static const struct file_operations nfs_volume_list_fops = { .read = seq_read, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; /* @@ -1507,20 +1509,16 @@ int __init nfs_fs_proc_init(void) proc_fs_nfs->owner = THIS_MODULE; /* a file of servers with which we're dealing */ - p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs); + p = proc_create("servers", S_IFREG|S_IRUGO, + proc_fs_nfs, &nfs_server_list_fops); if (!p) goto error_1; - p->proc_fops = &nfs_server_list_fops; - p->owner = THIS_MODULE; - /* a file of volumes that we have mounted */ - p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs); + p = proc_create("volumes", S_IFREG|S_IRUGO, + proc_fs_nfs, &nfs_volume_list_fops); if (!p) goto error_2; - - p->proc_fops = &nfs_volume_list_fops; - p->owner = THIS_MODULE; return 0; error_2: -- cgit v1.2.3 From 21ac295b42b8bdc3d677aba6bd7308a38de28a9b Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 29 Apr 2008 01:02:07 -0700 Subject: afs: use non-racy method for proc entries creation Use proc_create()/proc_create_data() to make sure that ->proc_fops and ->data be setup before gluing PDE to main tree. Signed-off-by: Denis V. Lunev Acked-by: David Howells Cc: Alexey Dobriyan Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/afs/proc.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 846c7615ac9..9f7d1ae7026 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -41,6 +41,7 @@ static const struct file_operations afs_proc_cells_fops = { .write = afs_proc_cells_write, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; static int afs_proc_rootcell_open(struct inode *inode, struct file *file); @@ -56,7 +57,8 @@ static const struct file_operations afs_proc_rootcell_fops = { .read = afs_proc_rootcell_read, .write = afs_proc_rootcell_write, .llseek = no_llseek, - .release = afs_proc_rootcell_release + .release = afs_proc_rootcell_release, + .owner = THIS_MODULE, }; static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file); @@ -80,6 +82,7 @@ static const struct file_operations afs_proc_cell_volumes_fops = { .read = seq_read, .llseek = seq_lseek, .release = afs_proc_cell_volumes_release, + .owner = THIS_MODULE, }; static int afs_proc_cell_vlservers_open(struct inode *inode, @@ -104,6 +107,7 @@ static const struct file_operations afs_proc_cell_vlservers_fops = { .read = seq_read, .llseek = seq_lseek, .release = afs_proc_cell_vlservers_release, + .owner = THIS_MODULE, }; static int afs_proc_cell_servers_open(struct inode *inode, struct file *file); @@ -127,6 +131,7 @@ static const struct file_operations afs_proc_cell_servers_fops = { .read = seq_read, .llseek = seq_lseek, .release = afs_proc_cell_servers_release, + .owner = THIS_MODULE, }; /* @@ -143,17 +148,13 @@ int afs_proc_init(void) goto error_dir; proc_afs->owner = THIS_MODULE; - p = create_proc_entry("cells", 0, proc_afs); + p = proc_create("cells", 0, proc_afs, &afs_proc_cells_fops); if (!p) goto error_cells; - p->proc_fops = &afs_proc_cells_fops; - p->owner = THIS_MODULE; - p = create_proc_entry("rootcell", 0, proc_afs); + p = proc_create("rootcell", 0, proc_afs, &afs_proc_rootcell_fops); if (!p) goto error_rootcell; - p->proc_fops = &afs_proc_rootcell_fops; - p->owner = THIS_MODULE; _leave(" = 0"); return 0; @@ -395,26 +396,20 @@ int afs_proc_cell_setup(struct afs_cell *cell) if (!cell->proc_dir) goto error_dir; - p = create_proc_entry("servers", 0, cell->proc_dir); + p = proc_create_data("servers", 0, cell->proc_dir, + &afs_proc_cell_servers_fops, cell); if (!p) goto error_servers; - p->proc_fops = &afs_proc_cell_servers_fops; - p->owner = THIS_MODULE; - p->data = cell; - p = create_proc_entry("vlservers", 0, cell->proc_dir); + p = proc_create_data("vlservers", 0, cell->proc_dir, + &afs_proc_cell_vlservers_fops, cell); if (!p) goto error_vlservers; - p->proc_fops = &afs_proc_cell_vlservers_fops; - p->owner = THIS_MODULE; - p->data = cell; - p = create_proc_entry("volumes", 0, cell->proc_dir); + p = proc_create_data("volumes", 0, cell->proc_dir, + &afs_proc_cell_volumes_fops, cell); if (!p) goto error_volumes; - p->proc_fops = &afs_proc_cell_volumes_fops; - p->owner = THIS_MODULE; - p->data = cell; _leave(" = 0"); return 0; -- cgit v1.2.3 From 46fe74f2aed615c8c88164f4346b79c30cfd7c3d Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 29 Apr 2008 01:02:08 -0700 Subject: ext4: use non-racy method for proc entries creation Use proc_create()/proc_create_data() to make sure that ->proc_fops and ->data be setup before gluing PDE to main tree. Signed-off-by: Denis V. Lunev Cc: Cc: Alexey Dobriyan Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/mballoc.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1efcb934c2d..9d57695de74 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2449,17 +2449,10 @@ static void ext4_mb_history_init(struct super_block *sb) int i; if (sbi->s_mb_proc != NULL) { - struct proc_dir_entry *p; - p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); - if (p) { - p->proc_fops = &ext4_mb_seq_history_fops; - p->data = sb; - } - p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc); - if (p) { - p->proc_fops = &ext4_mb_seq_groups_fops; - p->data = sb; - } + proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc, + &ext4_mb_seq_history_fops, sb); + proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc, + &ext4_mb_seq_groups_fops, sb); } sbi->s_mb_history_max = 1000; -- cgit v1.2.3 From 19b4fc52d63b77adf700a215bfbabd680a8f1718 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 29 Apr 2008 01:02:09 -0700 Subject: reiserfs: use non-racy method for proc entries creation Use proc_create()/proc_create_data() to make sure that ->proc_fops and ->data be setup before gluing PDE to main tree. /proc entry owner is also added. Signed-off-by: Denis V. Lunev Cc: Jeff Mahoney Cc: Chris Mason Cc: Alexey Dobriyan Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/procfs.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 8f86c52b30d..b9dbeeca704 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -467,6 +467,7 @@ static const struct file_operations r_file_operations = { .read = seq_read, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; static struct proc_dir_entry *proc_info_root = NULL; @@ -475,12 +476,8 @@ static const char proc_info_root_name[] = "fs/reiserfs"; static void add_file(struct super_block *sb, char *name, int (*func) (struct seq_file *, struct super_block *)) { - struct proc_dir_entry *de; - de = create_proc_entry(name, 0, REISERFS_SB(sb)->procdir); - if (de) { - de->data = func; - de->proc_fops = &r_file_operations; - } + proc_create_data(name, 0, REISERFS_SB(sb)->procdir, + &r_file_operations, func); } int reiserfs_proc_info_init(struct super_block *sb) -- cgit v1.2.3 From 79da3664f61640057041bf172b1457e2d1969330 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 29 Apr 2008 01:02:11 -0700 Subject: jbd2: use non-racy method for proc entries creation Use proc_create()/proc_create_data() to make sure that ->proc_fops and ->data be setup before gluing PDE to main tree. Signed-off-by: Denis V. Lunev Cc: Cc: Alexey Dobriyan Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd2/journal.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 954cff001df..eb7eb6c27bc 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -904,19 +904,10 @@ static void jbd2_stats_proc_init(journal_t *journal) snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); if (journal->j_proc_entry) { - struct proc_dir_entry *p; - p = create_proc_entry("history", S_IRUGO, - journal->j_proc_entry); - if (p) { - p->proc_fops = &jbd2_seq_history_fops; - p->data = journal; - p = create_proc_entry("info", S_IRUGO, - journal->j_proc_entry); - if (p) { - p->proc_fops = &jbd2_seq_info_fops; - p->data = journal; - } - } + proc_create_data("history", S_IRUGO, journal->j_proc_entry, + &jbd2_seq_history_fops, journal); + proc_create_data("info", S_IRUGO, journal->j_proc_entry, + &jbd2_seq_info_fops, journal); } } -- cgit v1.2.3 From 7708bfb1c855f2a076ef71cc21647deea022ebe7 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 29 Apr 2008 01:02:40 -0700 Subject: sysctl: merge equal proc_sys_read and proc_sys_write Many (most of) sysctls do not have a per-container sense. E.g. kernel.print_fatal_signals, vm.panic_on_oom, net.core.netdev_budget and so on and so forth. Besides, tuning then from inside a container is not even secure. On the other hand, hiding them completely from the container's tasks sometimes causes user-space to stop working. When developing net sysctl, the common practice was to duplicate a table and drop the write bits in table->mode, but this approach was not very elegant, lead to excessive memory consumption and was not suitable in general. Here's the alternative solution. To facilitate the per-container sysctls ctl_table_root-s were introduced. Each root contains a list of ctl_table_header-s that are visible to different namespaces. The idea of this set is to add the permissions() callback on the ctl_table_root to allow ctl root limit permissions to the same ctl_table-s. The main user of this functionality is the net-namespaces code, but later this will (should) be used by more and more namespaces, containers and control groups. Actually, this idea's core is in a single hunk in the third patch. First two patches are cleanups for sysctl code, while the third one mostly extends the arguments set of some sysctl functions. This patch: These ->read and ->write callbacks act in a very similar way, so merge these paths to reduce the number of places to patch later and shrink the .text size (a bit). Signed-off-by: Pavel Emelyanov Acked-by: "David S. Miller" Cc: "Eric W. Biederman" Cc: Alexey Dobriyan Cc: Denis V. Lunev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 50 +++++++++++--------------------------------------- 1 file changed, 11 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 614c34b6d1c..5e31585292a 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -165,8 +165,8 @@ out: return err; } -static ssize_t proc_sys_read(struct file *filp, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, + size_t count, loff_t *ppos, int write) { struct dentry *dentry = filp->f_dentry; struct ctl_table_header *head; @@ -190,12 +190,12 @@ static ssize_t proc_sys_read(struct file *filp, char __user *buf, * and won't be until we finish. */ error = -EPERM; - if (sysctl_perm(table, MAY_READ)) + if (sysctl_perm(table, write ? MAY_WRITE : MAY_READ)) goto out; /* careful: calling conventions are nasty here */ res = count; - error = table->proc_handler(table, 0, filp, buf, &res, ppos); + error = table->proc_handler(table, write, filp, buf, &res, ppos); if (!error) error = res; out: @@ -204,44 +204,16 @@ out: return error; } -static ssize_t proc_sys_write(struct file *filp, const char __user *buf, +static ssize_t proc_sys_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct dentry *dentry = filp->f_dentry; - struct ctl_table_header *head; - struct ctl_table *table; - ssize_t error; - size_t res; - - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); - /* Has the sysctl entry disappeared on us? */ - error = -ENOENT; - if (!table) - goto out; - - /* Has the sysctl entry been replaced by a directory? */ - error = -EISDIR; - if (!table->proc_handler) - goto out; - - /* - * At this point we know that the sysctl was not unregistered - * and won't be until we finish. - */ - error = -EPERM; - if (sysctl_perm(table, MAY_WRITE)) - goto out; - - /* careful: calling conventions are nasty here */ - res = count; - error = table->proc_handler(table, 1, filp, (char __user *)buf, - &res, ppos); - if (!error) - error = res; -out: - sysctl_head_finish(head); + return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0); +} - return error; +static ssize_t proc_sys_write(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1); } -- cgit v1.2.3 From d7321cd62470b70d2717dae5a963e7a8fabff4d5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 29 Apr 2008 01:02:44 -0700 Subject: sysctl: add the ->permissions callback on the ctl_table_root When reading from/writing to some table, a root, which this table came from, may affect this table's permissions, depending on who is working with the table. The core hunk is at the bottom of this patch. All the rest is just pushing the ctl_table_root argument up to the sysctl_perm() function. This will be mostly (only?) used in the net sysctls. Signed-off-by: Pavel Emelyanov Acked-by: David S. Miller Cc: "Eric W. Biederman" Cc: Alexey Dobriyan Cc: Denis V. Lunev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5e31585292a..5acc001d49f 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -190,7 +190,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, * and won't be until we finish. */ error = -EPERM; - if (sysctl_perm(table, write ? MAY_WRITE : MAY_READ)) + if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) goto out; /* careful: calling conventions are nasty here */ @@ -388,7 +388,7 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata * goto out; /* Use the permissions on the sysctl table entry */ - error = sysctl_perm(table, mask); + error = sysctl_perm(head->root, table, mask); out: sysctl_head_finish(head); return error; -- cgit v1.2.3 From 801678c5a3b4c79236970bcca27c733f5559e0d1 Mon Sep 17 00:00:00 2001 From: Hirofumi Nakagawa Date: Tue, 29 Apr 2008 01:03:09 -0700 Subject: Remove duplicated unlikely() in IS_ERR() Some drivers have duplicated unlikely() macros. IS_ERR() already has unlikely() in itself. This patch cleans up such pointless code. Signed-off-by: Hirofumi Nakagawa Acked-by: David S. Miller Acked-by: Jeff Garzik Cc: Paul Clements Cc: Richard Purdie Cc: Alessandro Zummo Cc: David Brownell Cc: James Bottomley Cc: Michael Halcrow Cc: Anton Altaparmakov Cc: Al Viro Cc: Carsten Otte Cc: Patrick McHardy Cc: Paul Mundt Cc: Jaroslav Kysela Cc: Takashi Iwai Acked-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 2 +- fs/ecryptfs/inode.c | 2 +- fs/inotify_user.c | 2 +- fs/ntfs/mft.c | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 81c01290939..a175ad650b6 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1604,7 +1604,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, * event using the eventfd_signal() function. */ req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); - if (unlikely(IS_ERR(req->ki_eventfd))) { + if (IS_ERR(req->ki_eventfd)) { ret = PTR_ERR(req->ki_eventfd); goto out_put_req; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 1623ebf25e5..0a1397335a8 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -111,7 +111,7 @@ ecryptfs_do_create(struct inode *directory_inode, lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); lower_dir_dentry = lock_parent(lower_dentry); - if (unlikely(IS_ERR(lower_dir_dentry))) { + if (IS_ERR(lower_dir_dentry)) { ecryptfs_printk(KERN_ERR, "Error locking directory of " "dentry\n"); rc = PTR_ERR(lower_dir_dentry); diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 7b94a1e3c01..6676c06bb7c 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -598,7 +598,7 @@ asmlinkage long sys_inotify_init(void) } ih = inotify_init(&inotify_user_ops); - if (unlikely(IS_ERR(ih))) { + if (IS_ERR(ih)) { ret = PTR_ERR(ih); goto out_free_dev; } diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 2ad5c8b104b..790defb847e 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -1191,7 +1191,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol, if (size) { page = ntfs_map_page(mftbmp_mapping, ofs >> PAGE_CACHE_SHIFT); - if (unlikely(IS_ERR(page))) { + if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to read mft " "bitmap, aborting."); return PTR_ERR(page); @@ -2118,7 +2118,7 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no) } /* Read, map, and pin the page containing the mft record. */ page = ntfs_map_page(mft_vi->i_mapping, index); - if (unlikely(IS_ERR(page))) { + if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to map page containing mft record " "to format 0x%llx.", (long long)mft_no); return PTR_ERR(page); @@ -2519,7 +2519,7 @@ mft_rec_already_initialized: ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; /* Read, map, and pin the page containing the mft record. */ page = ntfs_map_page(vol->mft_ino->i_mapping, index); - if (unlikely(IS_ERR(page))) { + if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to map page containing allocated " "mft record 0x%llx.", (long long)bit); err = PTR_ERR(page); -- cgit v1.2.3 From 0ae52d6fbaf7ffe4d00876d25ea000e94f85819c Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Tue, 29 Apr 2008 01:03:20 -0700 Subject: afs: use the shorter LIST_HEAD for brevity Signed-off-by: Robert P. J. Day Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/afs/cell.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 584bb0f9c36..5e1df14e16b 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -20,7 +20,7 @@ DECLARE_RWSEM(afs_proc_cells_sem); LIST_HEAD(afs_proc_cells); -static struct list_head afs_cells = LIST_HEAD_INIT(afs_cells); +static LIST_HEAD(afs_cells); static DEFINE_RWLOCK(afs_cells_lock); static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */ static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq); -- cgit v1.2.3 From 7c80bcce34a355c0920f8cab250d766d7827341d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Apr 2008 01:03:21 -0700 Subject: afs: the AFS RPC op CBGetCapabilities is actually CBTellMeAboutYourself The AFS RxRPC op CBGetCapabilities is actually CBTellMeAboutYourself. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/afs/afs_cm.h | 2 +- fs/afs/cmservice.c | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h index 7b4d4fab4c8..5f62f3ea563 100644 --- a/fs/afs/afs_cm.h +++ b/fs/afs/afs_cm.h @@ -24,7 +24,7 @@ enum AFS_CM_Operations { CBGetXStatsVersion = 209, /* get version of extended statistics */ CBGetXStats = 210, /* get contents of extended statistics data */ CBInitCallBackState3 = 213, /* initialise callback state, version 3 */ - CBGetCapabilities = 65538, /* get client capabilities */ + CBTellMeAboutYourself = 65538, /* get client capabilities */ }; #define AFS_CAP_ERROR_TRANSLATION 0x1 diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 47b71c8947f..75da3d04612 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -26,8 +26,8 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *, struct sk_buff *, bool); static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool); static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool); -static int afs_deliver_cb_get_capabilities(struct afs_call *, struct sk_buff *, - bool); +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *, + struct sk_buff *, bool); static void afs_cm_destructor(struct afs_call *); /* @@ -71,11 +71,11 @@ static const struct afs_call_type afs_SRXCBProbe = { }; /* - * CB.GetCapabilities operation type + * CB.TellMeAboutYourself operation type */ -static const struct afs_call_type afs_SRXCBGetCapabilites = { - .name = "CB.GetCapabilities", - .deliver = afs_deliver_cb_get_capabilities, +static const struct afs_call_type afs_SRXCBTellMeAboutYourself = { + .name = "CB.TellMeAboutYourself", + .deliver = afs_deliver_cb_tell_me_about_yourself, .abort_to_error = afs_abort_to_error, .destructor = afs_cm_destructor, }; @@ -103,8 +103,8 @@ bool afs_cm_incoming_call(struct afs_call *call) case CBProbe: call->type = &afs_SRXCBProbe; return true; - case CBGetCapabilities: - call->type = &afs_SRXCBGetCapabilites; + case CBTellMeAboutYourself: + call->type = &afs_SRXCBTellMeAboutYourself; return true; default: return false; @@ -395,7 +395,7 @@ static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb, /* * allow the fileserver to ask about the cache manager's capabilities */ -static void SRXAFSCB_GetCapabilities(struct work_struct *work) +static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) { struct afs_interface *ifs; struct afs_call *call = container_of(work, struct afs_call, work); @@ -456,10 +456,10 @@ static void SRXAFSCB_GetCapabilities(struct work_struct *work) } /* - * deliver request data to a CB.GetCapabilities call + * deliver request data to a CB.TellMeAboutYourself call */ -static int afs_deliver_cb_get_capabilities(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call, + struct sk_buff *skb, bool last) { _enter(",{%u},%d", skb->len, last); @@ -471,7 +471,7 @@ static int afs_deliver_cb_get_capabilities(struct afs_call *call, /* no unmarshalling required */ call->state = AFS_CALL_REPLYING; - INIT_WORK(&call->work, SRXAFSCB_GetCapabilities); + INIT_WORK(&call->work, SRXAFSCB_TellMeAboutYourself); schedule_work(&call->work); return 0; } -- cgit v1.2.3 From 9396d496d74587d46a74b93a8b6b41659d2daf2e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Apr 2008 01:03:22 -0700 Subject: afs: support the CB.ProbeUuid RPC op Add support for the CB.ProbeUuid cache manager RPC op. This allows a modern OpenAFS server to quickly ask if the client has been rebooted. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/afs/afs_cm.h | 1 + fs/afs/cmservice.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) (limited to 'fs') diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h index 5f62f3ea563..255f5dd6040 100644 --- a/fs/afs/afs_cm.h +++ b/fs/afs/afs_cm.h @@ -24,6 +24,7 @@ enum AFS_CM_Operations { CBGetXStatsVersion = 209, /* get version of extended statistics */ CBGetXStats = 210, /* get contents of extended statistics data */ CBInitCallBackState3 = 213, /* initialise callback state, version 3 */ + CBProbeUuid = 214, /* check the client hasn't rebooted */ CBTellMeAboutYourself = 65538, /* get client capabilities */ }; diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 75da3d04612..eb765489164 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -26,6 +26,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *, struct sk_buff *, bool); static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool); static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool); +static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool); static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *, struct sk_buff *, bool); static void afs_cm_destructor(struct afs_call *); @@ -70,6 +71,16 @@ static const struct afs_call_type afs_SRXCBProbe = { .destructor = afs_cm_destructor, }; +/* + * CB.ProbeUuid operation type + */ +static const struct afs_call_type afs_SRXCBProbeUuid = { + .name = "CB.ProbeUuid", + .deliver = afs_deliver_cb_probe_uuid, + .abort_to_error = afs_abort_to_error, + .destructor = afs_cm_destructor, +}; + /* * CB.TellMeAboutYourself operation type */ @@ -392,6 +403,102 @@ static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb, return 0; } +/* + * allow the fileserver to quickly find out if the fileserver has been rebooted + */ +static void SRXAFSCB_ProbeUuid(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, work); + struct afs_uuid *r = call->request; + + struct { + __be32 match; + } reply; + + _enter(""); + + + if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0) + reply.match = htonl(0); + else + reply.match = htonl(1); + + afs_send_simple_reply(call, &reply, sizeof(reply)); + _leave(""); +} + +/* + * deliver request data to a CB.ProbeUuid call + */ +static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, + bool last) +{ + struct afs_uuid *r; + unsigned loop; + __be32 *b; + int ret; + + _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + + if (skb->len > 0) + return -EBADMSG; + if (!last) + return 0; + + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL); + if (!call->buffer) + return -ENOMEM; + call->unmarshall++; + + case 1: + _debug("extract UUID"); + ret = afs_extract_data(call, skb, last, call->buffer, + 11 * sizeof(__be32)); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + _debug("unmarshall UUID"); + call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL); + if (!call->request) + return -ENOMEM; + + b = call->buffer; + r = call->request; + r->time_low = ntohl(b[0]); + r->time_mid = ntohl(b[1]); + r->time_hi_and_version = ntohl(b[2]); + r->clock_seq_hi_and_reserved = ntohl(b[3]); + r->clock_seq_low = ntohl(b[4]); + + for (loop = 0; loop < 6; loop++) + r->node[loop] = ntohl(b[loop + 5]); + + call->offset = 0; + call->unmarshall++; + + case 2: + _debug("trailer"); + if (skb->len != 0) + return -EBADMSG; + break; + } + + if (!last) + return 0; + + call->state = AFS_CALL_REPLYING; + + INIT_WORK(&call->work, SRXAFSCB_ProbeUuid); + schedule_work(&call->work); + return 0; +} + /* * allow the fileserver to ask about the cache manager's capabilities */ -- cgit v1.2.3 From 803f445f17aa1b71235ad6febae734dd7ad23ddd Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 29 Apr 2008 01:03:43 -0700 Subject: fat: use get/put_unaligned_* helpers Signed-off-by: Harvey Harrison Acked-by: OGAWA Hirofumi Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 5f522a55b59..4e0a3dd9d67 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1222,8 +1222,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, brelse(bh); goto out_invalid; } - logical_sector_size = - le16_to_cpu(get_unaligned((__le16 *)&b->sector_size)); + logical_sector_size = get_unaligned_le16(&b->sector_size); if (!is_power_of_2(logical_sector_size) || (logical_sector_size < 512) || (logical_sector_size > 4096)) { @@ -1322,8 +1321,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length; - sbi->dir_entries = - le16_to_cpu(get_unaligned((__le16 *)&b->dir_entries)); + sbi->dir_entries = get_unaligned_le16(&b->dir_entries); if (sbi->dir_entries & (sbi->dir_per_block - 1)) { if (!silent) printk(KERN_ERR "FAT: bogus directroy-entries per block" @@ -1335,7 +1333,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, rootdir_sectors = sbi->dir_entries * sizeof(struct msdos_dir_entry) / sb->s_blocksize; sbi->data_start = sbi->dir_start + rootdir_sectors; - total_sectors = le16_to_cpu(get_unaligned((__le16 *)&b->sectors)); + total_sectors = get_unaligned_le16(&b->sectors); if (total_sectors == 0) total_sectors = le32_to_cpu(b->total_sect); -- cgit v1.2.3 From 8b3789e5d552b8ba4841926066ef0ccd664e209c Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 29 Apr 2008 01:03:44 -0700 Subject: hfsplus: use get/put_unaligned_* helpers Signed-off-by: Harvey Harrison Cc: Roman Zippel Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/wrapper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 72cab78f050..175d08eacc8 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -47,7 +47,7 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) return 0; wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART)); - extent = be32_to_cpu(get_unaligned((__be32 *)(bufptr + HFSP_WRAPOFF_EMBEDEXT))); + extent = get_unaligned_be32(bufptr + HFSP_WRAPOFF_EMBEDEXT); wd->embed_start = (extent >> 16) & 0xFFFF; wd->embed_count = extent & 0xFFFF; -- cgit v1.2.3 From 58d485d481013b47f50b7cd2cf9eab7795a0fcbd Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 29 Apr 2008 01:03:44 -0700 Subject: isofs: use get/put_unaligned_* helpers Signed-off-by: Harvey Harrison Cc: Jan Kara Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/isofs/isofs.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index d1bdf8adb35..ccbf72faf27 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -78,29 +78,29 @@ static inline int isonum_712(char *p) } static inline unsigned int isonum_721(char *p) { - return le16_to_cpu(get_unaligned((__le16 *)p)); + return get_unaligned_le16(p); } static inline unsigned int isonum_722(char *p) { - return be16_to_cpu(get_unaligned((__le16 *)p)); + return get_unaligned_be16(p); } static inline unsigned int isonum_723(char *p) { /* Ignore bigendian datum due to broken mastering programs */ - return le16_to_cpu(get_unaligned((__le16 *)p)); + return get_unaligned_le16(p); } static inline unsigned int isonum_731(char *p) { - return le32_to_cpu(get_unaligned((__le32 *)p)); + return get_unaligned_le32(p); } static inline unsigned int isonum_732(char *p) { - return be32_to_cpu(get_unaligned((__le32 *)p)); + return get_unaligned_be32(p); } static inline unsigned int isonum_733(char *p) { /* Ignore bigendian datum due to broken mastering programs */ - return le32_to_cpu(get_unaligned((__le32 *)p)); + return get_unaligned_le32(p); } extern int iso_date(char *, int); -- cgit v1.2.3 From 97a4feb4a78ae5cd130be7d546471a0779f1aa14 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 29 Apr 2008 01:03:45 -0700 Subject: ncpfs: use get/put_unaligned_* helpers [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Harvey Harrison Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ncpfs/ncplib_kernel.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index df6d60bdfcd..97645f11211 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c @@ -102,48 +102,47 @@ static inline void ncp_init_request_s(struct ncp_server *server, int subfunction } static inline char * - ncp_reply_data(struct ncp_server *server, int offset) +ncp_reply_data(struct ncp_server *server, int offset) { return &(server->packet[sizeof(struct ncp_reply_header) + offset]); } -static inline __u8 BVAL(void* data) +static inline u8 BVAL(void *data) { - return get_unaligned((__u8*)data); + return *(u8 *)data; } -static __u8 - ncp_reply_byte(struct ncp_server *server, int offset) +static u8 ncp_reply_byte(struct ncp_server *server, int offset) { - return get_unaligned((__u8 *) ncp_reply_data(server, offset)); + return *(u8 *)ncp_reply_data(server, offset); } -static inline __u16 WVAL_LH(void* data) +static inline u16 WVAL_LH(void *data) { - return le16_to_cpu(get_unaligned((__le16*)data)); + return get_unaligned_le16(data); } -static __u16 - ncp_reply_le16(struct ncp_server *server, int offset) +static u16 +ncp_reply_le16(struct ncp_server *server, int offset) { - return le16_to_cpu(get_unaligned((__le16 *) ncp_reply_data(server, offset))); + return get_unaligned_le16(ncp_reply_data(server, offset)); } -static __u16 - ncp_reply_be16(struct ncp_server *server, int offset) +static u16 +ncp_reply_be16(struct ncp_server *server, int offset) { - return be16_to_cpu(get_unaligned((__be16 *) ncp_reply_data(server, offset))); + return get_unaligned_be16(ncp_reply_data(server, offset)); } -static inline __u32 DVAL_LH(void* data) +static inline u32 DVAL_LH(void *data) { - return le32_to_cpu(get_unaligned((__le32*)data)); + return get_unaligned_le32(data); } static __le32 - ncp_reply_dword(struct ncp_server *server, int offset) +ncp_reply_dword(struct ncp_server *server, int offset) { - return get_unaligned((__le32 *) ncp_reply_data(server, offset)); + return get_unaligned((__le32 *)ncp_reply_data(server, offset)); } static inline __u32 ncp_reply_dword_lh(struct ncp_server* server, int offset) { @@ -1006,8 +1005,8 @@ ncp_read_bounce(struct ncp_server *server, const char *file_id, result = ncp_request2(server, 72, bounce, bufsize); ncp_unlock_server(server); if (!result) { - int len = be16_to_cpu(get_unaligned((__be16*)((char*)bounce + - sizeof(struct ncp_reply_header)))); + int len = get_unaligned_be16((char *)bounce + + sizeof(struct ncp_reply_header)); result = -EIO; if (len <= to_read) { char* source; -- cgit v1.2.3 From 39fa00311f21318cc498b139c2cc2830dcad98ff Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Tue, 29 Apr 2008 01:03:48 -0700 Subject: aio: fix misleading comments The FIXME comments are inaccurate. The locking comment over lookup_ioctx() is wrong. Signed-off-by: Jeff Moyer Signed-off-by: Zach Brown Signed-off-by: Shen Feng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index a175ad650b6..99c2352906a 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -277,7 +277,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) if (ctx->max_reqs == 0) goto out_cleanup; - /* now link into global list. kludge. FIXME */ + /* now link into global list. */ write_lock(&mm->ioctx_list_lock); ctx->next = mm->ioctx_list; mm->ioctx_list = ctx; @@ -553,9 +553,6 @@ int aio_put_req(struct kiocb *req) return ret; } -/* Lookup an ioctx id. ioctx_list is lockless for reads. - * FIXME: this is O(n) and is only suitable for development. - */ static struct kioctx *lookup_ioctx(unsigned long ctx_id) { struct kioctx *ioctx; -- cgit v1.2.3 From 9b1ec9eceabe0c90d12116871f692263b69d476d Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 29 Apr 2008 20:15:43 +0000 Subject: [CIFS] Remove duplicate call to mode_to_acl The current logic in cifs_setattr calls mode_to_acl twice on mode changes if cifsacl is enabled. Remove the duplicate call. Signed-off-by: Jeff Layton CC: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/inode.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 8eaa9e72fe8..a1f24bdb656 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1581,10 +1581,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) if (time_buf.Attributes == 0) time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL); } -#ifdef CONFIG_CIFS_EXPERIMENTAL - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) - mode_to_acl(direntry->d_inode, full_path, mode); -#endif } if (attrs->ia_valid & ATTR_ATIME) { -- cgit v1.2.3 From 355a46961b58012de239cafccbfce4c9321d4395 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 29 Apr 2008 16:01:22 -0400 Subject: trivial: fix user-visible typo in hfsplus Signed-off-by: Dave Jones Signed-off-by: Linus Torvalds --- fs/hfsplus/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index b0f9ad362d1..946466cd9f2 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -357,7 +357,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { - printk(KERN_WARNING "hfs: write access to a jounaled filesystem is not supported, " + printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, " "use the force option at your own risk, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } -- cgit v1.2.3 From 53b7e9f6807c1274eee19201396b4c2b5f721553 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 29 Apr 2008 22:02:11 -0400 Subject: ext4: Fix update of mtime and ctime on rename The patch below makes ext4 update mtime and ctime of the directory into which we move file even if the directory entry already exists. Signed-off-by: Jan Kara Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/namei.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 02cdaec39e2..7fc1bc1c16d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2354,6 +2354,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, EXT4_FEATURE_INCOMPAT_FILETYPE)) new_de->file_type = old_de->file_type; new_dir->i_version++; + new_dir->i_ctime = new_dir->i_mtime = + ext4_current_time(new_dir); + ext4_mark_inode_dirty(handle, new_dir); BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata"); ext4_journal_dirty_metadata(handle, new_bh); brelse(new_bh); -- cgit v1.2.3 From 2887df139c40512cdc147d1a84d95d4f3d261bd1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 29 Apr 2008 22:02:07 -0400 Subject: ext4: Fix hang on umount with quotas when journal is aborted Call dquot_drop() from ext4_dquot_drop() even if we fail to start a transaction. Otherwise we never get to dropping references to quota structures from the inode and umount will hang indefinitely. Thanks to Payphone LIOU for spotting the problem. Signed-off-by: Jan Kara Signed-off-by: Mingming Cao CC: Payphone LIOU Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 93a7e746f42..e3b3483b600 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3040,8 +3040,14 @@ static int ext4_dquot_drop(struct inode *inode) /* We may delete quota structure so we need to reserve enough blocks */ handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) + if (IS_ERR(handle)) { + /* + * We call dquot_drop() anyway to at least release references + * to quota structures so that umount does not hang. + */ + dquot_drop(inode); return PTR_ERR(handle); + } ret = dquot_drop(inode); err = ext4_journal_stop(handle); if (!ret) -- cgit v1.2.3 From 216553c4b7f3e3e2beb4981cddca9b2027523928 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 29 Apr 2008 22:02:02 -0400 Subject: ext4: fix wrong gfp type under transaction This fixes the allocations with GFP_KERNEL while under a transaction problems in ext4. This patch is the same as its ext3 counterpart, just switches these to GFP_NOFS. Signed-off-by: Josef Bacik Cc: Signed-off-by: Andrew Morton Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/acl.c | 8 ++++---- fs/ext4/extents.c | 2 +- fs/ext4/resize.c | 4 ++-- fs/ext4/xattr.c | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index a8bae8cd1d5..cc92624a46a 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -37,7 +37,7 @@ ext4_acl_from_disk(const void *value, size_t size) return ERR_PTR(-EINVAL); if (count == 0) return NULL; - acl = posix_acl_alloc(count, GFP_KERNEL); + acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) return ERR_PTR(-ENOMEM); for (n=0; n < count; n++) { @@ -91,7 +91,7 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size) *size = ext4_acl_size(acl->a_count); ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count * - sizeof(ext4_acl_entry), GFP_KERNEL); + sizeof(ext4_acl_entry), GFP_NOFS); if (!ext_acl) return ERR_PTR(-ENOMEM); ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); @@ -187,7 +187,7 @@ ext4_get_acl(struct inode *inode, int type) } retval = ext4_xattr_get(inode, name_index, "", NULL, 0); if (retval > 0) { - value = kmalloc(retval, GFP_KERNEL); + value = kmalloc(retval, GFP_NOFS); if (!value) return ERR_PTR(-ENOMEM); retval = ext4_xattr_get(inode, name_index, "", value, retval); @@ -335,7 +335,7 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) if (error) goto cleanup; } - clone = posix_acl_clone(acl, GFP_KERNEL); + clone = posix_acl_clone(acl, GFP_NOFS); error = -ENOMEM; if (!clone) goto cleanup; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 1c8aab4dad2..4e6afc812fd 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1977,7 +1977,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) * We start scanning from right side, freeing all the blocks * after i_size and walking into the tree depth-wise. */ - path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); + path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); if (path == NULL) { ext4_journal_stop(handle); return -ENOMEM; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3e0f5d06f3e..0ca63dcbdf8 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -469,7 +469,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, goto exit_dindj; n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), - GFP_KERNEL); + GFP_NOFS); if (!n_group_desc) { err = -ENOMEM; ext4_warning(sb, __func__, @@ -552,7 +552,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, int res, i; int err; - primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL); + primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS); if (!primary) return -ENOMEM; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index f56598df688..df4810d5a38 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -739,7 +739,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ce = NULL; } ea_bdebug(bs->bh, "cloning"); - s->base = kmalloc(bs->bh->b_size, GFP_KERNEL); + s->base = kmalloc(bs->bh->b_size, GFP_NOFS); error = -ENOMEM; if (s->base == NULL) goto cleanup; @@ -751,7 +751,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, } } else { /* Allocate a buffer where we construct the new block. */ - s->base = kzalloc(sb->s_blocksize, GFP_KERNEL); + s->base = kzalloc(sb->s_blocksize, GFP_NOFS); /* assert(header == s->base) */ error = -ENOMEM; if (s->base == NULL) -- cgit v1.2.3 From 3dcf54515aa4981a647ad74859199032965193a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Apr 2008 18:13:32 -0400 Subject: ext4: move headers out of include/linux Move ext4 headers out of include/linux. This is just the trivial move, there's some more thing that could be done later. Signed-off-by: Christoph Hellwig Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/acl.c | 4 +- fs/ext4/balloc.c | 6 +- fs/ext4/bitmap.c | 2 +- fs/ext4/dir.c | 2 +- fs/ext4/ext4.h | 1205 ++++++++++++++++++++++++++++++++++++++++++++++ fs/ext4/ext4_extents.h | 232 +++++++++ fs/ext4/ext4_i.h | 167 +++++++ fs/ext4/ext4_jbd2.c | 2 +- fs/ext4/ext4_jbd2.h | 231 +++++++++ fs/ext4/ext4_sb.h | 148 ++++++ fs/ext4/extents.c | 4 +- fs/ext4/file.c | 4 +- fs/ext4/fsync.c | 4 +- fs/ext4/hash.c | 2 +- fs/ext4/ialloc.c | 5 +- fs/ext4/inode.c | 2 +- fs/ext4/ioctl.c | 4 +- fs/ext4/mballoc.c | 4 +- fs/ext4/migrate.c | 4 +- fs/ext4/namei.c | 4 +- fs/ext4/resize.c | 3 +- fs/ext4/super.c | 5 +- fs/ext4/symlink.c | 2 +- fs/ext4/xattr.c | 4 +- fs/ext4/xattr_security.c | 4 +- fs/ext4/xattr_trusted.c | 4 +- fs/ext4/xattr_user.c | 4 +- 27 files changed, 2021 insertions(+), 41 deletions(-) create mode 100644 fs/ext4/ext4.h create mode 100644 fs/ext4/ext4_extents.h create mode 100644 fs/ext4/ext4_i.h create mode 100644 fs/ext4/ext4_jbd2.h create mode 100644 fs/ext4/ext4_sb.h (limited to 'fs') diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index cc92624a46a..3c8dab880d9 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -9,8 +9,8 @@ #include #include #include -#include -#include +#include "ext4_jbd2.h" +#include "ext4.h" #include "xattr.h" #include "acl.h" diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index af5032b23c2..da994374ec3 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -15,12 +15,12 @@ #include #include #include -#include -#include #include #include - +#include "ext4.h" +#include "ext4_jbd2.h" #include "group.h" + /* * balloc.c contains the blocks allocation and deallocation routines */ diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index 420554f8f79..d37ea675045 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -9,7 +9,7 @@ #include #include -#include +#include "ext4.h" #ifdef EXT4FS_DEBUG diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 88c97f7312b..2bf0331ea19 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -23,10 +23,10 @@ #include #include -#include #include #include #include +#include "ext4.h" static unsigned char ext4_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h new file mode 100644 index 00000000000..8158083f7ac --- /dev/null +++ b/fs/ext4/ext4.h @@ -0,0 +1,1205 @@ +/* + * ext4.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _EXT4_H +#define _EXT4_H + +#include +#include +#include +#include "ext4_i.h" + +/* + * The second extended filesystem constants/structures + */ + +/* + * Define EXT4FS_DEBUG to produce debug messages + */ +#undef EXT4FS_DEBUG + +/* + * Define EXT4_RESERVATION to reserve data blocks for expanding files + */ +#define EXT4_DEFAULT_RESERVE_BLOCKS 8 +/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */ +#define EXT4_MAX_RESERVE_BLOCKS 1027 +#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0 + +/* + * Debug code + */ +#ifdef EXT4FS_DEBUG +#define ext4_debug(f, a...) \ + do { \ + printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ + __FILE__, __LINE__, __FUNCTION__); \ + printk (KERN_DEBUG f, ## a); \ + } while (0) +#else +#define ext4_debug(f, a...) do {} while (0) +#endif + +#define EXT4_MULTIBLOCK_ALLOCATOR 1 + +/* prefer goal again. length */ +#define EXT4_MB_HINT_MERGE 1 +/* blocks already reserved */ +#define EXT4_MB_HINT_RESERVED 2 +/* metadata is being allocated */ +#define EXT4_MB_HINT_METADATA 4 +/* first blocks in the file */ +#define EXT4_MB_HINT_FIRST 8 +/* search for the best chunk */ +#define EXT4_MB_HINT_BEST 16 +/* data is being allocated */ +#define EXT4_MB_HINT_DATA 32 +/* don't preallocate (for tails) */ +#define EXT4_MB_HINT_NOPREALLOC 64 +/* allocate for locality group */ +#define EXT4_MB_HINT_GROUP_ALLOC 128 +/* allocate goal blocks or none */ +#define EXT4_MB_HINT_GOAL_ONLY 256 +/* goal is meaningful */ +#define EXT4_MB_HINT_TRY_GOAL 512 + +struct ext4_allocation_request { + /* target inode for block we're allocating */ + struct inode *inode; + /* logical block in target inode */ + ext4_lblk_t logical; + /* phys. target (a hint) */ + ext4_fsblk_t goal; + /* the closest logical allocated block to the left */ + ext4_lblk_t lleft; + /* phys. block for ^^^ */ + ext4_fsblk_t pleft; + /* the closest logical allocated block to the right */ + ext4_lblk_t lright; + /* phys. block for ^^^ */ + ext4_fsblk_t pright; + /* how many blocks we want to allocate */ + unsigned long len; + /* flags. see above EXT4_MB_HINT_* */ + unsigned long flags; +}; + +/* + * Special inodes numbers + */ +#define EXT4_BAD_INO 1 /* Bad blocks inode */ +#define EXT4_ROOT_INO 2 /* Root inode */ +#define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */ +#define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */ +#define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */ +#define EXT4_JOURNAL_INO 8 /* Journal inode */ + +/* First non-reserved inode for old ext4 filesystems */ +#define EXT4_GOOD_OLD_FIRST_INO 11 + +/* + * Maximal count of links to a file + */ +#define EXT4_LINK_MAX 65000 + +/* + * Macro-instructions used to manage several block sizes + */ +#define EXT4_MIN_BLOCK_SIZE 1024 +#define EXT4_MAX_BLOCK_SIZE 65536 +#define EXT4_MIN_BLOCK_LOG_SIZE 10 +#ifdef __KERNEL__ +# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) +#else +# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) +#endif +#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) +#ifdef __KERNEL__ +# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) +#else +# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) +#endif +#ifdef __KERNEL__ +#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits) +#define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size) +#define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino) +#else +#define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \ + EXT4_GOOD_OLD_INODE_SIZE : \ + (s)->s_inode_size) +#define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \ + EXT4_GOOD_OLD_FIRST_INO : \ + (s)->s_first_ino) +#endif +#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits))) + +/* + * Structure of a blocks group descriptor + */ +struct ext4_group_desc +{ + __le32 bg_block_bitmap_lo; /* Blocks bitmap block */ + __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */ + __le32 bg_inode_table_lo; /* Inodes table block */ + __le16 bg_free_blocks_count; /* Free blocks count */ + __le16 bg_free_inodes_count; /* Free inodes count */ + __le16 bg_used_dirs_count; /* Directories count */ + __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ + __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ + __le16 bg_itable_unused; /* Unused inodes count */ + __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ + __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ + __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ + __le32 bg_inode_table_hi; /* Inodes table block MSB */ + __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */ + __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ + __le16 bg_used_dirs_count_hi; /* Directories count MSB */ + __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ + __u32 bg_reserved2[3]; +}; + +#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ +#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ +#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ + +#ifdef __KERNEL__ +#include "ext4_sb.h" +#endif +/* + * Macro-instructions used to manage group descriptors + */ +#define EXT4_MIN_DESC_SIZE 32 +#define EXT4_MIN_DESC_SIZE_64BIT 64 +#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE +#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size) +#ifdef __KERNEL__ +# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group) +# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block) +# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group) +# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits) +#else +# define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) +# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s)) +# define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) +#endif + +/* + * Constants relative to the data blocks + */ +#define EXT4_NDIR_BLOCKS 12 +#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS +#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1) +#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1) +#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1) + +/* + * Inode flags + */ +#define EXT4_SECRM_FL 0x00000001 /* Secure deletion */ +#define EXT4_UNRM_FL 0x00000002 /* Undelete */ +#define EXT4_COMPR_FL 0x00000004 /* Compress file */ +#define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */ +#define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */ +#define EXT4_NODUMP_FL 0x00000040 /* do not dump file */ +#define EXT4_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... */ +#define EXT4_DIRTY_FL 0x00000100 +#define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */ +#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */ +/* End compression flags --- maybe not all used */ +#define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */ +#define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ +#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */ +#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ +#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ +#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ +#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */ +#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ + +#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ +#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ + +/* + * Inode dynamic state flags + */ +#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ +#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ +#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ +#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ + +/* Used to pass group descriptor data when online resize is done */ +struct ext4_new_group_input { + __u32 group; /* Group number for this data */ + __u64 block_bitmap; /* Absolute block number of block bitmap */ + __u64 inode_bitmap; /* Absolute block number of inode bitmap */ + __u64 inode_table; /* Absolute block number of inode table start */ + __u32 blocks_count; /* Total number of blocks in this group */ + __u16 reserved_blocks; /* Number of reserved blocks in this group */ + __u16 unused; +}; + +/* The struct ext4_new_group_input in kernel space, with free_blocks_count */ +struct ext4_new_group_data { + __u32 group; + __u64 block_bitmap; + __u64 inode_bitmap; + __u64 inode_table; + __u32 blocks_count; + __u16 reserved_blocks; + __u16 unused; + __u32 free_blocks_count; +}; + +/* + * Following is used by preallocation code to tell get_blocks() that we + * want uninitialzed extents. + */ +#define EXT4_CREATE_UNINITIALIZED_EXT 2 + +/* + * ioctl commands + */ +#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS +#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS +#define EXT4_IOC_GETVERSION _IOR('f', 3, long) +#define EXT4_IOC_SETVERSION _IOW('f', 4, long) +#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) +#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) +#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION +#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION +#ifdef CONFIG_JBD2_DEBUG +#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) +#endif +#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) +#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) +#define EXT4_IOC_MIGRATE _IO('f', 7) + +/* + * ioctl commands in 32 bit emulation + */ +#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define EXT4_IOC32_GETVERSION _IOR('f', 3, int) +#define EXT4_IOC32_SETVERSION _IOW('f', 4, int) +#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) +#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) +#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) +#ifdef CONFIG_JBD2_DEBUG +#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) +#endif +#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION +#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION + + +/* + * Mount options + */ +struct ext4_mount_options { + unsigned long s_mount_opt; + uid_t s_resuid; + gid_t s_resgid; + unsigned long s_commit_interval; +#ifdef CONFIG_QUOTA + int s_jquota_fmt; + char *s_qf_names[MAXQUOTAS]; +#endif +}; + +/* + * Structure of an inode on the disk + */ +struct ext4_inode { + __le16 i_mode; /* File mode */ + __le16 i_uid; /* Low 16 bits of Owner Uid */ + __le32 i_size_lo; /* Size in bytes */ + __le32 i_atime; /* Access time */ + __le32 i_ctime; /* Inode Change time */ + __le32 i_mtime; /* Modification time */ + __le32 i_dtime; /* Deletion Time */ + __le16 i_gid; /* Low 16 bits of Group Id */ + __le16 i_links_count; /* Links count */ + __le32 i_blocks_lo; /* Blocks count */ + __le32 i_flags; /* File flags */ + union { + struct { + __le32 l_i_version; + } linux1; + struct { + __u32 h_i_translator; + } hurd1; + struct { + __u32 m_i_reserved1; + } masix1; + } osd1; /* OS dependent 1 */ + __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */ + __le32 i_generation; /* File version (for NFS) */ + __le32 i_file_acl_lo; /* File ACL */ + __le32 i_size_high; + __le32 i_obso_faddr; /* Obsoleted fragment address */ + union { + struct { + __le16 l_i_blocks_high; /* were l_i_reserved1 */ + __le16 l_i_file_acl_high; + __le16 l_i_uid_high; /* these 2 fields */ + __le16 l_i_gid_high; /* were reserved2[0] */ + __u32 l_i_reserved2; + } linux2; + struct { + __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ + __u16 h_i_mode_high; + __u16 h_i_uid_high; + __u16 h_i_gid_high; + __u32 h_i_author; + } hurd2; + struct { + __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ + __le16 m_i_file_acl_high; + __u32 m_i_reserved2[2]; + } masix2; + } osd2; /* OS dependent 2 */ + __le16 i_extra_isize; + __le16 i_pad1; + __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ + __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ + __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ + __le32 i_crtime; /* File Creation time */ + __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ + __le32 i_version_hi; /* high 32 bits for 64-bit version */ +}; + + +#define EXT4_EPOCH_BITS 2 +#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) +#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) + +/* + * Extended fields will fit into an inode if the filesystem was formatted + * with large inodes (-I 256 or larger) and there are not currently any EAs + * consuming all of the available space. For new inodes we always reserve + * enough space for the kernel's known extended fields, but for inodes + * created with an old kernel this might not have been the case. None of + * the extended inode fields is critical for correct filesystem operation. + * This macro checks if a certain field fits in the inode. Note that + * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize + */ +#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \ + ((offsetof(typeof(*ext4_inode), field) + \ + sizeof((ext4_inode)->field)) \ + <= (EXT4_GOOD_OLD_INODE_SIZE + \ + (einode)->i_extra_isize)) \ + +static inline __le32 ext4_encode_extra_time(struct timespec *time) +{ + return cpu_to_le32((sizeof(time->tv_sec) > 4 ? + time->tv_sec >> 32 : 0) | + ((time->tv_nsec << 2) & EXT4_NSEC_MASK)); +} + +static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) +{ + if (sizeof(time->tv_sec) > 4) + time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) + << 32; + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2; +} + +#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ +do { \ + (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ + (raw_inode)->xtime ## _extra = \ + ext4_encode_extra_time(&(inode)->xtime); \ +} while (0) + +#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \ +do { \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ + (raw_inode)->xtime ## _extra = \ + ext4_encode_extra_time(&(einode)->xtime); \ +} while (0) + +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \ +do { \ + (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ + ext4_decode_extra_time(&(inode)->xtime, \ + raw_inode->xtime ## _extra); \ +} while (0) + +#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \ +do { \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + (einode)->xtime.tv_sec = \ + (signed)le32_to_cpu((raw_inode)->xtime); \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ + ext4_decode_extra_time(&(einode)->xtime, \ + raw_inode->xtime ## _extra); \ +} while (0) + +#define i_disk_version osd1.linux1.l_i_version + +#if defined(__KERNEL__) || defined(__linux__) +#define i_reserved1 osd1.linux1.l_i_reserved1 +#define i_file_acl_high osd2.linux2.l_i_file_acl_high +#define i_blocks_high osd2.linux2.l_i_blocks_high +#define i_uid_low i_uid +#define i_gid_low i_gid +#define i_uid_high osd2.linux2.l_i_uid_high +#define i_gid_high osd2.linux2.l_i_gid_high +#define i_reserved2 osd2.linux2.l_i_reserved2 + +#elif defined(__GNU__) + +#define i_translator osd1.hurd1.h_i_translator +#define i_uid_high osd2.hurd2.h_i_uid_high +#define i_gid_high osd2.hurd2.h_i_gid_high +#define i_author osd2.hurd2.h_i_author + +#elif defined(__masix__) + +#define i_reserved1 osd1.masix1.m_i_reserved1 +#define i_file_acl_high osd2.masix2.m_i_file_acl_high +#define i_reserved2 osd2.masix2.m_i_reserved2 + +#endif /* defined(__KERNEL__) || defined(__linux__) */ + +/* + * File system states + */ +#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ +#define EXT4_ERROR_FS 0x0002 /* Errors detected */ +#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */ + +/* + * Misc. filesystem flags + */ +#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ +#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ +#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */ + +/* + * Mount flags + */ +#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */ +#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ +#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ +#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ +#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ +#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ +#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ +#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ +#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ +#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */ +#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ +#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ +#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ +#define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ +#define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ +#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ +#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ +#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ +#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */ +#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ +#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */ +#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ +#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ +#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ +#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ +#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ +#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ +#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ +/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ +#ifndef _LINUX_EXT2_FS_H +#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt +#define set_opt(o, opt) o |= EXT4_MOUNT_##opt +#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ + EXT4_MOUNT_##opt) +#else +#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD +#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT +#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS +#endif + +#define ext4_set_bit ext2_set_bit +#define ext4_set_bit_atomic ext2_set_bit_atomic +#define ext4_clear_bit ext2_clear_bit +#define ext4_clear_bit_atomic ext2_clear_bit_atomic +#define ext4_test_bit ext2_test_bit +#define ext4_find_first_zero_bit ext2_find_first_zero_bit +#define ext4_find_next_zero_bit ext2_find_next_zero_bit +#define ext4_find_next_bit ext2_find_next_bit + +/* + * Maximal mount counts between two filesystem checks + */ +#define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ +#define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */ + +/* + * Behaviour when detecting errors + */ +#define EXT4_ERRORS_CONTINUE 1 /* Continue execution */ +#define EXT4_ERRORS_RO 2 /* Remount fs read-only */ +#define EXT4_ERRORS_PANIC 3 /* Panic */ +#define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE + +/* + * Structure of the super block + */ +struct ext4_super_block { +/*00*/ __le32 s_inodes_count; /* Inodes count */ + __le32 s_blocks_count_lo; /* Blocks count */ + __le32 s_r_blocks_count_lo; /* Reserved blocks count */ + __le32 s_free_blocks_count_lo; /* Free blocks count */ +/*10*/ __le32 s_free_inodes_count; /* Free inodes count */ + __le32 s_first_data_block; /* First Data Block */ + __le32 s_log_block_size; /* Block size */ + __le32 s_obso_log_frag_size; /* Obsoleted fragment size */ +/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */ + __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */ + __le32 s_inodes_per_group; /* # Inodes per group */ + __le32 s_mtime; /* Mount time */ +/*30*/ __le32 s_wtime; /* Write time */ + __le16 s_mnt_count; /* Mount count */ + __le16 s_max_mnt_count; /* Maximal mount count */ + __le16 s_magic; /* Magic signature */ + __le16 s_state; /* File system state */ + __le16 s_errors; /* Behaviour when detecting errors */ + __le16 s_minor_rev_level; /* minor revision level */ +/*40*/ __le32 s_lastcheck; /* time of last check */ + __le32 s_checkinterval; /* max. time between checks */ + __le32 s_creator_os; /* OS */ + __le32 s_rev_level; /* Revision level */ +/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */ + __le16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT4_DYNAMIC_REV superblocks only. + * + * Note: the difference between the compatible feature set and + * the incompatible feature set is that if there is a bit set + * in the incompatible feature set that the kernel doesn't + * know about, it should refuse to mount the filesystem. + * + * e2fsck's requirements are more strict; if it doesn't know + * about a feature in either the compatible or incompatible + * feature set, it must abort and not try to meddle with + * things it doesn't understand... + */ + __le32 s_first_ino; /* First non-reserved inode */ + __le16 s_inode_size; /* size of inode structure */ + __le16 s_block_group_nr; /* block group # of this superblock */ + __le32 s_feature_compat; /* compatible feature set */ +/*60*/ __le32 s_feature_incompat; /* incompatible feature set */ + __le32 s_feature_ro_compat; /* readonly-compatible feature set */ +/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ +/*78*/ char s_volume_name[16]; /* volume name */ +/*88*/ char s_last_mounted[64]; /* directory where last mounted */ +/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on. + */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ + __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ + /* + * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set. + */ +/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ +/*E0*/ __le32 s_journal_inum; /* inode number of journal file */ + __le32 s_journal_dev; /* device number of journal file */ + __le32 s_last_orphan; /* start of list of inodes to delete */ + __le32 s_hash_seed[4]; /* HTREE hash seed */ + __u8 s_def_hash_version; /* Default hash version to use */ + __u8 s_reserved_char_pad; + __le16 s_desc_size; /* size of group descriptor */ +/*100*/ __le32 s_default_mount_opts; + __le32 s_first_meta_bg; /* First metablock block group */ + __le32 s_mkfs_time; /* When the filesystem was created */ + __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ + /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ +/*150*/ __le32 s_blocks_count_hi; /* Blocks count */ + __le32 s_r_blocks_count_hi; /* Reserved blocks count */ + __le32 s_free_blocks_count_hi; /* Free blocks count */ + __le16 s_min_extra_isize; /* All inodes have at least # bytes */ + __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ + __le32 s_flags; /* Miscellaneous flags */ + __le16 s_raid_stride; /* RAID stride */ + __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ + __le64 s_mmp_block; /* Block for multi-mount protection */ + __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ + __u32 s_reserved[163]; /* Padding to the end of the block */ +}; + +#ifdef __KERNEL__ +static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} +static inline struct ext4_inode_info *EXT4_I(struct inode *inode) +{ + return container_of(inode, struct ext4_inode_info, vfs_inode); +} + +static inline struct timespec ext4_current_time(struct inode *inode) +{ + return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? + current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; +} + + +static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) +{ + return ino == EXT4_ROOT_INO || + ino == EXT4_JOURNAL_INO || + ino == EXT4_RESIZE_INO || + (ino >= EXT4_FIRST_INO(sb) && + ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); +} +#else +/* Assume that user mode programs are passing in an ext4fs superblock, not + * a kernel struct super_block. This will allow us to call the feature-test + * macros from user land. */ +#define EXT4_SB(sb) (sb) +#endif + +#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime + +/* + * Codes for operating systems + */ +#define EXT4_OS_LINUX 0 +#define EXT4_OS_HURD 1 +#define EXT4_OS_MASIX 2 +#define EXT4_OS_FREEBSD 3 +#define EXT4_OS_LITES 4 + +/* + * Revision levels + */ +#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */ +#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ + +#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV +#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV + +#define EXT4_GOOD_OLD_INODE_SIZE 128 + +/* + * Feature set definitions + */ + +#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ + ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) +#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ + ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) +#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ + ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) +#define EXT4_SET_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) +#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) +#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) +#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) +#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) +#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) + +#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001 +#define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002 +#define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004 +#define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008 +#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 +#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 + +#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 +#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 +#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008 +#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 +#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 +#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 + +#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 +#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 +#define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ +#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ +#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010 +#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ +#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 +#define EXT4_FEATURE_INCOMPAT_MMP 0x0100 +#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 + +#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR +#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ + EXT4_FEATURE_INCOMPAT_RECOVER| \ + EXT4_FEATURE_INCOMPAT_META_BG| \ + EXT4_FEATURE_INCOMPAT_EXTENTS| \ + EXT4_FEATURE_INCOMPAT_64BIT| \ + EXT4_FEATURE_INCOMPAT_FLEX_BG) +#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ + EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ + EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ + EXT4_FEATURE_RO_COMPAT_HUGE_FILE) + +/* + * Default values for user and/or group using reserved blocks + */ +#define EXT4_DEF_RESUID 0 +#define EXT4_DEF_RESGID 0 + +/* + * Default mount options + */ +#define EXT4_DEFM_DEBUG 0x0001 +#define EXT4_DEFM_BSDGROUPS 0x0002 +#define EXT4_DEFM_XATTR_USER 0x0004 +#define EXT4_DEFM_ACL 0x0008 +#define EXT4_DEFM_UID16 0x0010 +#define EXT4_DEFM_JMODE 0x0060 +#define EXT4_DEFM_JMODE_DATA 0x0020 +#define EXT4_DEFM_JMODE_ORDERED 0x0040 +#define EXT4_DEFM_JMODE_WBACK 0x0060 + +/* + * Structure of a directory entry + */ +#define EXT4_NAME_LEN 255 + +struct ext4_dir_entry { + __le32 inode; /* Inode number */ + __le16 rec_len; /* Directory entry length */ + __le16 name_len; /* Name length */ + char name[EXT4_NAME_LEN]; /* File name */ +}; + +/* + * The new version of the directory entry. Since EXT4 structures are + * stored in intel byte order, and the name_len field could never be + * bigger than 255 chars, it's safe to reclaim the extra byte for the + * file_type field. + */ +struct ext4_dir_entry_2 { + __le32 inode; /* Inode number */ + __le16 rec_len; /* Directory entry length */ + __u8 name_len; /* Name length */ + __u8 file_type; + char name[EXT4_NAME_LEN]; /* File name */ +}; + +/* + * Ext4 directory file types. Only the low 3 bits are used. The + * other bits are reserved for now. + */ +#define EXT4_FT_UNKNOWN 0 +#define EXT4_FT_REG_FILE 1 +#define EXT4_FT_DIR 2 +#define EXT4_FT_CHRDEV 3 +#define EXT4_FT_BLKDEV 4 +#define EXT4_FT_FIFO 5 +#define EXT4_FT_SOCK 6 +#define EXT4_FT_SYMLINK 7 + +#define EXT4_FT_MAX 8 + +/* + * EXT4_DIR_PAD defines the directory entries boundaries + * + * NOTE: It must be a multiple of 4 + */ +#define EXT4_DIR_PAD 4 +#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) +#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \ + ~EXT4_DIR_ROUND) +#define EXT4_MAX_REC_LEN ((1<<16)-1) + +static inline unsigned ext4_rec_len_from_disk(__le16 dlen) +{ + unsigned len = le16_to_cpu(dlen); + + if (len == EXT4_MAX_REC_LEN) + return 1 << 16; + return len; +} + +static inline __le16 ext4_rec_len_to_disk(unsigned len) +{ + if (len == (1 << 16)) + return cpu_to_le16(EXT4_MAX_REC_LEN); + else if (len > (1 << 16)) + BUG(); + return cpu_to_le16(len); +} + +/* + * Hash Tree Directory indexing + * (c) Daniel Phillips, 2001 + */ + +#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ + EXT4_FEATURE_COMPAT_DIR_INDEX) && \ + (EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) +#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) +#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) + +/* Legal values for the dx_root hash_version field: */ + +#define DX_HASH_LEGACY 0 +#define DX_HASH_HALF_MD4 1 +#define DX_HASH_TEA 2 + +#ifdef __KERNEL__ + +/* hash info structure used by the directory hash */ +struct dx_hash_info +{ + u32 hash; + u32 minor_hash; + int hash_version; + u32 *seed; +}; + +#define EXT4_HTREE_EOF 0x7fffffff + +/* + * Control parameters used by ext4_htree_next_block + */ +#define HASH_NB_ALWAYS 1 + + +/* + * Describe an inode's exact location on disk and in memory + */ +struct ext4_iloc +{ + struct buffer_head *bh; + unsigned long offset; + ext4_group_t block_group; +}; + +static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc) +{ + return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset); +} + +/* + * This structure is stuffed into the struct file's private_data field + * for directories. It is where we put information so that we can do + * readdir operations in hash tree order. + */ +struct dir_private_info { + struct rb_root root; + struct rb_node *curr_node; + struct fname *extra_fname; + loff_t last_pos; + __u32 curr_hash; + __u32 curr_minor_hash; + __u32 next_hash; +}; + +/* calculate the first block number of the group */ +static inline ext4_fsblk_t +ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) +{ + return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); +} + +/* + * Special error return code only used by dx_probe() and its callers. + */ +#define ERR_BAD_DX_DIR -75000 + +void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, + unsigned long *blockgrpp, ext4_grpblk_t *offsetp); + +/* + * Function prototypes + */ + +/* + * Ok, these declarations are also in but none of the + * ext4 source programs needs to include it so they are duplicated here. + */ +# define NORET_TYPE /**/ +# define ATTRIB_NORET __attribute__((noreturn)) +# define NORET_AND noreturn, + +/* balloc.c */ +extern unsigned int ext4_block_group(struct super_block *sb, + ext4_fsblk_t blocknr); +extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, + ext4_fsblk_t blocknr); +extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); +extern unsigned long ext4_bg_num_gdb(struct super_block *sb, + ext4_group_t group); +extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, int *errp); +extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp); +extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp); +extern void ext4_free_blocks (handle_t *handle, struct inode *inode, + ext4_fsblk_t block, unsigned long count, int metadata); +extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, + ext4_fsblk_t block, unsigned long count, + unsigned long *pdquot_freed_blocks); +extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); +extern void ext4_check_blocks_bitmap (struct super_block *); +extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, + ext4_group_t block_group, + struct buffer_head ** bh); +extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); +extern void ext4_init_block_alloc_info(struct inode *); +extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv); + +/* dir.c */ +extern int ext4_check_dir_entry(const char *, struct inode *, + struct ext4_dir_entry_2 *, + struct buffer_head *, unsigned long); +extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, + __u32 minor_hash, + struct ext4_dir_entry_2 *dirent); +extern void ext4_htree_free_dir_info(struct dir_private_info *p); + +/* fsync.c */ +extern int ext4_sync_file (struct file *, struct dentry *, int); + +/* hash.c */ +extern int ext4fs_dirhash(const char *name, int len, struct + dx_hash_info *hinfo); + +/* ialloc.c */ +extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); +extern void ext4_free_inode (handle_t *, struct inode *); +extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); +extern unsigned long ext4_count_free_inodes (struct super_block *); +extern unsigned long ext4_count_dirs (struct super_block *); +extern void ext4_check_inodes_bitmap (struct super_block *); +extern unsigned long ext4_count_free (struct buffer_head *, unsigned); + +/* mballoc.c */ +extern long ext4_mb_stats; +extern long ext4_mb_max_to_scan; +extern int ext4_mb_init(struct super_block *, int); +extern int ext4_mb_release(struct super_block *); +extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, + struct ext4_allocation_request *, int *); +extern int ext4_mb_reserve_blocks(struct super_block *, int); +extern void ext4_mb_discard_inode_preallocations(struct inode *); +extern int __init init_ext4_mballoc(void); +extern void exit_ext4_mballoc(void); +extern void ext4_mb_free_blocks(handle_t *, struct inode *, + unsigned long, unsigned long, int, unsigned long *); + + +/* inode.c */ +int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, + struct buffer_head *bh, ext4_fsblk_t blocknr); +struct buffer_head *ext4_getblk(handle_t *, struct inode *, + ext4_lblk_t, int, int *); +struct buffer_head *ext4_bread(handle_t *, struct inode *, + ext4_lblk_t, int, int *); +int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, + ext4_lblk_t iblock, unsigned long maxblocks, + struct buffer_head *bh_result, + int create, int extend_disksize); + +extern struct inode *ext4_iget(struct super_block *, unsigned long); +extern int ext4_write_inode (struct inode *, int); +extern int ext4_setattr (struct dentry *, struct iattr *); +extern void ext4_delete_inode (struct inode *); +extern int ext4_sync_inode (handle_t *, struct inode *); +extern void ext4_discard_reservation (struct inode *); +extern void ext4_dirty_inode(struct inode *); +extern int ext4_change_inode_journal_flag(struct inode *, int); +extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); +extern void ext4_truncate (struct inode *); +extern void ext4_set_inode_flags(struct inode *); +extern void ext4_get_inode_flags(struct ext4_inode_info *); +extern void ext4_set_aops(struct inode *inode); +extern int ext4_writepage_trans_blocks(struct inode *); +extern int ext4_block_truncate_page(handle_t *handle, struct page *page, + struct address_space *mapping, loff_t from); + +/* ioctl.c */ +extern long ext4_ioctl(struct file *, unsigned int, unsigned long); +extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); + +/* migrate.c */ +extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, + unsigned long); +/* namei.c */ +extern int ext4_orphan_add(handle_t *, struct inode *); +extern int ext4_orphan_del(handle_t *, struct inode *); +extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, + __u32 start_minor_hash, __u32 *next_hash); + +/* resize.c */ +extern int ext4_group_add(struct super_block *sb, + struct ext4_new_group_data *input); +extern int ext4_group_extend(struct super_block *sb, + struct ext4_super_block *es, + ext4_fsblk_t n_blocks_count); + +/* super.c */ +extern void ext4_error (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void __ext4_std_error (struct super_block *, const char *, int); +extern void ext4_abort (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void ext4_warning (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void ext4_update_dynamic_rev (struct super_block *sb); +extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, + __u32 compat); +extern int ext4_update_rocompat_feature(handle_t *handle, + struct super_block *sb, __u32 rocompat); +extern int ext4_update_incompat_feature(handle_t *handle, + struct super_block *sb, __u32 incompat); +extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, + struct ext4_group_desc *bg); +extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, + struct ext4_group_desc *bg); +extern ext4_fsblk_t ext4_inode_table(struct super_block *sb, + struct ext4_group_desc *bg); +extern void ext4_block_bitmap_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk); +extern void ext4_inode_bitmap_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk); +extern void ext4_inode_table_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk); + +static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) +{ + return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | + le32_to_cpu(es->s_blocks_count_lo); +} + +static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es) +{ + return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) | + le32_to_cpu(es->s_r_blocks_count_lo); +} + +static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es) +{ + return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) | + le32_to_cpu(es->s_free_blocks_count_lo); +} + +static inline void ext4_blocks_count_set(struct ext4_super_block *es, + ext4_fsblk_t blk) +{ + es->s_blocks_count_lo = cpu_to_le32((u32)blk); + es->s_blocks_count_hi = cpu_to_le32(blk >> 32); +} + +static inline void ext4_free_blocks_count_set(struct ext4_super_block *es, + ext4_fsblk_t blk) +{ + es->s_free_blocks_count_lo = cpu_to_le32((u32)blk); + es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32); +} + +static inline void ext4_r_blocks_count_set(struct ext4_super_block *es, + ext4_fsblk_t blk) +{ + es->s_r_blocks_count_lo = cpu_to_le32((u32)blk); + es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32); +} + +static inline loff_t ext4_isize(struct ext4_inode *raw_inode) +{ + return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | + le32_to_cpu(raw_inode->i_size_lo); +} + +static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) +{ + raw_inode->i_size_lo = cpu_to_le32(i_size); + raw_inode->i_size_high = cpu_to_le32(i_size >> 32); +} + +static inline +struct ext4_group_info *ext4_get_group_info(struct super_block *sb, + ext4_group_t group) +{ + struct ext4_group_info ***grp_info; + long indexv, indexh; + grp_info = EXT4_SB(sb)->s_group_info; + indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); + indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); + return grp_info[indexv][indexh]; +} + + +#define ext4_std_error(sb, errno) \ +do { \ + if ((errno)) \ + __ext4_std_error((sb), __FUNCTION__, (errno)); \ +} while (0) + +/* + * Inodes and files operations + */ + +/* dir.c */ +extern const struct file_operations ext4_dir_operations; + +/* file.c */ +extern const struct inode_operations ext4_file_inode_operations; +extern const struct file_operations ext4_file_operations; + +/* namei.c */ +extern const struct inode_operations ext4_dir_inode_operations; +extern const struct inode_operations ext4_special_inode_operations; + +/* symlink.c */ +extern const struct inode_operations ext4_symlink_inode_operations; +extern const struct inode_operations ext4_fast_symlink_inode_operations; + +/* extents.c */ +extern int ext4_ext_tree_init(handle_t *handle, struct inode *); +extern int ext4_ext_writepage_trans_blocks(struct inode *, int); +extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, + ext4_lblk_t iblock, + unsigned long max_blocks, struct buffer_head *bh_result, + int create, int extend_disksize); +extern void ext4_ext_truncate(struct inode *, struct page *); +extern void ext4_ext_init(struct super_block *); +extern void ext4_ext_release(struct super_block *); +extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, + loff_t len); +extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, + sector_t block, unsigned long max_blocks, + struct buffer_head *bh, int create, + int extend_disksize); +#endif /* __KERNEL__ */ + +#endif /* _EXT4_H */ diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h new file mode 100644 index 00000000000..75333b595fa --- /dev/null +++ b/fs/ext4/ext4_extents.h @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ + +#ifndef _EXT4_EXTENTS +#define _EXT4_EXTENTS + +#include "ext4.h" + +/* + * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks + * becomes very small, so index split, in-depth growing and + * other hard changes happen much more often. + * This is for debug purposes only. + */ +#define AGGRESSIVE_TEST_ + +/* + * With EXTENTS_STATS defined, the number of blocks and extents + * are collected in the truncate path. They'll be shown at + * umount time. + */ +#define EXTENTS_STATS__ + +/* + * If CHECK_BINSEARCH is defined, then the results of the binary search + * will also be checked by linear search. + */ +#define CHECK_BINSEARCH__ + +/* + * If EXT_DEBUG is defined you can use the 'extdebug' mount option + * to get lots of info about what's going on. + */ +#define EXT_DEBUG__ +#ifdef EXT_DEBUG +#define ext_debug(a...) printk(a) +#else +#define ext_debug(a...) +#endif + +/* + * If EXT_STATS is defined then stats numbers are collected. + * These number will be displayed at umount time. + */ +#define EXT_STATS_ + + +/* + * ext4_inode has i_block array (60 bytes total). + * The first 12 bytes store ext4_extent_header; + * the remainder stores an array of ext4_extent. + */ + +/* + * This is the extent on-disk structure. + * It's used at the bottom of the tree. + */ +struct ext4_extent { + __le32 ee_block; /* first logical block extent covers */ + __le16 ee_len; /* number of blocks covered by extent */ + __le16 ee_start_hi; /* high 16 bits of physical block */ + __le32 ee_start_lo; /* low 32 bits of physical block */ +}; + +/* + * This is index on-disk structure. + * It's used at all the levels except the bottom. + */ +struct ext4_extent_idx { + __le32 ei_block; /* index covers logical blocks from 'block' */ + __le32 ei_leaf_lo; /* pointer to the physical block of the next * + * level. leaf or next index could be there */ + __le16 ei_leaf_hi; /* high 16 bits of physical block */ + __u16 ei_unused; +}; + +/* + * Each block (leaves and indexes), even inode-stored has header. + */ +struct ext4_extent_header { + __le16 eh_magic; /* probably will support different formats */ + __le16 eh_entries; /* number of valid entries */ + __le16 eh_max; /* capacity of store in entries */ + __le16 eh_depth; /* has tree real underlying blocks? */ + __le32 eh_generation; /* generation of the tree */ +}; + +#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) + +/* + * Array of ext4_ext_path contains path to some extent. + * Creation/lookup routines use it for traversal/splitting/etc. + * Truncate uses it to simulate recursive walking. + */ +struct ext4_ext_path { + ext4_fsblk_t p_block; + __u16 p_depth; + struct ext4_extent *p_ext; + struct ext4_extent_idx *p_idx; + struct ext4_extent_header *p_hdr; + struct buffer_head *p_bh; +}; + +/* + * structure for external API + */ + +#define EXT4_EXT_CACHE_NO 0 +#define EXT4_EXT_CACHE_GAP 1 +#define EXT4_EXT_CACHE_EXTENT 2 + + +#define EXT_MAX_BLOCK 0xffffffff + +/* + * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an + * initialized extent. This is 2^15 and not (2^16 - 1), since we use the + * MSB of ee_len field in the extent datastructure to signify if this + * particular extent is an initialized extent or an uninitialized (i.e. + * preallocated). + * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an + * uninitialized extent. + * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an + * uninitialized one. In other words, if MSB of ee_len is set, it is an + * uninitialized extent with only one special scenario when ee_len = 0x8000. + * In this case we can not have an uninitialized extent of zero length and + * thus we make it as a special case of initialized extent with 0x8000 length. + * This way we get better extent-to-group alignment for initialized extents. + * Hence, the maximum number of blocks we can have in an *initialized* + * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767). + */ +#define EXT_INIT_MAX_LEN (1UL << 15) +#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1) + + +#define EXT_FIRST_EXTENT(__hdr__) \ + ((struct ext4_extent *) (((char *) (__hdr__)) + \ + sizeof(struct ext4_extent_header))) +#define EXT_FIRST_INDEX(__hdr__) \ + ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \ + sizeof(struct ext4_extent_header))) +#define EXT_HAS_FREE_INDEX(__path__) \ + (le16_to_cpu((__path__)->p_hdr->eh_entries) \ + < le16_to_cpu((__path__)->p_hdr->eh_max)) +#define EXT_LAST_EXTENT(__hdr__) \ + (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) +#define EXT_LAST_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) +#define EXT_MAX_EXTENT(__hdr__) \ + (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1) + +static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode) +{ + return (struct ext4_extent_header *) EXT4_I(inode)->i_data; +} + +static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh) +{ + return (struct ext4_extent_header *) bh->b_data; +} + +static inline unsigned short ext_depth(struct inode *inode) +{ + return le16_to_cpu(ext_inode_hdr(inode)->eh_depth); +} + +static inline void ext4_ext_tree_changed(struct inode *inode) +{ + EXT4_I(inode)->i_ext_generation++; +} + +static inline void +ext4_ext_invalidate_cache(struct inode *inode) +{ + EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO; +} + +static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) +{ + /* We can not have an uninitialized extent of zero length! */ + BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0); + ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN); +} + +static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext) +{ + /* Extent with ee_len of 0x8000 is treated as an initialized extent */ + return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN); +} + +static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) +{ + return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ? + le16_to_cpu(ext->ee_len) : + (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); +} + +extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); +extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); +extern int ext4_extent_tree_init(handle_t *, struct inode *); +extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); +extern int ext4_ext_try_to_merge(struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *); +extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); +extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); +extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, + struct ext4_ext_path *); +extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, + ext4_lblk_t *, ext4_fsblk_t *); +extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, + ext4_lblk_t *, ext4_fsblk_t *); +extern void ext4_ext_drop_refs(struct ext4_ext_path *); +#endif /* _EXT4_EXTENTS */ + diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h new file mode 100644 index 00000000000..26a4ae255d7 --- /dev/null +++ b/fs/ext4/ext4_i.h @@ -0,0 +1,167 @@ +/* + * ext4_i.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs_i.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _EXT4_I +#define _EXT4_I + +#include +#include +#include +#include + +/* data type for block offset of block group */ +typedef int ext4_grpblk_t; + +/* data type for filesystem-wide blocks number */ +typedef unsigned long long ext4_fsblk_t; + +/* data type for file logical block number */ +typedef __u32 ext4_lblk_t; + +/* data type for block group number */ +typedef unsigned long ext4_group_t; + +struct ext4_reserve_window { + ext4_fsblk_t _rsv_start; /* First byte reserved */ + ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ +}; + +struct ext4_reserve_window_node { + struct rb_node rsv_node; + __u32 rsv_goal_size; + __u32 rsv_alloc_hit; + struct ext4_reserve_window rsv_window; +}; + +struct ext4_block_alloc_info { + /* information about reservation window */ + struct ext4_reserve_window_node rsv_window_node; + /* + * was i_next_alloc_block in ext4_inode_info + * is the logical (file-relative) number of the + * most-recently-allocated block in this file. + * We use this for detecting linearly ascending allocation requests. + */ + ext4_lblk_t last_alloc_logical_block; + /* + * Was i_next_alloc_goal in ext4_inode_info + * is the *physical* companion to i_next_alloc_block. + * it the physical block number of the block which was most-recentl + * allocated to this file. This give us the goal (target) for the next + * allocation when we detect linearly ascending requests. + */ + ext4_fsblk_t last_alloc_physical_block; +}; + +#define rsv_start rsv_window._rsv_start +#define rsv_end rsv_window._rsv_end + +/* + * storage for cached extent + */ +struct ext4_ext_cache { + ext4_fsblk_t ec_start; + ext4_lblk_t ec_block; + __u32 ec_len; /* must be 32bit to return holes */ + __u32 ec_type; +}; + +/* + * third extended file system inode data in memory + */ +struct ext4_inode_info { + __le32 i_data[15]; /* unconverted */ + __u32 i_flags; + ext4_fsblk_t i_file_acl; + __u32 i_dtime; + + /* + * i_block_group is the number of the block group which contains + * this file's inode. Constant across the lifetime of the inode, + * it is ued for making block allocation decisions - we try to + * place a file's data blocks near its inode block, and new inodes + * near to their parent directory's inode. + */ + ext4_group_t i_block_group; + __u32 i_state; /* Dynamic state flags for ext4 */ + + /* block reservation info */ + struct ext4_block_alloc_info *i_block_alloc_info; + + ext4_lblk_t i_dir_start_lookup; +#ifdef CONFIG_EXT4DEV_FS_XATTR + /* + * Extended attributes can be read independently of the main file + * data. Taking i_mutex even when reading would cause contention + * between readers of EAs and writers of regular file data, so + * instead we synchronize on xattr_sem when reading or changing + * EAs. + */ + struct rw_semaphore xattr_sem; +#endif +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; +#endif + + struct list_head i_orphan; /* unlinked but open inodes */ + + /* + * i_disksize keeps track of what the inode size is ON DISK, not + * in memory. During truncate, i_size is set to the new size by + * the VFS prior to calling ext4_truncate(), but the filesystem won't + * set i_disksize to 0 until the truncate is actually under way. + * + * The intent is that i_disksize always represents the blocks which + * are used by this file. This allows recovery to restart truncate + * on orphans if we crash during truncate. We actually write i_disksize + * into the on-disk inode when writing inodes out, instead of i_size. + * + * The only time when i_disksize and i_size may be different is when + * a truncate is in progress. The only things which change i_disksize + * are ext4_get_block (growth) and ext4_truncate (shrinkth). + */ + loff_t i_disksize; + + /* on-disk additional length */ + __u16 i_extra_isize; + + /* + * i_data_sem is for serialising ext4_truncate() against + * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's + * data tree are chopped off during truncate. We can't do that in + * ext4 because whenever we perform intermediate commits during + * truncate, the inode and all the metadata blocks *must* be in a + * consistent state which allows truncation of the orphans to restart + * during recovery. Hence we must fix the get_block-vs-truncate race + * by other means, so we have i_data_sem. + */ + struct rw_semaphore i_data_sem; + struct inode vfs_inode; + + unsigned long i_ext_generation; + struct ext4_ext_cache i_cached_extent; + /* + * File creation time. Its function is same as that of + * struct timespec i_{a,c,m}time in the generic inode. + */ + struct timespec i_crtime; + + /* mballoc */ + struct list_head i_prealloc_list; + spinlock_t i_prealloc_lock; +}; + +#endif /* _EXT4_I */ diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 2e6007d418d..c75384b34f2 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -2,7 +2,7 @@ * Interface between ext4 and JBD */ -#include +#include "ext4_jbd2.h" int __ext4_journal_get_undo_access(const char *where, handle_t *handle, struct buffer_head *bh) diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h new file mode 100644 index 00000000000..9255a7d28b2 --- /dev/null +++ b/fs/ext4/ext4_jbd2.h @@ -0,0 +1,231 @@ +/* + * ext4_jbd2.h + * + * Written by Stephen C. Tweedie , 1999 + * + * Copyright 1998--1999 Red Hat corp --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Ext4-specific journaling extensions. + */ + +#ifndef _EXT4_JBD2_H +#define _EXT4_JBD2_H + +#include +#include +#include "ext4.h" + +#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal) + +/* Define the number of blocks we need to account to a transaction to + * modify one block of data. + * + * We may have to touch one inode, one bitmap buffer, up to three + * indirection blocks, the group and superblock summaries, and the data + * block to complete the transaction. + * + * For extents-enabled fs we may have to allocate and modify up to + * 5 levels of tree + root which are stored in the inode. */ + +#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \ + (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \ + || test_opt(sb, EXTENTS) ? 27U : 8U) + +/* Extended attribute operations touch at most two data buffers, + * two bitmap buffers, and two group summaries, in addition to the inode + * and the superblock, which are already accounted for. */ + +#define EXT4_XATTR_TRANS_BLOCKS 6U + +/* Define the minimum size for a transaction which modifies data. This + * needs to take into account the fact that we may end up modifying two + * quota files too (one for the group, one for the user quota). The + * superblock only gets updated once, of course, so don't bother + * counting that again for the quota updates. */ + +#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ + EXT4_XATTR_TRANS_BLOCKS - 2 + \ + 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) + +/* Delete operations potentially hit one directory's namespace plus an + * entire inode, plus arbitrary amounts of bitmap/indirection data. Be + * generous. We can grow the delete transaction later if necessary. */ + +#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64) + +/* Define an arbitrary limit for the amount of data we will anticipate + * writing to any given transaction. For unbounded transactions such as + * write(2) and truncate(2) we can write more than this, but we always + * start off at the maximum transaction size and grow the transaction + * optimistically as we go. */ + +#define EXT4_MAX_TRANS_DATA 64U + +/* We break up a large truncate or write transaction once the handle's + * buffer credits gets this low, we need either to extend the + * transaction or to start a new one. Reserve enough space here for + * inode, bitmap, superblock, group and indirection updates for at least + * one block, plus two quota updates. Quota allocations are not + * needed. */ + +#define EXT4_RESERVE_TRANS_BLOCKS 12U + +#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8 + +#ifdef CONFIG_QUOTA +/* Amount of blocks needed for quota update - we know that the structure was + * allocated so we need to update only inode+data */ +#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) +/* Amount of blocks needed for quota insert/delete - we do some block writes + * but inode, sb and group updates are done only once */ +#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ + (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) +#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ + (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) +#else +#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0 +#define EXT4_QUOTA_INIT_BLOCKS(sb) 0 +#define EXT4_QUOTA_DEL_BLOCKS(sb) 0 +#endif + +int +ext4_mark_iloc_dirty(handle_t *handle, + struct inode *inode, + struct ext4_iloc *iloc); + +/* + * On success, We end up with an outstanding reference count against + * iloc->bh. This _must_ be cleaned up later. + */ + +int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, + struct ext4_iloc *iloc); + +int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); + +/* + * Wrapper functions with which ext4 calls into JBD. The intent here is + * to allow these to be turned into appropriate stubs so ext4 can control + * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't + * been done yet. + */ + +static inline void ext4_journal_release_buffer(handle_t *handle, + struct buffer_head *bh) +{ + jbd2_journal_release_buffer(handle, bh); +} + +void ext4_journal_abort_handle(const char *caller, const char *err_fn, + struct buffer_head *bh, handle_t *handle, int err); + +int __ext4_journal_get_undo_access(const char *where, handle_t *handle, + struct buffer_head *bh); + +int __ext4_journal_get_write_access(const char *where, handle_t *handle, + struct buffer_head *bh); + +int __ext4_journal_forget(const char *where, handle_t *handle, + struct buffer_head *bh); + +int __ext4_journal_revoke(const char *where, handle_t *handle, + ext4_fsblk_t blocknr, struct buffer_head *bh); + +int __ext4_journal_get_create_access(const char *where, + handle_t *handle, struct buffer_head *bh); + +int __ext4_journal_dirty_metadata(const char *where, + handle_t *handle, struct buffer_head *bh); + +#define ext4_journal_get_undo_access(handle, bh) \ + __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh)) +#define ext4_journal_get_write_access(handle, bh) \ + __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh)) +#define ext4_journal_revoke(handle, blocknr, bh) \ + __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh)) +#define ext4_journal_get_create_access(handle, bh) \ + __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh)) +#define ext4_journal_dirty_metadata(handle, bh) \ + __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) +#define ext4_journal_forget(handle, bh) \ + __ext4_journal_forget(__FUNCTION__, (handle), (bh)) + +int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh); + +handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); +int __ext4_journal_stop(const char *where, handle_t *handle); + +static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) +{ + return ext4_journal_start_sb(inode->i_sb, nblocks); +} + +#define ext4_journal_stop(handle) \ + __ext4_journal_stop(__FUNCTION__, (handle)) + +static inline handle_t *ext4_journal_current_handle(void) +{ + return journal_current_handle(); +} + +static inline int ext4_journal_extend(handle_t *handle, int nblocks) +{ + return jbd2_journal_extend(handle, nblocks); +} + +static inline int ext4_journal_restart(handle_t *handle, int nblocks) +{ + return jbd2_journal_restart(handle, nblocks); +} + +static inline int ext4_journal_blocks_per_page(struct inode *inode) +{ + return jbd2_journal_blocks_per_page(inode); +} + +static inline int ext4_journal_force_commit(journal_t *journal) +{ + return jbd2_journal_force_commit(journal); +} + +/* super.c */ +int ext4_force_commit(struct super_block *sb); + +static inline int ext4_should_journal_data(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 1; + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) + return 1; + if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) + return 1; + return 0; +} + +static inline int ext4_should_order_data(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 0; + if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) + return 0; + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) + return 1; + return 0; +} + +static inline int ext4_should_writeback_data(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 0; + if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) + return 0; + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) + return 1; + return 0; +} + +#endif /* _EXT4_JBD2_H */ diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h new file mode 100644 index 00000000000..5802e69f219 --- /dev/null +++ b/fs/ext4/ext4_sb.h @@ -0,0 +1,148 @@ +/* + * ext4_sb.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs_sb.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _EXT4_SB +#define _EXT4_SB + +#ifdef __KERNEL__ +#include +#include +#include +#include +#endif +#include + +/* + * third extended-fs super-block data in memory + */ +struct ext4_sb_info { + unsigned long s_desc_size; /* Size of a group descriptor in bytes */ + unsigned long s_inodes_per_block;/* Number of inodes per block */ + unsigned long s_blocks_per_group;/* Number of blocks in a group */ + unsigned long s_inodes_per_group;/* Number of inodes in a group */ + unsigned long s_itb_per_group; /* Number of inode table blocks per group */ + unsigned long s_gdb_count; /* Number of group descriptor blocks */ + unsigned long s_desc_per_block; /* Number of group descriptors per block */ + ext4_group_t s_groups_count; /* Number of groups in the fs */ + unsigned long s_overhead_last; /* Last calculated overhead */ + unsigned long s_blocks_last; /* Last seen block count */ + loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ + struct buffer_head ** s_group_desc; + unsigned long s_mount_opt; + ext4_fsblk_t s_sb_block; + uid_t s_resuid; + gid_t s_resgid; + unsigned short s_mount_state; + unsigned short s_pad; + int s_addr_per_block_bits; + int s_desc_per_block_bits; + int s_inode_size; + int s_first_ino; + spinlock_t s_next_gen_lock; + u32 s_next_generation; + u32 s_hash_seed[4]; + int s_def_hash_version; + struct percpu_counter s_freeblocks_counter; + struct percpu_counter s_freeinodes_counter; + struct percpu_counter s_dirs_counter; + struct blockgroup_lock s_blockgroup_lock; + + /* root of the per fs reservation window tree */ + spinlock_t s_rsv_window_lock; + struct rb_root s_rsv_window_root; + struct ext4_reserve_window_node s_rsv_window_head; + + /* Journaling */ + struct inode * s_journal_inode; + struct journal_s * s_journal; + struct list_head s_orphan; + unsigned long s_commit_interval; + struct block_device *journal_bdev; +#ifdef CONFIG_JBD2_DEBUG + struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ + wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ +#endif +#ifdef CONFIG_QUOTA + char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ + int s_jquota_fmt; /* Format of quota to use */ +#endif + unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ + +#ifdef EXTENTS_STATS + /* ext4 extents stats */ + unsigned long s_ext_min; + unsigned long s_ext_max; + unsigned long s_depth_max; + spinlock_t s_ext_stats_lock; + unsigned long s_ext_blocks; + unsigned long s_ext_extents; +#endif + + /* for buddy allocator */ + struct ext4_group_info ***s_group_info; + struct inode *s_buddy_cache; + long s_blocks_reserved; + spinlock_t s_reserve_lock; + struct list_head s_active_transaction; + struct list_head s_closed_transaction; + struct list_head s_committed_transaction; + spinlock_t s_md_lock; + tid_t s_last_transaction; + unsigned short *s_mb_offsets, *s_mb_maxs; + + /* tunables */ + unsigned long s_stripe; + unsigned long s_mb_stream_request; + unsigned long s_mb_max_to_scan; + unsigned long s_mb_min_to_scan; + unsigned long s_mb_stats; + unsigned long s_mb_order2_reqs; + unsigned long s_mb_group_prealloc; + /* where last allocation was done - for stream allocation */ + unsigned long s_mb_last_group; + unsigned long s_mb_last_start; + + /* history to debug policy */ + struct ext4_mb_history *s_mb_history; + int s_mb_history_cur; + int s_mb_history_max; + int s_mb_history_num; + struct proc_dir_entry *s_mb_proc; + spinlock_t s_mb_history_lock; + int s_mb_history_filter; + + /* stats for buddy allocator */ + spinlock_t s_mb_pa_lock; + atomic_t s_bal_reqs; /* number of reqs with len > 1 */ + atomic_t s_bal_success; /* we found long enough chunks */ + atomic_t s_bal_allocated; /* in blocks */ + atomic_t s_bal_ex_scanned; /* total extents scanned */ + atomic_t s_bal_goals; /* goal hits */ + atomic_t s_bal_breaks; /* too long searches */ + atomic_t s_bal_2orders; /* 2^order hits */ + spinlock_t s_bal_lock; + unsigned long s_mb_buddies_generated; + unsigned long long s_mb_generation_time; + atomic_t s_mb_lost_chunks; + atomic_t s_mb_preallocated; + atomic_t s_mb_discarded; + + /* locality groups */ + struct ext4_locality_group *s_locality_groups; +}; + +#endif /* _EXT4_SB */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4e6afc812fd..a472bc04636 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -40,8 +39,9 @@ #include #include #include -#include #include +#include "ext4_jbd2.h" +#include "ext4_extents.h" /* diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 20507a24506..4159be6366a 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -21,8 +21,8 @@ #include #include #include -#include -#include +#include "ext4.h" +#include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index a04a1ac4e0c..1c8ba48d4f8 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -27,8 +27,8 @@ #include #include #include -#include -#include +#include "ext4.h" +#include "ext4_jbd2.h" /* * akpm: A new design for ext4_sync_file(). diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 1555024e3b3..1d6329dbe39 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -11,8 +11,8 @@ #include #include -#include #include +#include "ext4.h" #define DELTA 0x9E3779B9 diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index a86377401ff..d59bdf7233b 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -15,8 +15,6 @@ #include #include #include -#include -#include #include #include #include @@ -25,7 +23,8 @@ #include #include #include - +#include "ext4.h" +#include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" #include "group.h" diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bd1a391725c..0c94db462c2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -36,6 +35,7 @@ #include #include #include +#include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index ce937fe432a..7a6c2f1faba 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -10,13 +10,13 @@ #include #include #include -#include -#include #include #include #include #include #include +#include "ext4_jbd2.h" +#include "ext4.h" long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 0b46fc0ca19..f87471de3af 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -24,8 +24,6 @@ #include #include #include -#include -#include #include #include #include @@ -34,6 +32,8 @@ #include #include #include +#include "ext4_jbd2.h" +#include "ext4.h" #include "group.h" /* diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 9b4fb07d192..b9e077ba07e 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -13,8 +13,8 @@ */ #include -#include -#include +#include "ext4_jbd2.h" +#include "ext4_extents.h" /* * The contiguous blocks details which can be diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 7fc1bc1c16d..ab16beaa830 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -28,14 +28,14 @@ #include #include #include -#include -#include #include #include #include #include #include #include +#include "ext4.h" +#include "ext4_jbd2.h" #include "namei.h" #include "xattr.h" diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 0ca63dcbdf8..9f086a6a472 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -11,11 +11,10 @@ #define EXT4FS_DEBUG -#include - #include #include +#include "ext4_jbd2.h" #include "group.h" #define outside(b, first, last) ((b) < (first) || (b) >= (last)) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e3b3483b600..3435184114c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -21,8 +21,6 @@ #include #include #include -#include -#include #include #include #include @@ -38,9 +36,10 @@ #include #include #include - #include +#include "ext4.h" +#include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" #include "namei.h" diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index e6f9da4287c..e9178643dc0 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -19,8 +19,8 @@ #include #include -#include #include +#include "ext4.h" #include "xattr.h" static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index df4810d5a38..3fbc2c6c3d0 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -53,11 +53,11 @@ #include #include #include -#include -#include #include #include #include +#include "ext4_jbd2.h" +#include "ext4.h" #include "xattr.h" #include "acl.h" diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index f17eaf2321b..ca5f89fc6ca 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -6,9 +6,9 @@ #include #include #include -#include -#include #include +#include "ext4_jbd2.h" +#include "ext4.h" #include "xattr.h" static size_t diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index e0f05acdafe..fff33382cad 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c @@ -9,8 +9,8 @@ #include #include #include -#include -#include +#include "ext4_jbd2.h" +#include "ext4.h" #include "xattr.h" #define XATTR_TRUSTED_PREFIX "trusted." diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index 7ed3d8ebf09..67be723fcc4 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -8,8 +8,8 @@ #include #include #include -#include -#include +#include "ext4_jbd2.h" +#include "ext4.h" #include "xattr.h" #define XATTR_USER_PREFIX "user." -- cgit v1.2.3 From c83617db76353ff30e825874be2c15c185b95759 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 29 Apr 2008 22:00:47 -0400 Subject: ext4: Don't do GFP_NOFS allocations after taking ext4_lock_group We can't do GFP_NOFS allocation after taking ext4_lock_group BUG: sleeping function called from invalid context at mm/slab.c:3054 in_atomic():1, irqs_disabled():0 1 lock held by vi/2426: #0: (&ei->i_data_sem){----}, at: [] ext4_release_file+0x23/0x66 Pid: 2426, comm: vi Not tainted 2.6.25-rc7 #24 [] __might_sleep+0xbe/0xc5 [] kmem_cache_alloc+0x22/0xa6 [] ext4_mb_release_inode_pa+0x73/0x1b3 [] ext4_mb_discard_inode_preallocations+0x22d/0x2d4 [] ? param_set_ushort+0x32/0x39 [] ext4_discard_reservation+0x27/0x6a [] ext4_release_file+0x2a/0x66 [] __fput+0xae/0x155 [] fput+0x17/0x19 [] filp_close+0x50/0x5a [] sys_close+0x71/0xad [] sysenter_past_esp+0x5f/0xa5 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f87471de3af..d2f0b9661fb 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3730,9 +3730,9 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac) */ static noinline_for_stack int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, - struct ext4_prealloc_space *pa) + struct ext4_prealloc_space *pa, + struct ext4_allocation_context *ac) { - struct ext4_allocation_context *ac; struct super_block *sb = e4b->bd_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned long end; @@ -3748,8 +3748,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, BUG_ON(group != e4b->bd_group && pa->pa_len != 0); end = bit + pa->pa_len; - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); - if (ac) { ac->ac_sb = sb; ac->ac_inode = pa->pa_inode; @@ -3794,23 +3792,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, */ } atomic_add(free, &sbi->s_mb_discarded); - if (ac) - kmem_cache_free(ext4_ac_cachep, ac); return err; } static noinline_for_stack int ext4_mb_release_group_pa(struct ext4_buddy *e4b, - struct ext4_prealloc_space *pa) + struct ext4_prealloc_space *pa, + struct ext4_allocation_context *ac) { - struct ext4_allocation_context *ac; struct super_block *sb = e4b->bd_sb; ext4_group_t group; ext4_grpblk_t bit; - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); - if (ac) ac->ac_op = EXT4_MB_HISTORY_DISCARD; @@ -3828,7 +3822,6 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, ac->ac_b_ex.fe_len = pa->pa_len; ac->ac_b_ex.fe_logical = 0; ext4_mb_store_history(ac); - kmem_cache_free(ext4_ac_cachep, ac); } return 0; @@ -3850,6 +3843,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, struct ext4_group_info *grp = ext4_get_group_info(sb, group); struct buffer_head *bitmap_bh = NULL; struct ext4_prealloc_space *pa, *tmp; + struct ext4_allocation_context *ac; struct list_head list; struct ext4_buddy e4b; int err; @@ -3877,6 +3871,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, grp = ext4_get_group_info(sb, group); INIT_LIST_HEAD(&list); + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); repeat: ext4_lock_group(sb, group); list_for_each_entry_safe(pa, tmp, @@ -3931,9 +3926,9 @@ repeat: spin_unlock(pa->pa_obj_lock); if (pa->pa_linear) - ext4_mb_release_group_pa(&e4b, pa); + ext4_mb_release_group_pa(&e4b, pa, ac); else - ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); + ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); list_del(&pa->u.pa_tmp_list); call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); @@ -3941,6 +3936,8 @@ repeat: out: ext4_unlock_group(sb, group); + if (ac) + kmem_cache_free(ext4_ac_cachep, ac); ext4_mb_release_desc(&e4b); put_bh(bitmap_bh); return free; @@ -3961,6 +3958,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode) struct super_block *sb = inode->i_sb; struct buffer_head *bitmap_bh = NULL; struct ext4_prealloc_space *pa, *tmp; + struct ext4_allocation_context *ac; ext4_group_t group = 0; struct list_head list; struct ext4_buddy e4b; @@ -3975,6 +3973,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode) INIT_LIST_HEAD(&list); + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); repeat: /* first, collect all pa's in the inode */ spin_lock(&ei->i_prealloc_lock); @@ -4039,7 +4038,7 @@ repeat: ext4_lock_group(sb, group); list_del(&pa->pa_group_list); - ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); + ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); ext4_unlock_group(sb, group); ext4_mb_release_desc(&e4b); @@ -4048,6 +4047,8 @@ repeat: list_del(&pa->u.pa_tmp_list); call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); } + if (ac) + kmem_cache_free(ext4_ac_cachep, ac); } /* -- cgit v1.2.3 From ef7377289a1510d638004158e43878643bc75dc5 Mon Sep 17 00:00:00 2001 From: Solofo Ramangalahy Date: Tue, 29 Apr 2008 22:00:41 -0400 Subject: ext4: update ctime and mtime for truncate with extents. The recently announced "Linux POSIX file system test suite" caught a truncate issue when using extents: mtime and ctime are not updated when truncate is successful. This is the single issue caught with "default" ext4 (mkfs and mount with minimal options). The testsuite does not report failure with -o noextents. With the following patch, all tests of the testsuite pass. Signed-off-by: Solofo Ramangalahy Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a472bc04636..47929c4e3da 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2809,6 +2809,8 @@ out_stop: ext4_orphan_del(handle, inode); up_write(&EXT4_I(inode)->i_data_sem); + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); + ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); } -- cgit v1.2.3 From 8753e88f1b4345677620ec68f847222a6301e2fd Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 29 Apr 2008 22:00:36 -0400 Subject: ext4: mark inode dirty after initializing the extent tree We should mark the inode dirty only after initializing the extent tree. Also if we fail during extent initialization we need to call DQUOT_FREE_INODE. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index d59bdf7233b..c6efbab0c80 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -739,11 +739,6 @@ got: if (err) goto fail_free_drop; - err = ext4_mark_inode_dirty(handle, inode); - if (err) { - ext4_std_error(sb, err); - goto fail_free_drop; - } if (test_opt(sb, EXTENTS)) { /* set extent flag only for diretory, file and normal symlink*/ if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { @@ -752,10 +747,16 @@ got: err = ext4_update_incompat_feature(handle, sb, EXT4_FEATURE_INCOMPAT_EXTENTS); if (err) - goto fail; + goto fail_free_drop; } } + err = ext4_mark_inode_dirty(handle, inode); + if (err) { + ext4_std_error(sb, err); + goto fail_free_drop; + } + ext4_debug("allocating inode %lu\n", inode->i_ino); goto really_out; fail: -- cgit v1.2.3 From c19204b0ae3f8a125118fd5d425d3c7a5f8fda9b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 29 Apr 2008 22:00:28 -0400 Subject: ext4: don't use ext4_error in ext4_check_descriptors Because ext4_check_descriptors is called at mount time you can't use ext4_error as it calls ext4_commit_sb, which since the sb isn't all the way initialized causes bad things to happen (ie a panic). This patch changes the ext4_error's to printk's to keep this problem from happening. Thanks much, Signed-off-by: Josef Bacik Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3435184114c..52dd0679a4e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1484,36 +1484,33 @@ static int ext4_check_descriptors(struct super_block *sb) block_bitmap = ext4_block_bitmap(sb, gdp); if (block_bitmap < first_block || block_bitmap > last_block) { - ext4_error (sb, "ext4_check_descriptors", - "Block bitmap for group %lu" - " not in group (block %llu)!", - i, block_bitmap); + printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + "Block bitmap for group %lu not in group " + "(block %llu)!", i, block_bitmap); return 0; } inode_bitmap = ext4_inode_bitmap(sb, gdp); if (inode_bitmap < first_block || inode_bitmap > last_block) { - ext4_error (sb, "ext4_check_descriptors", - "Inode bitmap for group %lu" - " not in group (block %llu)!", - i, inode_bitmap); + printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + "Inode bitmap for group %lu not in group " + "(block %llu)!", i, inode_bitmap); return 0; } inode_table = ext4_inode_table(sb, gdp); if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { - ext4_error (sb, "ext4_check_descriptors", - "Inode table for group %lu" - " not in group (block %llu)!", - i, inode_table); + printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + "Inode table for group %lu not in group " + "(block %llu)!", i, inode_table); return 0; } if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { - ext4_error(sb, __func__, - "Checksum for group %lu failed (%u!=%u)\n", - i, le16_to_cpu(ext4_group_desc_csum(sbi, i, - gdp)), le16_to_cpu(gdp->bg_checksum)); + printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + "Checksum for group %lu failed (%u!=%u)\n", + i, le16_to_cpu(ext4_group_desc_csum(sbi, i, + gdp)), le16_to_cpu(gdp->bg_checksum)); return 0; } if (!flexbg_flag) -- cgit v1.2.3 From 60bd63d1928c65abd71d8b9b45672cf6e3101845 Mon Sep 17 00:00:00 2001 From: Solofo Ramangalahy Date: Tue, 29 Apr 2008 21:59:59 -0400 Subject: ext4: cleanup for compiling mballoc with verification and debugging #defines This patch allows compiling mballoc with: #define AGGRESSIVE_CHECK #define DOUBLE_CHECK #define MB_DEBUG It fixes: Compilation errors: fs/ext4/mballoc.c: In function '__mb_check_buddy': fs/ext4/mballoc.c:605: error: 'struct ext4_prealloc_space' has no member named 'group_list' fs/ext4/mballoc.c:606: error: 'struct ext4_prealloc_space' has no member named 'pstart' fs/ext4/mballoc.c:608: error: 'struct ext4_prealloc_space' has no member named 'len' Compilation warnings: fs/ext4/mballoc.c: In function 'ext4_mb_normalize_group_request': fs/ext4/mballoc.c:2863: warning: format '%lu' expects type 'long unsigned int', but argument 3 has type 'int' fs/ext4/mballoc.c: In function 'ext4_mb_use_inode_pa': fs/ext4/mballoc.c:3103: warning: format '%lu' expects type 'long unsigned int', but argument 3 has type 'int' Sparse check: fs/ext4/mballoc.c:3818:2: warning: context imbalance in 'ext4_mb_show_ac' - different lock contexts for basic block Signed-off-by: Solofo Ramangalahy Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d2f0b9661fb..d4ae948606e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -896,10 +896,10 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, list_for_each(cur, &grp->bb_prealloc_list) { ext4_group_t groupnr; struct ext4_prealloc_space *pa; - pa = list_entry(cur, struct ext4_prealloc_space, group_list); - ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k); + pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); + ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k); MB_CHECK_ASSERT(groupnr == e4b->bd_group); - for (i = 0; i < pa->len; i++) + for (i = 0; i < pa->pa_len; i++) MB_CHECK_ASSERT(mb_test_bit(k + i, buddy)); } return 0; @@ -3131,7 +3131,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; else ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; - mb_debug("#%u: goal %lu blocks for locality group\n", + mb_debug("#%u: goal %u blocks for locality group\n", current->pid, ac->ac_g_ex.fe_len); } @@ -3371,7 +3371,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, BUG_ON(pa->pa_free < len); pa->pa_free -= len; - mb_debug("use %llu/%lu from inode pa %p\n", start, len, pa); + mb_debug("use %llu/%u from inode pa %p\n", start, len, pa); } /* @@ -4108,7 +4108,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) printk(KERN_ERR "PA:%lu:%d:%u \n", i, start, pa->pa_len); } - ext4_lock_group(sb, i); + ext4_unlock_group(sb, i); if (grp->bb_free == 0) continue; -- cgit v1.2.3 From 8f6e39a7ade8a5329c5651a2bc07010b3011da6a Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Tue, 29 Apr 2008 22:01:31 -0400 Subject: ext4: Move mballoc headers/structures to a seperate header file mballoc.h Move function and structure definiations out of mballoc.c and put it under a new header file mballoc.h Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 296 +--------------------------------------------------- fs/ext4/mballoc.h | 304 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 305 insertions(+), 295 deletions(-) create mode 100644 fs/ext4/mballoc.h (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d4ae948606e..11e1fd59acb 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -21,21 +21,7 @@ * mballoc.c contains the multiblocks allocation routines */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "ext4_jbd2.h" -#include "ext4.h" -#include "group.h" - +#include "mballoc.h" /* * MUSTDO: * - test ext4_ext_search_left() and ext4_ext_search_right() @@ -345,286 +331,6 @@ * */ -/* - * with AGGRESSIVE_CHECK allocator runs consistency checks over - * structures. these checks slow things down a lot - */ -#define AGGRESSIVE_CHECK__ - -/* - * with DOUBLE_CHECK defined mballoc creates persistent in-core - * bitmaps, maintains and uses them to check for double allocations - */ -#define DOUBLE_CHECK__ - -/* - */ -#define MB_DEBUG__ -#ifdef MB_DEBUG -#define mb_debug(fmt, a...) printk(fmt, ##a) -#else -#define mb_debug(fmt, a...) -#endif - -/* - * with EXT4_MB_HISTORY mballoc stores last N allocations in memory - * and you can monitor it in /proc/fs/ext4//mb_history - */ -#define EXT4_MB_HISTORY -#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ -#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */ -#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */ -#define EXT4_MB_HISTORY_FREE 8 /* free */ - -#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \ - EXT4_MB_HISTORY_PREALLOC) - -/* - * How long mballoc can look for a best extent (in found extents) - */ -#define MB_DEFAULT_MAX_TO_SCAN 200 - -/* - * How long mballoc must look for a best extent - */ -#define MB_DEFAULT_MIN_TO_SCAN 10 - -/* - * How many groups mballoc will scan looking for the best chunk - */ -#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5 - -/* - * with 'ext4_mb_stats' allocator will collect stats that will be - * shown at umount. The collecting costs though! - */ -#define MB_DEFAULT_STATS 1 - -/* - * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served - * by the stream allocator, which purpose is to pack requests - * as close each to other as possible to produce smooth I/O traffic - * We use locality group prealloc space for stream request. - * We can tune the same via /proc/fs/ext4//stream_req - */ -#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */ - -/* - * for which requests use 2^N search using buddies - */ -#define MB_DEFAULT_ORDER2_REQS 2 - -/* - * default group prealloc size 512 blocks - */ -#define MB_DEFAULT_GROUP_PREALLOC 512 - -static struct kmem_cache *ext4_pspace_cachep; -static struct kmem_cache *ext4_ac_cachep; - -#ifdef EXT4_BB_MAX_BLOCKS -#undef EXT4_BB_MAX_BLOCKS -#endif -#define EXT4_BB_MAX_BLOCKS 30 - -struct ext4_free_metadata { - ext4_group_t group; - unsigned short num; - ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; - struct list_head list; -}; - -struct ext4_group_info { - unsigned long bb_state; - unsigned long bb_tid; - struct ext4_free_metadata *bb_md_cur; - unsigned short bb_first_free; - unsigned short bb_free; - unsigned short bb_fragments; - struct list_head bb_prealloc_list; -#ifdef DOUBLE_CHECK - void *bb_bitmap; -#endif - unsigned short bb_counters[]; -}; - -#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 -#define EXT4_GROUP_INFO_LOCKED_BIT 1 - -#define EXT4_MB_GRP_NEED_INIT(grp) \ - (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) - - -struct ext4_prealloc_space { - struct list_head pa_inode_list; - struct list_head pa_group_list; - union { - struct list_head pa_tmp_list; - struct rcu_head pa_rcu; - } u; - spinlock_t pa_lock; - atomic_t pa_count; - unsigned pa_deleted; - ext4_fsblk_t pa_pstart; /* phys. block */ - ext4_lblk_t pa_lstart; /* log. block */ - unsigned short pa_len; /* len of preallocated chunk */ - unsigned short pa_free; /* how many blocks are free */ - unsigned short pa_linear; /* consumed in one direction - * strictly, for grp prealloc */ - spinlock_t *pa_obj_lock; - struct inode *pa_inode; /* hack, for history only */ -}; - - -struct ext4_free_extent { - ext4_lblk_t fe_logical; - ext4_grpblk_t fe_start; - ext4_group_t fe_group; - int fe_len; -}; - -/* - * Locality group: - * we try to group all related changes together - * so that writeback can flush/allocate them together as well - */ -struct ext4_locality_group { - /* for allocator */ - struct mutex lg_mutex; /* to serialize allocates */ - struct list_head lg_prealloc_list;/* list of preallocations */ - spinlock_t lg_prealloc_lock; -}; - -struct ext4_allocation_context { - struct inode *ac_inode; - struct super_block *ac_sb; - - /* original request */ - struct ext4_free_extent ac_o_ex; - - /* goal request (after normalization) */ - struct ext4_free_extent ac_g_ex; - - /* the best found extent */ - struct ext4_free_extent ac_b_ex; - - /* copy of the bext found extent taken before preallocation efforts */ - struct ext4_free_extent ac_f_ex; - - /* number of iterations done. we have to track to limit searching */ - unsigned long ac_ex_scanned; - __u16 ac_groups_scanned; - __u16 ac_found; - __u16 ac_tail; - __u16 ac_buddy; - __u16 ac_flags; /* allocation hints */ - __u8 ac_status; - __u8 ac_criteria; - __u8 ac_repeats; - __u8 ac_2order; /* if request is to allocate 2^N blocks and - * N > 0, the field stores N, otherwise 0 */ - __u8 ac_op; /* operation, for history only */ - struct page *ac_bitmap_page; - struct page *ac_buddy_page; - struct ext4_prealloc_space *ac_pa; - struct ext4_locality_group *ac_lg; -}; - -#define AC_STATUS_CONTINUE 1 -#define AC_STATUS_FOUND 2 -#define AC_STATUS_BREAK 3 - -struct ext4_mb_history { - struct ext4_free_extent orig; /* orig allocation */ - struct ext4_free_extent goal; /* goal allocation */ - struct ext4_free_extent result; /* result allocation */ - unsigned pid; - unsigned ino; - __u16 found; /* how many extents have been found */ - __u16 groups; /* how many groups have been scanned */ - __u16 tail; /* what tail broke some buddy */ - __u16 buddy; /* buddy the tail ^^^ broke */ - __u16 flags; - __u8 cr:3; /* which phase the result extent was found at */ - __u8 op:4; - __u8 merged:1; -}; - -struct ext4_buddy { - struct page *bd_buddy_page; - void *bd_buddy; - struct page *bd_bitmap_page; - void *bd_bitmap; - struct ext4_group_info *bd_info; - struct super_block *bd_sb; - __u16 bd_blkbits; - ext4_group_t bd_group; -}; -#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) -#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) - -#ifndef EXT4_MB_HISTORY -static inline void ext4_mb_store_history(struct ext4_allocation_context *ac) -{ - return; -} -#else -static void ext4_mb_store_history(struct ext4_allocation_context *ac); -#endif - -#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) - -static struct proc_dir_entry *proc_root_ext4; -struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); - -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, - ext4_group_t group); -static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); -static void ext4_mb_free_committed_blocks(struct super_block *); -static void ext4_mb_return_to_preallocation(struct inode *inode, - struct ext4_buddy *e4b, sector_t block, - int count); -static void ext4_mb_put_pa(struct ext4_allocation_context *, - struct super_block *, struct ext4_prealloc_space *pa); -static int ext4_mb_init_per_dev_proc(struct super_block *sb); -static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); - - -static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) -{ - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); -} - -static inline void ext4_unlock_group(struct super_block *sb, - ext4_group_t group) -{ - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); -} - -static inline int ext4_is_group_locked(struct super_block *sb, - ext4_group_t group) -{ - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, - &(grinfo->bb_state)); -} - -static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, - struct ext4_free_extent *fex) -{ - ext4_fsblk_t block; - - block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb) - + fex->fe_start - + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - return block; -} - static inline void *mb_correct_addr_and_bit(int *bit, void *addr) { #if BITS_PER_LONG == 64 diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h new file mode 100644 index 00000000000..bfe6add46bc --- /dev/null +++ b/fs/ext4/mballoc.h @@ -0,0 +1,304 @@ +/* + * fs/ext4/mballoc.h + * + * Written by: Alex Tomas + * + */ +#ifndef _EXT4_MBALLOC_H +#define _EXT4_MBALLOC_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ext4_jbd2.h" +#include "ext4.h" +#include "group.h" + +/* + * with AGGRESSIVE_CHECK allocator runs consistency checks over + * structures. these checks slow things down a lot + */ +#define AGGRESSIVE_CHECK__ + +/* + * with DOUBLE_CHECK defined mballoc creates persistent in-core + * bitmaps, maintains and uses them to check for double allocations + */ +#define DOUBLE_CHECK__ + +/* + */ +#define MB_DEBUG__ +#ifdef MB_DEBUG +#define mb_debug(fmt, a...) printk(fmt, ##a) +#else +#define mb_debug(fmt, a...) +#endif + +/* + * with EXT4_MB_HISTORY mballoc stores last N allocations in memory + * and you can monitor it in /proc/fs/ext4//mb_history + */ +#define EXT4_MB_HISTORY +#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ +#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */ +#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */ +#define EXT4_MB_HISTORY_FREE 8 /* free */ + +#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \ + EXT4_MB_HISTORY_PREALLOC) + +/* + * How long mballoc can look for a best extent (in found extents) + */ +#define MB_DEFAULT_MAX_TO_SCAN 200 + +/* + * How long mballoc must look for a best extent + */ +#define MB_DEFAULT_MIN_TO_SCAN 10 + +/* + * How many groups mballoc will scan looking for the best chunk + */ +#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5 + +/* + * with 'ext4_mb_stats' allocator will collect stats that will be + * shown at umount. The collecting costs though! + */ +#define MB_DEFAULT_STATS 1 + +/* + * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served + * by the stream allocator, which purpose is to pack requests + * as close each to other as possible to produce smooth I/O traffic + * We use locality group prealloc space for stream request. + * We can tune the same via /proc/fs/ext4//stream_req + */ +#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */ + +/* + * for which requests use 2^N search using buddies + */ +#define MB_DEFAULT_ORDER2_REQS 2 + +/* + * default group prealloc size 512 blocks + */ +#define MB_DEFAULT_GROUP_PREALLOC 512 + +static struct kmem_cache *ext4_pspace_cachep; +static struct kmem_cache *ext4_ac_cachep; + +#ifdef EXT4_BB_MAX_BLOCKS +#undef EXT4_BB_MAX_BLOCKS +#endif +#define EXT4_BB_MAX_BLOCKS 30 + +struct ext4_free_metadata { + ext4_group_t group; + unsigned short num; + ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; + struct list_head list; +}; + +struct ext4_group_info { + unsigned long bb_state; + unsigned long bb_tid; + struct ext4_free_metadata *bb_md_cur; + unsigned short bb_first_free; + unsigned short bb_free; + unsigned short bb_fragments; + struct list_head bb_prealloc_list; +#ifdef DOUBLE_CHECK + void *bb_bitmap; +#endif + unsigned short bb_counters[]; +}; + +#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 +#define EXT4_GROUP_INFO_LOCKED_BIT 1 + +#define EXT4_MB_GRP_NEED_INIT(grp) \ + (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) + + +struct ext4_prealloc_space { + struct list_head pa_inode_list; + struct list_head pa_group_list; + union { + struct list_head pa_tmp_list; + struct rcu_head pa_rcu; + } u; + spinlock_t pa_lock; + atomic_t pa_count; + unsigned pa_deleted; + ext4_fsblk_t pa_pstart; /* phys. block */ + ext4_lblk_t pa_lstart; /* log. block */ + unsigned short pa_len; /* len of preallocated chunk */ + unsigned short pa_free; /* how many blocks are free */ + unsigned short pa_linear; /* consumed in one direction + * strictly, for grp prealloc */ + spinlock_t *pa_obj_lock; + struct inode *pa_inode; /* hack, for history only */ +}; + + +struct ext4_free_extent { + ext4_lblk_t fe_logical; + ext4_grpblk_t fe_start; + ext4_group_t fe_group; + int fe_len; +}; + +/* + * Locality group: + * we try to group all related changes together + * so that writeback can flush/allocate them together as well + */ +struct ext4_locality_group { + /* for allocator */ + struct mutex lg_mutex; /* to serialize allocates */ + struct list_head lg_prealloc_list;/* list of preallocations */ + spinlock_t lg_prealloc_lock; +}; + +struct ext4_allocation_context { + struct inode *ac_inode; + struct super_block *ac_sb; + + /* original request */ + struct ext4_free_extent ac_o_ex; + + /* goal request (after normalization) */ + struct ext4_free_extent ac_g_ex; + + /* the best found extent */ + struct ext4_free_extent ac_b_ex; + + /* copy of the bext found extent taken before preallocation efforts */ + struct ext4_free_extent ac_f_ex; + + /* number of iterations done. we have to track to limit searching */ + unsigned long ac_ex_scanned; + __u16 ac_groups_scanned; + __u16 ac_found; + __u16 ac_tail; + __u16 ac_buddy; + __u16 ac_flags; /* allocation hints */ + __u8 ac_status; + __u8 ac_criteria; + __u8 ac_repeats; + __u8 ac_2order; /* if request is to allocate 2^N blocks and + * N > 0, the field stores N, otherwise 0 */ + __u8 ac_op; /* operation, for history only */ + struct page *ac_bitmap_page; + struct page *ac_buddy_page; + struct ext4_prealloc_space *ac_pa; + struct ext4_locality_group *ac_lg; +}; + +#define AC_STATUS_CONTINUE 1 +#define AC_STATUS_FOUND 2 +#define AC_STATUS_BREAK 3 + +struct ext4_mb_history { + struct ext4_free_extent orig; /* orig allocation */ + struct ext4_free_extent goal; /* goal allocation */ + struct ext4_free_extent result; /* result allocation */ + unsigned pid; + unsigned ino; + __u16 found; /* how many extents have been found */ + __u16 groups; /* how many groups have been scanned */ + __u16 tail; /* what tail broke some buddy */ + __u16 buddy; /* buddy the tail ^^^ broke */ + __u16 flags; + __u8 cr:3; /* which phase the result extent was found at */ + __u8 op:4; + __u8 merged:1; +}; + +struct ext4_buddy { + struct page *bd_buddy_page; + void *bd_buddy; + struct page *bd_bitmap_page; + void *bd_bitmap; + struct ext4_group_info *bd_info; + struct super_block *bd_sb; + __u16 bd_blkbits; + ext4_group_t bd_group; +}; +#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) +#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) + +#ifndef EXT4_MB_HISTORY +static inline void ext4_mb_store_history(struct ext4_allocation_context *ac) +{ + return; +} +#else +static void ext4_mb_store_history(struct ext4_allocation_context *ac); +#endif + +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) + +static struct proc_dir_entry *proc_root_ext4; +struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); + +static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, + ext4_group_t group); +static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); +static void ext4_mb_free_committed_blocks(struct super_block *); +static void ext4_mb_return_to_preallocation(struct inode *inode, + struct ext4_buddy *e4b, sector_t block, + int count); +static void ext4_mb_put_pa(struct ext4_allocation_context *, + struct super_block *, struct ext4_prealloc_space *pa); +static int ext4_mb_init_per_dev_proc(struct super_block *sb); +static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); + + +static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) +{ + struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); + + bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); +} + +static inline void ext4_unlock_group(struct super_block *sb, + ext4_group_t group) +{ + struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); + + bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); +} + +static inline int ext4_is_group_locked(struct super_block *sb, + ext4_group_t group) +{ + struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); + + return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, + &(grinfo->bb_state)); +} + +static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, + struct ext4_free_extent *fex) +{ + ext4_fsblk_t block; + + block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb) + + fex->fe_start + + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); + return block; +} +#endif -- cgit v1.2.3 From 7c2f3d6f89aab04c5c66a0a757888d3a77a5e899 Mon Sep 17 00:00:00 2001 From: Roel Kluin <12o3l@tiscali.nl> Date: Tue, 29 Apr 2008 22:01:27 -0400 Subject: ext3: fix test ext_generic_write_end() copied return value 'copied' is unsigned, whereas 'ret2' is not. The test (copied < 0) fails Signed-off-by: Roel Kluin <12o3l@tiscali.nl> Signed-off-by: "Theodore Ts'o" --- fs/ext3/inode.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index cc47b76091b..6ae4ecf3ce4 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1261,10 +1261,11 @@ static int ext3_ordered_write_end(struct file *file, new_i_size = pos + copied; if (new_i_size > EXT3_I(inode)->i_disksize) EXT3_I(inode)->i_disksize = new_i_size; - copied = ext3_generic_write_end(file, mapping, pos, len, copied, + ret2 = ext3_generic_write_end(file, mapping, pos, len, copied, page, fsdata); - if (copied < 0) - ret = copied; + copied = ret2; + if (ret2 < 0) + ret = ret2; } ret2 = ext3_journal_stop(handle); if (!ret) @@ -1289,10 +1290,11 @@ static int ext3_writeback_write_end(struct file *file, if (new_i_size > EXT3_I(inode)->i_disksize) EXT3_I(inode)->i_disksize = new_i_size; - copied = ext3_generic_write_end(file, mapping, pos, len, copied, + ret2 = ext3_generic_write_end(file, mapping, pos, len, copied, page, fsdata); - if (copied < 0) - ret = copied; + copied = ret2; + if (ret2 < 0) + ret = ret2; ret2 = ext3_journal_stop(handle); if (!ret) -- cgit v1.2.3 From f8a87d89304c1eea8e4a8dc02d134f57590913c6 Mon Sep 17 00:00:00 2001 From: Roel Kluin <12o3l@tiscali.nl> Date: Tue, 29 Apr 2008 22:01:18 -0400 Subject: ext4: fix test ext_generic_write_end() copied return value 'copied' is unsigned, whereas 'ret2' is not. The test (copied < 0) fails Signed-off-by: Roel Kluin <12o3l@tiscali.nl> Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0c94db462c2..8d970774641 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1311,10 +1311,11 @@ static int ext4_ordered_write_end(struct file *file, new_i_size = pos + copied; if (new_i_size > EXT4_I(inode)->i_disksize) EXT4_I(inode)->i_disksize = new_i_size; - copied = ext4_generic_write_end(file, mapping, pos, len, copied, + ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, page, fsdata); - if (copied < 0) - ret = copied; + copied = ret2; + if (ret2 < 0) + ret = ret2; } ret2 = ext4_journal_stop(handle); if (!ret) @@ -1339,10 +1340,11 @@ static int ext4_writeback_write_end(struct file *file, if (new_i_size > EXT4_I(inode)->i_disksize) EXT4_I(inode)->i_disksize = new_i_size; - copied = ext4_generic_write_end(file, mapping, pos, len, copied, + ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, page, fsdata); - if (copied < 0) - ret = copied; + copied = ret2; + if (ret2 < 0) + ret = ret2; ret2 = ext4_journal_stop(handle); if (!ret) -- cgit v1.2.3 From f1fa3342e271029f93d323ca664809b94594fe04 Mon Sep 17 00:00:00 2001 From: Roel Kluin <12o3l@tiscali.nl> Date: Tue, 29 Apr 2008 22:01:15 -0400 Subject: ext4: fix hot spins in mballoc after err_freebuddy and err_freemeta In ext4_mb_init_backend() 'i' is of type ext4_group_t. Since unsigned, i >= 0 is always true, so fix hot spins after err_freebuddy: and -meta: and prevent decrements when zero. Signed-off-by: Roel Kluin <12o3l@tiscali.nl> Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 11e1fd59acb..fbec2ef9379 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2272,13 +2272,13 @@ static int ext4_mb_init_backend(struct super_block *sb) meta_group_info[j] = kzalloc(len, GFP_KERNEL); if (meta_group_info[j] == NULL) { printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); - i--; goto err_freebuddy; } desc = ext4_get_group_desc(sb, i, NULL); if (desc == NULL) { printk(KERN_ERR "EXT4-fs: can't read descriptor %lu\n", i); + i++; goto err_freebuddy; } memset(meta_group_info[j], 0, len); @@ -2318,13 +2318,11 @@ static int ext4_mb_init_backend(struct super_block *sb) return 0; err_freebuddy: - while (i >= 0) { + while (i-- > 0) kfree(ext4_get_group_info(sb, i)); - i--; - } i = num_meta_group_infos; err_freemeta: - while (--i >= 0) + while (i-- > 0) kfree(sbi->s_group_info[i]); iput(sbi->s_buddy_cache); err_freesgi: -- cgit v1.2.3 From 53492b1de46a7576170e865062ffcfc93bb5650b Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 30 Apr 2008 13:38:46 +0200 Subject: [S390] System z large page support. This adds hugetlbfs support on System z, using both hardware large page support if available and software large page emulation on older hardware. Shared (large) page tables are implemented in software emulation mode, by using page->index of the first tail page from a compound large page to store page table information. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- fs/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index 2e43d46f65d..cf12c403b8c 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1005,7 +1005,8 @@ config TMPFS_POSIX_ACL config HUGETLBFS bool "HugeTLB file system support" - depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BROKEN + depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \ + (S390 && 64BIT) || BROKEN help hugetlbfs is a filesystem backing for HugeTLB pages, based on ramfs. For architectures that support it, say Y here and read -- cgit v1.2.3 From 64275ea4f33636de198da5c78d0dbe31522555b0 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Wed, 30 Apr 2008 17:11:16 +1000 Subject: [XFS] Include linux/random.h in all builds, not just debug. Noted-by: Stephen Rothwell Signed-off-by: Dave Chinner Signed-off-by: Linus Torvalds --- fs/xfs/linux-2.6/xfs_linux.h | 1 + fs/xfs/support/debug.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 1bc9f600365..4edc46915b5 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -75,6 +75,7 @@ #include #include #include +#include #include #include diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h index 855da040864..75845f95081 100644 --- a/fs/xfs/support/debug.h +++ b/fs/xfs/support/debug.h @@ -49,8 +49,6 @@ extern void assfail(char *expr, char *f, int l); #else /* DEBUG */ -#include - #define ASSERT(expr) \ (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) -- cgit v1.2.3 From 2deb1acc653cbd5384b107d050d2deba089db2bd Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 30 Apr 2008 00:52:33 -0700 Subject: isofs: fix access to unallocated memory when reading corrupted filesystem When a directory on isofs is corrupted, we did not check whether length of the name in a directory entry and the length of the directory entry itself are consistent. This could lead to possible access beyond the end of buffer when the length of the name was too big. Add this sanity check to directory reading code. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/isofs/dir.c | 8 ++++++++ fs/isofs/namei.c | 7 +++++++ 2 files changed, 15 insertions(+) (limited to 'fs') diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index 1ba407c64df..2f0dc5a1463 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -145,6 +145,14 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, } de = tmpde; } + /* Basic sanity check, whether name doesn't exceed dir entry */ + if (de_len < de->name_len[0] + + sizeof(struct iso_directory_record)) { + printk(KERN_NOTICE "iso9660: Corrupted directory entry" + " in block %lu of inode %lu\n", block, + inode->i_ino); + return -EIO; + } if (first_de) { isofs_normalize_block_and_offset(de, diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 344b247bc29..8299889a835 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -111,6 +111,13 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, dlen = de->name_len[0]; dpnt = de->name; + /* Basic sanity check, whether name doesn't exceed dir entry */ + if (de_len < dlen + sizeof(struct iso_directory_record)) { + printk(KERN_NOTICE "iso9660: Corrupted directory entry" + " in block %lu of inode %lu\n", block, + dir->i_ino); + return 0; + } if (sbi->s_rock && ((i = get_rock_ridge_filename(de, tmpname, dir)))) { -- cgit v1.2.3 From 06fffb1267c9d986687b69d74a46ee332a50575e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:52:38 -0700 Subject: do_task_stat: don't take rcu_read_lock() lock_task_sighand() was changed, and do_task_stat() doesn't need rcu_read_lock any longer. sighand->siglock protects all "interesting" fields. Except: it doesn't protect ->tty->pgrp, but neither does rcu_read_lock(), this should be fixed. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: "Paul E. McKenney" Cc: Roland McGrath Cc: Alan Cox Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index 07d6c4853fe..b07a71002f2 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -425,7 +425,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, cutime = cstime = utime = stime = cputime_zero; cgtime = gtime = cputime_zero; - rcu_read_lock(); if (lock_task_sighand(task, &flags)) { struct signal_struct *sig = task->signal; @@ -469,7 +468,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, unlock_task_sighand(task, &flags); } - rcu_read_unlock(); if (!whole || num_threads < 2) wchan = get_wchan(task); -- cgit v1.2.3 From 7a5e873f096e04e6d8719e4ecb7b70d2decca503 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:53:04 -0700 Subject: signals: de_thread: simplify the ->child_reaper switching Now that we rely on SIGNAL_UNKILLABLE flag, de_thread() doesn't need the nasty hack to kill the old ->child_reaper during the mt-exec. This also means we can avoid taking tasklist_lock around zap_other_threads(). Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index a13883903ee..8fccc276d40 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -766,9 +766,7 @@ static int de_thread(struct task_struct *tsk) /* * Kill all other threads in the thread group. - * We must hold tasklist_lock to call zap_other_threads. */ - read_lock(&tasklist_lock); spin_lock_irq(lock); if (signal_group_exit(sig)) { /* @@ -776,21 +774,10 @@ static int de_thread(struct task_struct *tsk) * return so that the signal is processed. */ spin_unlock_irq(lock); - read_unlock(&tasklist_lock); return -EAGAIN; } - - /* - * child_reaper ignores SIGKILL, change it now. - * Reparenting needs write_lock on tasklist_lock, - * so it is safe to do it under read_lock. - */ - if (unlikely(tsk->group_leader == task_child_reaper(tsk))) - task_active_pid_ns(tsk)->child_reaper = tsk; - sig->group_exit_task = tsk; zap_other_threads(tsk); - read_unlock(&tasklist_lock); /* Account for the thread group leader hanging around: */ count = thread_group_leader(tsk) ? 1 : 2; @@ -821,6 +808,8 @@ static int de_thread(struct task_struct *tsk) schedule(); } + if (unlikely(task_child_reaper(tsk) == leader)) + task_active_pid_ns(tsk)->child_reaper = tsk; /* * The only record we have of the real-time age of a * process, regardless of execs it's done, is start_time. -- cgit v1.2.3 From 4e4c22c71144c1b2e22c257ec6cf08ccb5be1165 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Apr 2008 00:53:06 -0700 Subject: signals: add set_restore_sigmask This adds the set_restore_sigmask() inline in and replaces every set_thread_flag(TIF_RESTORE_SIGMASK) with a call to it. No change, but abstracts the details of the flag protocol from all the calls. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat.c | 6 +++--- fs/eventpoll.c | 3 +-- fs/select.c | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index 2ce4456aad3..9964d542ae9 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1720,7 +1720,7 @@ sticky: if (sigmask) { memcpy(¤t->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } } else if (sigmask) sigprocmask(SIG_SETMASK, &sigsaved, NULL); @@ -1791,7 +1791,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, if (sigmask) { memcpy(¤t->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } ret = -ERESTARTNOHAND; } else if (sigmask) @@ -2117,7 +2117,7 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, if (err == -EINTR) { memcpy(¤t->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } else sigprocmask(SIG_SETMASK, &sigsaved, NULL); } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 0d237182d72..71af2fc0041 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1279,7 +1279,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, if (error == -EINTR) { memcpy(¤t->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } else sigprocmask(SIG_SETMASK, &sigsaved, NULL); } @@ -1309,4 +1309,3 @@ static int __init eventpoll_init(void) return 0; } fs_initcall(eventpoll_init); - diff --git a/fs/select.c b/fs/select.c index 00f58c5c7e0..32ce2b32fad 100644 --- a/fs/select.c +++ b/fs/select.c @@ -498,7 +498,7 @@ sticky: if (sigmask) { memcpy(¤t->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } } else if (sigmask) sigprocmask(SIG_SETMASK, &sigsaved, NULL); @@ -805,7 +805,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, if (sigmask) { memcpy(¤t->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } ret = -ERESTARTNOHAND; } else if (sigmask) -- cgit v1.2.3 From f3de272b821accbc8387211977c2de4f38468d05 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Apr 2008 00:53:09 -0700 Subject: signals: use HAVE_SET_RESTORE_SIGMASK Change all the #ifdef TIF_RESTORE_SIGMASK conditionals in non-arch code to #ifdef HAVE_SET_RESTORE_SIGMASK. If arch code defines it first, the generic set_restore_sigmask() using TIF_RESTORE_SIGMASK is not defined. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat.c | 8 ++++---- fs/eventpoll.c | 4 ++-- fs/select.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index 9964d542ae9..139dc93c092 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1634,7 +1634,7 @@ sticky: return ret; } -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, @@ -1825,7 +1825,7 @@ sticky: return ret; } -#endif /* TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ #if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) /* Stuff for NFS server syscalls... */ @@ -2080,7 +2080,7 @@ long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2) #ifdef CONFIG_EPOLL -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long compat_sys_epoll_pwait(int epfd, struct compat_epoll_event __user *events, int maxevents, int timeout, @@ -2124,7 +2124,7 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, return err; } -#endif /* TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ #endif /* CONFIG_EPOLL */ diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 71af2fc0041..221086fef17 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1241,7 +1241,7 @@ error_return: return error; } -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK /* * Implement the event wait interface for the eventpoll file. It is the kernel @@ -1287,7 +1287,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, return error; } -#endif /* #ifdef TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ static int __init eventpoll_init(void) { diff --git a/fs/select.c b/fs/select.c index 32ce2b32fad..2c292146e24 100644 --- a/fs/select.c +++ b/fs/select.c @@ -425,7 +425,7 @@ sticky: return ret; } -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize) @@ -528,7 +528,7 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize); } -#endif /* TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ struct poll_list { struct poll_list *next; @@ -759,7 +759,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, return ret; } -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize) @@ -839,4 +839,4 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, return ret; } -#endif /* TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ -- cgit v1.2.3 From 2800d8d19e51414403df8144eaa214bb03400b87 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:53:12 -0700 Subject: document de_thread() with exit_notify() connection Add a couple of small comments, it is not easy to see what this code does. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 8fccc276d40..9f9f931ef94 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -798,7 +798,7 @@ static int de_thread(struct task_struct *tsk) if (!thread_group_leader(tsk)) { leader = tsk->group_leader; - sig->notify_count = -1; + sig->notify_count = -1; /* for exit_notify() */ for (;;) { write_lock_irq(&tasklist_lock); if (likely(leader->exit_state)) -- cgit v1.2.3 From 04f378b198da233ca0aca341b113dc6579d46123 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 30 Apr 2008 00:53:29 -0700 Subject: tty: BKL pushdown - Push the BKL down into the line disciplines - Switch the tty layer to unlocked_ioctl - Introduce a new ctrl_lock spin lock for the control bits - Eliminate much of the lock_kernel use in n_tty - Prepare to (but don't yet) call the drivers with the lock dropped on the paths that historically held the lock BKL now primarily protects open/close/ldisc change in the tty layer [jirislaby@gmail.com: a couple of fixes] Signed-off-by: Alan Cox Signed-off-by: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c6e72aebd16..9663e877672 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1046,7 +1046,7 @@ static int vt_check(struct file *file) struct inode *inode = file->f_path.dentry->d_inode; struct vc_data *vc; - if (file->f_op->ioctl != tty_ioctl) + if (file->f_op->unlocked_ioctl != tty_ioctl) return -EINVAL; tty = (struct tty_struct *)file->private_data; -- cgit v1.2.3 From 5d0fdf1e01899805b6c2c0b789a707dcb731b1ea Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 30 Apr 2008 00:53:31 -0700 Subject: tty_io: fix remaining pid struct locking This fixes the last couple of pid struct locking failures I know about. [oleg@tv-sign.ru: clean up do_task_stat()] Signed-off-by: Alan Cox Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index b07a71002f2..c135cbdd912 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -429,7 +429,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, struct signal_struct *sig = task->signal; if (sig->tty) { - tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns); + struct pid *pgrp = tty_get_pgrp(sig->tty); + tty_pgrp = pid_nr_ns(pgrp, ns); + put_pid(pgrp); tty_nr = new_encode_dev(tty_devnum(sig->tty)); } -- cgit v1.2.3 From f34d7a5b7010b82fe97da95496b9971435530062 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 30 Apr 2008 00:54:13 -0700 Subject: tty: The big operations rework - Operations are now a shared const function block as with most other Linux objects - Introduce wrappers for some optional functions to get consistent behaviour - Wrap put_char which used to be patched by the tty layer - Document which functions are needed/optional - Make put_char report success/fail - Cache the driver->ops pointer in the tty as tty->ops - Remove various surplus lock calls we no longer need - Remove proc_write method as noted by Alexey Dobriyan - Introduce some missing sanity checks where certain driver/ldisc combinations would oops as they didn't check needed methods were present [akpm@linux-foundation.org: fix fs/compat_ioctl.c build] [akpm@linux-foundation.org: fix isicom] [akpm@linux-foundation.org: fix arch/ia64/hp/sim/simserial.c build] [akpm@linux-foundation.org: fix kgdb] Signed-off-by: Alan Cox Acked-by: Greg Kroah-Hartman Cc: Jason Wessel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat_ioctl.c | 2 +- fs/proc/proc_tty.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 9663e877672..97dba0d9234 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1053,7 +1053,7 @@ static int vt_check(struct file *file) if (tty_paranoia_check(tty, inode, "tty_ioctl")) return -EINVAL; - if (tty->driver->ioctl != vt_ioctl) + if (tty->ops->ioctl != vt_ioctl) return -EINVAL; vc = (struct vc_data *)tty->driver_data; diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index ac26ccc25f4..21f490f5d65 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -192,16 +192,14 @@ void proc_tty_register_driver(struct tty_driver *driver) { struct proc_dir_entry *ent; - if ((!driver->read_proc && !driver->write_proc) || - !driver->driver_name || + if (!driver->ops->read_proc || !driver->driver_name || driver->proc_entry) return; ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); if (!ent) return; - ent->read_proc = driver->read_proc; - ent->write_proc = driver->write_proc; + ent->read_proc = driver->ops->read_proc; ent->owner = driver->owner; ent->data = driver; -- cgit v1.2.3 From 718a916338e821a10961e6a7a17430c18e5e58d9 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 30 Apr 2008 00:54:21 -0700 Subject: devpts: factor out PTY index allocation Factor out the code used to allocate/free a pts index into new interfaces, devpts_new_index() and devpts_kill_index(). This localizes the external data structures used in managing the pts indices. [akpm@linux-foundation.org: undo accidental mutex2sem conversion] Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Serge Hallyn Signed-off-by: Matt Helsley Acked-by: H. Peter Anvin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/devpts/inode.c | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index f120e120787..285b64a8b06 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include #include @@ -26,6 +28,10 @@ #define DEVPTS_DEFAULT_MODE 0600 +extern int pty_limit; /* Config limit on Unix98 ptys */ +static DEFINE_IDR(allocated_ptys); +static DEFINE_MUTEX(allocated_ptys_lock); + static struct vfsmount *devpts_mnt; static struct dentry *devpts_root; @@ -171,9 +177,44 @@ static struct dentry *get_node(int num) return lookup_one_len(s, root, sprintf(s, "%d", num)); } +int devpts_new_index(void) +{ + int index; + int idr_ret; + +retry: + if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) { + return -ENOMEM; + } + + mutex_lock(&allocated_ptys_lock); + idr_ret = idr_get_new(&allocated_ptys, NULL, &index); + if (idr_ret < 0) { + mutex_unlock(&allocated_ptys_lock); + if (idr_ret == -EAGAIN) + goto retry; + return -EIO; + } + + if (index >= pty_limit) { + idr_remove(&allocated_ptys, index); + mutex_unlock(&allocated_ptys_lock); + return -EIO; + } + mutex_unlock(&allocated_ptys_lock); + return index; +} + +void devpts_kill_index(int idx) +{ + mutex_lock(&allocated_ptys_lock); + idr_remove(&allocated_ptys, idx); + mutex_unlock(&allocated_ptys_lock); +} + int devpts_pty_new(struct tty_struct *tty) { - int number = tty->index; + int number = tty->index; /* tty layer puts index from devpts_new_index() in here */ struct tty_driver *driver = tty->driver; dev_t device = MKDEV(driver->major, driver->minor_start+number); struct dentry *dentry; -- cgit v1.2.3 From fa799759f9801137f665dbedda2c0815f1bf6f1b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:33 -0700 Subject: mm: bdi: expose the BDI object in sysfs for NFS Register NFS' backing_dev_info under sysfs with the name "nfs-MAJOR:MINOR" Signed-off-by: Miklos Szeredi Cc: Peter Zijlstra Cc: Trond Myklebust Cc: "J. Bruce Fields" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/super.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index fa220dc7460..7226a506f3c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1575,6 +1575,11 @@ static int nfs_compare_super(struct super_block *sb, void *data) return nfs_compare_mount_options(sb, server, mntflags); } +static int nfs_bdi_register(struct nfs_server *server) +{ + return bdi_register_dev(&server->backing_dev_info, server->s_dev); +} + static int nfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { @@ -1617,6 +1622,10 @@ static int nfs_get_sb(struct file_system_type *fs_type, if (s->s_fs_info != server) { nfs_free_server(server); server = NULL; + } else { + error = nfs_bdi_register(server); + if (error) + goto error_splat_super; } if (!s->s_root) { @@ -1664,6 +1673,7 @@ static void nfs_kill_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); + bdi_unregister(&server->backing_dev_info); kill_anon_super(s); nfs_free_server(server); } @@ -1708,6 +1718,10 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, if (s->s_fs_info != server) { nfs_free_server(server); server = NULL; + } else { + error = nfs_bdi_register(server); + if (error) + goto error_splat_super; } if (!s->s_root) { @@ -1984,6 +1998,10 @@ static int nfs4_get_sb(struct file_system_type *fs_type, if (s->s_fs_info != server) { nfs_free_server(server); server = NULL; + } else { + error = nfs_bdi_register(server); + if (error) + goto error_splat_super; } if (!s->s_root) { @@ -2070,6 +2088,10 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, if (s->s_fs_info != server) { nfs_free_server(server); server = NULL; + } else { + error = nfs_bdi_register(server); + if (error) + goto error_splat_super; } if (!s->s_root) { @@ -2149,6 +2171,10 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, if (s->s_fs_info != server) { nfs_free_server(server); server = NULL; + } else { + error = nfs_bdi_register(server); + if (error) + goto error_splat_super; } if (!s->s_root) { -- cgit v1.2.3 From b6f2fcbcfca9db2bd7aa24940224fcd3bbdbb8aa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:34 -0700 Subject: mm: bdi: expose the BDI object in sysfs for FUSE Register FUSE's backing_dev_info under sysfs with the name "fuse-MAJOR:MINOR" Make the fuse control filesystem use s_dev instead of a fuse specific ID. This makes it easier to match directories under /sys/fs/fuse/connections/ with directories under /sys/class/bdi, and with actual mounts. Signed-off-by: Miklos Szeredi Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/control.c | 2 +- fs/fuse/fuse_i.h | 4 ++-- fs/fuse/inode.c | 30 +++++++++++++++--------------- 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 105d4a271e0..4f3cab32141 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -117,7 +117,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc) parent = fuse_control_sb->s_root; inc_nlink(parent->d_inode); - sprintf(name, "%llu", (unsigned long long) fc->id); + sprintf(name, "%u", fc->dev); parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2, &simple_dir_inode_operations, &simple_dir_operations); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 67aaf6ee38e..c0481e48d16 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -390,8 +390,8 @@ struct fuse_conn { /** Entry on the fuse_conn_list */ struct list_head entry; - /** Unique ID */ - u64 id; + /** Device ID from super block */ + dev_t dev; /** Dentries in the control filesystem */ struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES]; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4df34da2284..c4fcfd59cd8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -447,7 +447,7 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt) return 0; } -static struct fuse_conn *new_conn(void) +static struct fuse_conn *new_conn(struct super_block *sb) { struct fuse_conn *fc; int err; @@ -468,19 +468,26 @@ static struct fuse_conn *new_conn(void) atomic_set(&fc->num_waiting, 0); fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; fc->bdi.unplug_io_fn = default_unplug_io_fn; + fc->dev = sb->s_dev; err = bdi_init(&fc->bdi); - if (err) { - kfree(fc); - fc = NULL; - goto out; - } + if (err) + goto error_kfree; + err = bdi_register_dev(&fc->bdi, fc->dev); + if (err) + goto error_bdi_destroy; fc->reqctr = 0; fc->blocked = 1; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); } -out: return fc; + +error_bdi_destroy: + bdi_destroy(&fc->bdi); +error_kfree: + mutex_destroy(&fc->inst_mutex); + kfree(fc); + return NULL; } void fuse_conn_put(struct fuse_conn *fc) @@ -578,12 +585,6 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) request_send_background(fc, req); } -static u64 conn_id(void) -{ - static u64 ctr = 1; - return ctr++; -} - static int fuse_fill_super(struct super_block *sb, void *data, int silent) { struct fuse_conn *fc; @@ -621,7 +622,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (file->f_op != &fuse_dev_operations) return -EINVAL; - fc = new_conn(); + fc = new_conn(sb); if (!fc) return -ENOMEM; @@ -659,7 +660,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (file->private_data) goto err_unlock; - fc->id = conn_id(); err = fuse_ctl_add_conn(fc); if (err) goto err_unlock; -- cgit v1.2.3 From e4ad08fe64afca4ef79ecc4c624e6e871688da0d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:37 -0700 Subject: mm: bdi: add separate writeback accounting capability Add a new BDI capability flag: BDI_CAP_NO_ACCT_WB. If this flag is set, then don't update the per-bdi writeback stats from test_set_page_writeback() and test_clear_page_writeback(). Misc cleanups: - convert bdi_cap_writeback_dirty() and friends to static inline functions - create a flag that includes all three dirty/writeback related flags, since almst all users will want to have them toghether Signed-off-by: Miklos Szeredi Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/configfs/inode.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/ocfs2/dlm/dlmfs.c | 2 +- fs/ramfs/inode.c | 2 +- fs/sysfs/inode.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 4c1ebff778e..b9a1d810346 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -47,7 +47,7 @@ static const struct address_space_operations configfs_aops = { static struct backing_dev_info configfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; static const struct inode_operations configfs_inode_operations ={ diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 9783723e8ff..aeabf80f81a 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -45,7 +45,7 @@ static const struct inode_operations hugetlbfs_inode_operations; static struct backing_dev_info hugetlbfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; int sysctl_hugetlb_shm_group; diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 61a000f8524..e48aba698b7 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -327,7 +327,7 @@ clear_fields: static struct backing_dev_info dlmfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; static struct inode *dlmfs_get_root_inode(struct super_block *sb) diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 8428d5b2711..b13123424e4 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -44,7 +44,7 @@ static const struct inode_operations ramfs_dir_inode_operations; static struct backing_dev_info ramfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, }; diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index d9262f74f94..f8b82e73b3b 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -30,7 +30,7 @@ static const struct address_space_operations sysfs_aops = { static struct backing_dev_info sysfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; static const struct inode_operations sysfs_inode_operations ={ -- cgit v1.2.3 From fc3ba692a4d19019387c5acaea63131f9eab05dd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:38 -0700 Subject: mm: Add NR_WRITEBACK_TEMP counter Fuse will use temporary buffers to write back dirty data from memory mappings (normal writes are done synchronously). This is needed, because there cannot be any guarantee about the time in which a write will complete. By using temporary buffers, from the MM's point if view the page is written back immediately. If the writeout was due to memory pressure, this effectively migrates data from a full zone to a less full zone. This patch adds a new counter (NR_WRITEBACK_TEMP) for the number of pages used as temporary buffers. [Lee.Schermerhorn@hp.com: add vmstat_text for NR_WRITEBACK_TEMP] Signed-off-by: Miklos Szeredi Cc: Christoph Lameter Signed-off-by: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 48bcf20cec2..74a323d2b85 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -179,6 +179,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "PageTables: %8lu kB\n" "NFS_Unstable: %8lu kB\n" "Bounce: %8lu kB\n" + "WritebackTmp: %8lu kB\n" "CommitLimit: %8lu kB\n" "Committed_AS: %8lu kB\n" "VmallocTotal: %8lu kB\n" @@ -210,6 +211,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, K(global_page_state(NR_PAGETABLE)), K(global_page_state(NR_UNSTABLE_NFS)), K(global_page_state(NR_BOUNCE)), + K(global_page_state(NR_WRITEBACK_TEMP)), K(allowed), K(committed), (unsigned long)VMALLOC_TOTAL >> 10, -- cgit v1.2.3 From 3be5a52b30aa5cf9d795b7634f728f612197b1c4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:41 -0700 Subject: fuse: support writable mmap Quoting Linus (3 years ago, FUSE inclusion discussions): "User-space filesystems are hard to get right. I'd claim that they are almost impossible, unless you limit them somehow (shared writable mappings are the nastiest part - if you don't have those, you can reasonably limit your problems by limiting the number of dirty pages you accept through normal "write()" calls)." Instead of attempting the impossible, I've just waited for the dirty page accounting infrastructure to materialize (thanks to Peter Zijlstra and others). This nicely solved the biggest problem: limiting the number of pages used for write caching. Some small details remained, however, which this largish patch attempts to address. It provides a page writeback implementation for fuse, which is completely safe against VM related deadlocks. Performance may not be very good for certain usage patterns, but generally it should be acceptable. It has been tested extensively with fsx-linux and bash-shared-mapping. Fuse page writeback design -------------------------- fuse_writepage() allocates a new temporary page with GFP_NOFS|__GFP_HIGHMEM. It copies the contents of the original page, and queues a WRITE request to the userspace filesystem using this temp page. The writeback is finished instantly from the MM's point of view: the page is removed from the radix trees, and the PageDirty and PageWriteback flags are cleared. For the duration of the actual write, the NR_WRITEBACK_TEMP counter is incremented. The per-bdi writeback count is not decremented until the actual write completes. On dirtying the page, fuse waits for a previous write to finish before proceeding. This makes sure, there can only be one temporary page used at a time for one cached page. This approach is wasteful in both memory and CPU bandwidth, so why is this complication needed? The basic problem is that there can be no guarantee about the time in which the userspace filesystem will complete a write. It may be buggy or even malicious, and fail to complete WRITE requests. We don't want unrelated parts of the system to grind to a halt in such cases. Also a filesystem may need additional resources (particularly memory) to complete a WRITE request. There's a great danger of a deadlock if that allocation may wait for the writepage to finish. Currently there are several cases where the kernel can block on page writeback: - allocation order is larger than PAGE_ALLOC_COSTLY_ORDER - page migration - throttle_vm_writeout (through NR_WRITEBACK) - sync(2) Of course in some cases (fsync, msync) we explicitly want to allow blocking. So for these cases new code has to be added to fuse, since the VM is not tracking writeback pages for us any more. As an extra safetly measure, the maximum dirty ratio allocated to a single fuse filesystem is set to 1% by default. This way one (or several) buggy or malicious fuse filesystems cannot slow down the rest of the system by hogging dirty memory. With appropriate privileges, this limit can be raised through '/sys/class/bdi//max_ratio'. Signed-off-by: Miklos Szeredi Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/dev.c | 19 ++++ fs/fuse/dir.c | 84 ++++++++++++++- fs/fuse/file.c | 321 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- fs/fuse/fuse_i.h | 37 +++++++ fs/fuse/inode.c | 49 +++++++-- 5 files changed, 481 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index af639807524..bba83762c48 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -47,6 +47,14 @@ struct fuse_req *fuse_request_alloc(void) return req; } +struct fuse_req *fuse_request_alloc_nofs(void) +{ + struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS); + if (req) + fuse_request_init(req); + return req; +} + void fuse_request_free(struct fuse_req *req) { kmem_cache_free(fuse_req_cachep, req); @@ -429,6 +437,17 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req) request_send_nowait(fc, req); } +/* + * Called under fc->lock + * + * fc->connected must have been checked previously + */ +void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req) +{ + req->isreply = 1; + request_send_nowait_locked(fc, req); +} + /* * Lock the request. Up to the next unlock_request() there mustn't be * anything that could cause a page-fault. If the request was already diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c4807b3fc8a..48b9971ecd9 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1106,6 +1106,50 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) } } +/* + * Prevent concurrent writepages on inode + * + * This is done by adding a negative bias to the inode write counter + * and waiting for all pending writes to finish. + */ +void fuse_set_nowrite(struct inode *inode) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + + BUG_ON(!mutex_is_locked(&inode->i_mutex)); + + spin_lock(&fc->lock); + BUG_ON(fi->writectr < 0); + fi->writectr += FUSE_NOWRITE; + spin_unlock(&fc->lock); + wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE); +} + +/* + * Allow writepages on inode + * + * Remove the bias from the writecounter and send any queued + * writepages. + */ +static void __fuse_release_nowrite(struct inode *inode) +{ + struct fuse_inode *fi = get_fuse_inode(inode); + + BUG_ON(fi->writectr != FUSE_NOWRITE); + fi->writectr = 0; + fuse_flush_writepages(inode); +} + +void fuse_release_nowrite(struct inode *inode) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + + spin_lock(&fc->lock); + __fuse_release_nowrite(inode); + spin_unlock(&fc->lock); +} + /* * Set attributes, and at the same time refresh them. * @@ -1122,6 +1166,8 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, struct fuse_req *req; struct fuse_setattr_in inarg; struct fuse_attr_out outarg; + bool is_truncate = false; + loff_t oldsize; int err; if (!fuse_allow_task(fc, current)) @@ -1145,12 +1191,16 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, send_sig(SIGXFSZ, current, 0); return -EFBIG; } + is_truncate = true; } req = fuse_get_req(fc); if (IS_ERR(req)) return PTR_ERR(req); + if (is_truncate) + fuse_set_nowrite(inode); + memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); iattr_to_fattr(attr, &inarg); @@ -1181,16 +1231,44 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, if (err) { if (err == -EINTR) fuse_invalidate_attr(inode); - return err; + goto error; } if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { make_bad_inode(inode); - return -EIO; + err = -EIO; + goto error; + } + + spin_lock(&fc->lock); + fuse_change_attributes_common(inode, &outarg.attr, + attr_timeout(&outarg)); + oldsize = inode->i_size; + i_size_write(inode, outarg.attr.size); + + if (is_truncate) { + /* NOTE: this may release/reacquire fc->lock */ + __fuse_release_nowrite(inode); + } + spin_unlock(&fc->lock); + + /* + * Only call invalidate_inode_pages2() after removing + * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. + */ + if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { + if (outarg.attr.size < oldsize) + fuse_truncate(inode->i_mapping, outarg.attr.size); + invalidate_inode_pages2(inode->i_mapping); } - fuse_change_attributes(inode, &outarg.attr, attr_timeout(&outarg), 0); return 0; + +error: + if (is_truncate) + fuse_release_nowrite(inode); + + return err; } static int fuse_setattr(struct dentry *entry, struct iattr *attr) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 676b0bc8a86..68051f3bdf9 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -210,6 +210,49 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) return (u64) v0 + ((u64) v1 << 32); } +/* + * Check if page is under writeback + * + * This is currently done by walking the list of writepage requests + * for the inode, which can be pretty inefficient. + */ +static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_req *req; + bool found = false; + + spin_lock(&fc->lock); + list_for_each_entry(req, &fi->writepages, writepages_entry) { + pgoff_t curr_index; + + BUG_ON(req->inode != inode); + curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; + if (curr_index == index) { + found = true; + break; + } + } + spin_unlock(&fc->lock); + + return found; +} + +/* + * Wait for page writeback to be completed. + * + * Since fuse doesn't rely on the VM writeback tracking, this has to + * use some other means. + */ +static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index) +{ + struct fuse_inode *fi = get_fuse_inode(inode); + + wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index)); + return 0; +} + static int fuse_flush(struct file *file, fl_owner_t id) { struct inode *inode = file->f_path.dentry->d_inode; @@ -245,6 +288,21 @@ static int fuse_flush(struct file *file, fl_owner_t id) return err; } +/* + * Wait for all pending writepages on the inode to finish. + * + * This is currently done by blocking further writes with FUSE_NOWRITE + * and waiting for all sent writes to complete. + * + * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage + * could conflict with truncation. + */ +static void fuse_sync_writes(struct inode *inode) +{ + fuse_set_nowrite(inode); + fuse_release_nowrite(inode); +} + int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, int isdir) { @@ -261,6 +319,17 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) return 0; + /* + * Start writeback against all dirty pages of the inode, then + * wait for all outstanding writes, before sending the FSYNC + * request. + */ + err = write_inode_now(inode, 0); + if (err) + return err; + + fuse_sync_writes(inode); + req = fuse_get_req(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -340,6 +409,13 @@ static int fuse_readpage(struct file *file, struct page *page) if (is_bad_inode(inode)) goto out; + /* + * Page writeback can extend beyond the liftime of the + * page-cache page, so make sure we read a properly synced + * page. + */ + fuse_wait_on_page_writeback(inode, page->index); + req = fuse_get_req(fc); err = PTR_ERR(req); if (IS_ERR(req)) @@ -411,6 +487,8 @@ static int fuse_readpages_fill(void *_data, struct page *page) struct inode *inode = data->inode; struct fuse_conn *fc = get_fuse_conn(inode); + fuse_wait_on_page_writeback(inode, page->index); + if (req->num_pages && (req->num_pages == FUSE_MAX_PAGES_PER_REQ || (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || @@ -477,11 +555,10 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, } static void fuse_write_fill(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, - int writepage) + struct fuse_file *ff, struct inode *inode, + loff_t pos, size_t count, int writepage) { struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_file *ff = file->private_data; struct fuse_write_in *inarg = &req->misc.write.in; struct fuse_write_out *outarg = &req->misc.write.out; @@ -490,7 +567,7 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file, inarg->offset = pos; inarg->size = count; inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; - inarg->flags = file->f_flags; + inarg->flags = file ? file->f_flags : 0; req->in.h.opcode = FUSE_WRITE; req->in.h.nodeid = get_node_id(inode); req->in.argpages = 1; @@ -511,7 +588,7 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file, fl_owner_t owner) { struct fuse_conn *fc = get_fuse_conn(inode); - fuse_write_fill(req, file, inode, pos, count, 0); + fuse_write_fill(req, file, file->private_data, inode, pos, count, 0); if (owner != NULL) { struct fuse_write_in *inarg = &req->misc.write.in; inarg->write_flags |= FUSE_WRITE_LOCKOWNER; @@ -546,6 +623,12 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, if (is_bad_inode(inode)) return -EIO; + /* + * Make sure writepages on the same page are not mixed up with + * plain writes. + */ + fuse_wait_on_page_writeback(inode, page->index); + req = fuse_get_req(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -716,21 +799,225 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, return res; } -static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) +static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) { - if ((vma->vm_flags & VM_SHARED)) { - if ((vma->vm_flags & VM_WRITE)) - return -ENODEV; - else - vma->vm_flags &= ~VM_MAYWRITE; + __free_page(req->pages[0]); + fuse_file_put(req->ff); + fuse_put_request(fc, req); +} + +static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) +{ + struct inode *inode = req->inode; + struct fuse_inode *fi = get_fuse_inode(inode); + struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; + + list_del(&req->writepages_entry); + dec_bdi_stat(bdi, BDI_WRITEBACK); + dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP); + bdi_writeout_inc(bdi); + wake_up(&fi->page_waitq); +} + +/* Called under fc->lock, may release and reacquire it */ +static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) +{ + struct fuse_inode *fi = get_fuse_inode(req->inode); + loff_t size = i_size_read(req->inode); + struct fuse_write_in *inarg = &req->misc.write.in; + + if (!fc->connected) + goto out_free; + + if (inarg->offset + PAGE_CACHE_SIZE <= size) { + inarg->size = PAGE_CACHE_SIZE; + } else if (inarg->offset < size) { + inarg->size = size & (PAGE_CACHE_SIZE - 1); + } else { + /* Got truncated off completely */ + goto out_free; } - return generic_file_mmap(file, vma); + + req->in.args[1].size = inarg->size; + fi->writectr++; + request_send_background_locked(fc, req); + return; + + out_free: + fuse_writepage_finish(fc, req); + spin_unlock(&fc->lock); + fuse_writepage_free(fc, req); + spin_lock(&fc->lock); } -static int fuse_set_page_dirty(struct page *page) +/* + * If fi->writectr is positive (no truncate or fsync going on) send + * all queued writepage requests. + * + * Called with fc->lock + */ +void fuse_flush_writepages(struct inode *inode) { - printk("fuse_set_page_dirty: should not happen\n"); - dump_stack(); + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_req *req; + + while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { + req = list_entry(fi->queued_writes.next, struct fuse_req, list); + list_del_init(&req->list); + fuse_send_writepage(fc, req); + } +} + +static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) +{ + struct inode *inode = req->inode; + struct fuse_inode *fi = get_fuse_inode(inode); + + mapping_set_error(inode->i_mapping, req->out.h.error); + spin_lock(&fc->lock); + fi->writectr--; + fuse_writepage_finish(fc, req); + spin_unlock(&fc->lock); + fuse_writepage_free(fc, req); +} + +static int fuse_writepage_locked(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_req *req; + struct fuse_file *ff; + struct page *tmp_page; + + set_page_writeback(page); + + req = fuse_request_alloc_nofs(); + if (!req) + goto err; + + tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (!tmp_page) + goto err_free; + + spin_lock(&fc->lock); + BUG_ON(list_empty(&fi->write_files)); + ff = list_entry(fi->write_files.next, struct fuse_file, write_entry); + req->ff = fuse_file_get(ff); + spin_unlock(&fc->lock); + + fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1); + + copy_highpage(tmp_page, page); + req->num_pages = 1; + req->pages[0] = tmp_page; + req->page_offset = 0; + req->end = fuse_writepage_end; + req->inode = inode; + + inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK); + inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); + end_page_writeback(page); + + spin_lock(&fc->lock); + list_add(&req->writepages_entry, &fi->writepages); + list_add_tail(&req->list, &fi->queued_writes); + fuse_flush_writepages(inode); + spin_unlock(&fc->lock); + + return 0; + +err_free: + fuse_request_free(req); +err: + end_page_writeback(page); + return -ENOMEM; +} + +static int fuse_writepage(struct page *page, struct writeback_control *wbc) +{ + int err; + + err = fuse_writepage_locked(page); + unlock_page(page); + + return err; +} + +static int fuse_launder_page(struct page *page) +{ + int err = 0; + if (clear_page_dirty_for_io(page)) { + struct inode *inode = page->mapping->host; + err = fuse_writepage_locked(page); + if (!err) + fuse_wait_on_page_writeback(inode, page->index); + } + return err; +} + +/* + * Write back dirty pages now, because there may not be any suitable + * open files later + */ +static void fuse_vma_close(struct vm_area_struct *vma) +{ + filemap_write_and_wait(vma->vm_file->f_mapping); +} + +/* + * Wait for writeback against this page to complete before allowing it + * to be marked dirty again, and hence written back again, possibly + * before the previous writepage completed. + * + * Block here, instead of in ->writepage(), so that the userspace fs + * can only block processes actually operating on the filesystem. + * + * Otherwise unprivileged userspace fs would be able to block + * unrelated: + * + * - page migration + * - sync(2) + * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER + */ +static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + /* + * Don't use page->mapping as it may become NULL from a + * concurrent truncate. + */ + struct inode *inode = vma->vm_file->f_mapping->host; + + fuse_wait_on_page_writeback(inode, page->index); + return 0; +} + +static struct vm_operations_struct fuse_file_vm_ops = { + .close = fuse_vma_close, + .fault = filemap_fault, + .page_mkwrite = fuse_page_mkwrite, +}; + +static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) { + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_file *ff = file->private_data; + /* + * file may be written through mmap, so chain it onto the + * inodes's write_file list + */ + spin_lock(&fc->lock); + if (list_empty(&ff->write_entry)) + list_add(&ff->write_entry, &fi->write_files); + spin_unlock(&fc->lock); + } + file_accessed(file); + vma->vm_ops = &fuse_file_vm_ops; return 0; } @@ -940,10 +1227,12 @@ static const struct file_operations fuse_direct_io_file_operations = { static const struct address_space_operations fuse_file_aops = { .readpage = fuse_readpage, + .writepage = fuse_writepage, + .launder_page = fuse_launder_page, .write_begin = fuse_write_begin, .write_end = fuse_write_end, .readpages = fuse_readpages, - .set_page_dirty = fuse_set_page_dirty, + .set_page_dirty = __set_page_dirty_nobuffers, .bmap = fuse_bmap, }; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index c0481e48d16..4b094fbc9c7 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -15,6 +15,7 @@ #include #include #include +#include /** Max number of pages that can be used in a single read request */ #define FUSE_MAX_PAGES_PER_REQ 32 @@ -25,6 +26,9 @@ /** Congestion starts at 75% of maximum */ #define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) +/** Bias for fi->writectr, meaning new writepages must not be sent */ +#define FUSE_NOWRITE INT_MIN + /** It could be as large as PATH_MAX, but would that have any uses? */ #define FUSE_NAME_MAX 1024 @@ -73,6 +77,19 @@ struct fuse_inode { /** Files usable in writepage. Protected by fc->lock */ struct list_head write_files; + + /** Writepages pending on truncate or fsync */ + struct list_head queued_writes; + + /** Number of sent writes, a negative bias (FUSE_NOWRITE) + * means more writes are blocked */ + int writectr; + + /** Waitq for writepage completion */ + wait_queue_head_t page_waitq; + + /** List of writepage requestst (pending or sent) */ + struct list_head writepages; }; /** FUSE specific file data */ @@ -242,6 +259,12 @@ struct fuse_req { /** File used in the request (or NULL) */ struct fuse_file *ff; + /** Inode used in the request or NULL */ + struct inode *inode; + + /** Link on fi->writepages */ + struct list_head writepages_entry; + /** Request completion callback */ void (*end)(struct fuse_conn *, struct fuse_req *); @@ -504,6 +527,11 @@ void fuse_init_symlink(struct inode *inode); void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, u64 attr_valid, u64 attr_version); +void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, + u64 attr_valid); + +void fuse_truncate(struct address_space *mapping, loff_t offset); + /** * Initialize the client device */ @@ -522,6 +550,8 @@ void fuse_ctl_cleanup(void); */ struct fuse_req *fuse_request_alloc(void); +struct fuse_req *fuse_request_alloc_nofs(void); + /** * Free a request */ @@ -558,6 +588,8 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req); */ void request_send_background(struct fuse_conn *fc, struct fuse_req *req); +void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req); + /* Abort all requests */ void fuse_abort_conn(struct fuse_conn *fc); @@ -600,3 +632,8 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); int fuse_update_attributes(struct inode *inode, struct kstat *stat, struct file *file, bool *refreshed); + +void fuse_flush_writepages(struct inode *inode); + +void fuse_set_nowrite(struct inode *inode); +void fuse_release_nowrite(struct inode *inode); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index c4fcfd59cd8..7d01c68852a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -59,7 +59,11 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->nodeid = 0; fi->nlookup = 0; fi->attr_version = 0; + fi->writectr = 0; INIT_LIST_HEAD(&fi->write_files); + INIT_LIST_HEAD(&fi->queued_writes); + INIT_LIST_HEAD(&fi->writepages); + init_waitqueue_head(&fi->page_waitq); fi->forget_req = fuse_request_alloc(); if (!fi->forget_req) { kmem_cache_free(fuse_inode_cachep, inode); @@ -73,6 +77,7 @@ static void fuse_destroy_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); BUG_ON(!list_empty(&fi->write_files)); + BUG_ON(!list_empty(&fi->queued_writes)); if (fi->forget_req) fuse_request_free(fi->forget_req); kmem_cache_free(fuse_inode_cachep, inode); @@ -109,7 +114,7 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) return 0; } -static void fuse_truncate(struct address_space *mapping, loff_t offset) +void fuse_truncate(struct address_space *mapping, loff_t offset) { /* See vmtruncate() */ unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); @@ -117,19 +122,12 @@ static void fuse_truncate(struct address_space *mapping, loff_t offset) unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); } - -void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid, u64 attr_version) +void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, + u64 attr_valid) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - loff_t oldsize; - spin_lock(&fc->lock); - if (attr_version != 0 && fi->attr_version > attr_version) { - spin_unlock(&fc->lock); - return; - } fi->attr_version = ++fc->attr_version; fi->i_time = attr_valid; @@ -159,6 +157,22 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, fi->orig_i_mode = inode->i_mode; if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) inode->i_mode &= ~S_ISVTX; +} + +void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, + u64 attr_valid, u64 attr_version) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + loff_t oldsize; + + spin_lock(&fc->lock); + if (attr_version != 0 && fi->attr_version > attr_version) { + spin_unlock(&fc->lock); + return; + } + + fuse_change_attributes_common(inode, attr, attr_valid); oldsize = inode->i_size; i_size_write(inode, attr->size); @@ -468,6 +482,8 @@ static struct fuse_conn *new_conn(struct super_block *sb) atomic_set(&fc->num_waiting, 0); fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; fc->bdi.unplug_io_fn = default_unplug_io_fn; + /* fuse does it's own writeback accounting */ + fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; fc->dev = sb->s_dev; err = bdi_init(&fc->bdi); if (err) @@ -475,6 +491,19 @@ static struct fuse_conn *new_conn(struct super_block *sb) err = bdi_register_dev(&fc->bdi, fc->dev); if (err) goto error_bdi_destroy; + /* + * For a single fuse filesystem use max 1% of dirty + + * writeback threshold. + * + * This gives about 1M of write buffer for memory maps on a + * machine with 1G and 10% dirty_ratio, which should be more + * than enough. + * + * Privileged users can raise it by writing to + * + * /sys/class/bdi//max_ratio + */ + bdi_set_max_ratio(&fc->bdi, 1); fc->reqctr = 0; fc->blocked = 1; fc->attr_version = 1; -- cgit v1.2.3 From 854512ec358f291bcadd7daea10d6bf3704933de Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:41 -0700 Subject: fuse: clean up setting i_size in write Extract common code for setting i_size in write functions into a common helper. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/file.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 68051f3bdf9..f0f0f278b4e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -610,13 +610,24 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping, return 0; } +static void fuse_write_update_size(struct inode *inode, loff_t pos) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + + spin_lock(&fc->lock); + fi->attr_version = ++fc->attr_version; + if (pos > inode->i_size) + i_size_write(inode, pos); + spin_unlock(&fc->lock); +} + static int fuse_buffered_write(struct file *file, struct inode *inode, loff_t pos, unsigned count, struct page *page) { int err; size_t nres; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_inode *fi = get_fuse_inode(inode); unsigned offset = pos & (PAGE_CACHE_SIZE - 1); struct fuse_req *req; @@ -643,12 +654,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, err = -EIO; if (!err) { pos += nres; - spin_lock(&fc->lock); - fi->attr_version = ++fc->attr_version; - if (pos > inode->i_size) - i_size_write(inode, pos); - spin_unlock(&fc->lock); - + fuse_write_update_size(inode, pos); if (count == PAGE_CACHE_SIZE) SetPageUptodate(page); } @@ -766,12 +772,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, } fuse_put_request(fc, req); if (res > 0) { - if (write) { - spin_lock(&fc->lock); - if (pos > inode->i_size) - i_size_write(inode, pos); - spin_unlock(&fc->lock); - } + if (write) + fuse_write_update_size(inode, pos); *ppos = pos; } fuse_invalidate_attr(inode); -- cgit v1.2.3 From ea9b9907b82a09bd1a708004454f7065de77c5b0 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 30 Apr 2008 00:54:42 -0700 Subject: fuse: implement perform_write Introduce fuse_perform_write. With fusexmp (a passthrough filesystem), large (1MB) writes into a backing tmpfs filesystem are sped up by almost 4 times (256MB/s vs 71MB/s). [mszeredi@suse.cz]: - split into smaller functions - testing - duplicate generic_file_aio_write(), so that there's no need to add a new ->perform_write() a_op. Comment from hch. Signed-off-by: Nick Piggin Signed-off-by: Miklos Szeredi Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/file.c | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 193 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f0f0f278b4e..c5b5982bf38 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -677,6 +677,198 @@ static int fuse_write_end(struct file *file, struct address_space *mapping, return res; } +static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, + struct inode *inode, loff_t pos, + size_t count) +{ + size_t res; + unsigned offset; + unsigned i; + + for (i = 0; i < req->num_pages; i++) + fuse_wait_on_page_writeback(inode, req->pages[i]->index); + + res = fuse_send_write(req, file, inode, pos, count, NULL); + + offset = req->page_offset; + count = res; + for (i = 0; i < req->num_pages; i++) { + struct page *page = req->pages[i]; + + if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE) + SetPageUptodate(page); + + if (count > PAGE_CACHE_SIZE - offset) + count -= PAGE_CACHE_SIZE - offset; + else + count = 0; + offset = 0; + + unlock_page(page); + page_cache_release(page); + } + + return res; +} + +static ssize_t fuse_fill_write_pages(struct fuse_req *req, + struct address_space *mapping, + struct iov_iter *ii, loff_t pos) +{ + struct fuse_conn *fc = get_fuse_conn(mapping->host); + unsigned offset = pos & (PAGE_CACHE_SIZE - 1); + size_t count = 0; + int err; + + req->page_offset = offset; + + do { + size_t tmp; + struct page *page; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset, + iov_iter_count(ii)); + + bytes = min_t(size_t, bytes, fc->max_write - count); + + again: + err = -EFAULT; + if (iov_iter_fault_in_readable(ii, bytes)) + break; + + err = -ENOMEM; + page = __grab_cache_page(mapping, index); + if (!page) + break; + + pagefault_disable(); + tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); + pagefault_enable(); + flush_dcache_page(page); + + if (!tmp) { + unlock_page(page); + page_cache_release(page); + bytes = min(bytes, iov_iter_single_seg_count(ii)); + goto again; + } + + err = 0; + req->pages[req->num_pages] = page; + req->num_pages++; + + iov_iter_advance(ii, tmp); + count += tmp; + pos += tmp; + offset += tmp; + if (offset == PAGE_CACHE_SIZE) + offset = 0; + + } while (iov_iter_count(ii) && count < fc->max_write && + req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0); + + return count > 0 ? count : err; +} + +static ssize_t fuse_perform_write(struct file *file, + struct address_space *mapping, + struct iov_iter *ii, loff_t pos) +{ + struct inode *inode = mapping->host; + struct fuse_conn *fc = get_fuse_conn(inode); + int err = 0; + ssize_t res = 0; + + if (is_bad_inode(inode)) + return -EIO; + + do { + struct fuse_req *req; + ssize_t count; + + req = fuse_get_req(fc); + if (IS_ERR(req)) { + err = PTR_ERR(req); + break; + } + + count = fuse_fill_write_pages(req, mapping, ii, pos); + if (count <= 0) { + err = count; + } else { + size_t num_written; + + num_written = fuse_send_write_pages(req, file, inode, + pos, count); + err = req->out.h.error; + if (!err) { + res += num_written; + pos += num_written; + + /* break out of the loop on short write */ + if (num_written != count) + err = -EIO; + } + } + fuse_put_request(fc, req); + } while (!err && iov_iter_count(ii)); + + if (res > 0) + fuse_write_update_size(inode, pos); + + fuse_invalidate_attr(inode); + + return res > 0 ? res : err; +} + +static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + size_t count = 0; + ssize_t written = 0; + struct inode *inode = mapping->host; + ssize_t err; + struct iov_iter i; + + WARN_ON(iocb->ki_pos != pos); + + err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); + if (err) + return err; + + mutex_lock(&inode->i_mutex); + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + + /* We can write back this queue in page reclaim */ + current->backing_dev_info = mapping->backing_dev_info; + + err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + if (err) + goto out; + + if (count == 0) + goto out; + + err = remove_suid(file->f_path.dentry); + if (err) + goto out; + + file_update_time(file); + + iov_iter_init(&i, iov, nr_segs, count, 0); + written = fuse_perform_write(file, mapping, &i, pos); + if (written >= 0) + iocb->ki_pos = pos + written; + +out: + current->backing_dev_info = NULL; + mutex_unlock(&inode->i_mutex); + + return written ? written : err; +} + static void fuse_release_user_pages(struct fuse_req *req, int write) { unsigned i; @@ -1203,7 +1395,7 @@ static const struct file_operations fuse_file_operations = { .read = do_sync_read, .aio_read = fuse_file_aio_read, .write = do_sync_write, - .aio_write = generic_file_aio_write, + .aio_write = fuse_file_aio_write, .mmap = fuse_file_mmap, .open = fuse_open, .flush = fuse_flush, -- cgit v1.2.3 From 5c5c5e51b26413d50a9efae2ca7d6c5c6cd453ac Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:43 -0700 Subject: fuse: update file size on short read If the READ request returned a short count, then either - cached size is incorrect - filesystem is buggy, as short reads are only allowed on EOF So assume that the size is wrong and refresh it, so that cached read() doesn't zero fill the missing chunk. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/dir.c | 2 +- fs/fuse/file.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++------ fs/fuse/fuse_i.h | 7 ++++++- 3 files changed, 53 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 48b9971ecd9..2060bf06b90 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -132,7 +132,7 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, req->out.args[0].value = outarg; } -static u64 fuse_get_attr_version(struct fuse_conn *fc) +u64 fuse_get_attr_version(struct fuse_conn *fc) { u64 curr_version; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c5b5982bf38..a02418c89d4 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -363,7 +363,7 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync) void fuse_read_fill(struct fuse_req *req, struct file *file, struct inode *inode, loff_t pos, size_t count, int opcode) { - struct fuse_read_in *inarg = &req->misc.read_in; + struct fuse_read_in *inarg = &req->misc.read.in; struct fuse_file *ff = file->private_data; inarg->fh = ff->fh; @@ -389,7 +389,7 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file, fuse_read_fill(req, file, inode, pos, count, FUSE_READ); if (owner != NULL) { - struct fuse_read_in *inarg = &req->misc.read_in; + struct fuse_read_in *inarg = &req->misc.read.in; inarg->read_flags |= FUSE_READ_LOCKOWNER; inarg->lock_owner = fuse_lock_owner_id(fc, owner); @@ -398,11 +398,29 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file, return req->out.args[0].size; } +static void fuse_read_update_size(struct inode *inode, loff_t size, + u64 attr_ver) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + + spin_lock(&fc->lock); + if (attr_ver == fi->attr_version && size < inode->i_size) { + fi->attr_version = ++fc->attr_version; + i_size_write(inode, size); + } + spin_unlock(&fc->lock); +} + static int fuse_readpage(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; + size_t num_read; + loff_t pos = page_offset(page); + size_t count = PAGE_CACHE_SIZE; + u64 attr_ver; int err; err = -EIO; @@ -421,15 +439,25 @@ static int fuse_readpage(struct file *file, struct page *page) if (IS_ERR(req)) goto out; + attr_ver = fuse_get_attr_version(fc); + req->out.page_zeroing = 1; req->num_pages = 1; req->pages[0] = page; - fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE, - NULL); + num_read = fuse_send_read(req, file, inode, pos, count, NULL); err = req->out.h.error; fuse_put_request(fc, req); - if (!err) + + if (!err) { + /* + * Short read means EOF. If file size is larger, truncate it + */ + if (num_read < count) + fuse_read_update_size(inode, pos + num_read, attr_ver); + SetPageUptodate(page); + } + fuse_invalidate_attr(inode); /* atime changed */ out: unlock_page(page); @@ -439,8 +467,19 @@ static int fuse_readpage(struct file *file, struct page *page) static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) { int i; + size_t count = req->misc.read.in.size; + size_t num_read = req->out.args[0].size; + struct inode *inode = req->pages[0]->mapping->host; - fuse_invalidate_attr(req->pages[0]->mapping->host); /* atime changed */ + /* + * Short read means EOF. If file size is larger, truncate it + */ + if (!req->out.h.error && num_read < count) { + loff_t pos = page_offset(req->pages[0]) + num_read; + fuse_read_update_size(inode, pos, req->misc.read.attr_ver); + } + + fuse_invalidate_attr(inode); /* atime changed */ for (i = 0; i < req->num_pages; i++) { struct page *page = req->pages[i]; @@ -463,6 +502,7 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file, size_t count = req->num_pages << PAGE_CACHE_SHIFT; req->out.page_zeroing = 1; fuse_read_fill(req, file, inode, pos, count, FUSE_READ); + req->misc.read.attr_ver = fuse_get_attr_version(fc); if (fc->async_read) { struct fuse_file *ff = file->private_data; req->ff = fuse_file_get(ff); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 4b094fbc9c7..934dd819a4e 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -239,7 +239,10 @@ struct fuse_req { } release; struct fuse_init_in init_in; struct fuse_init_out init_out; - struct fuse_read_in read_in; + struct { + struct fuse_read_in in; + u64 attr_ver; + } read; struct { struct fuse_write_in in; struct fuse_write_out out; @@ -637,3 +640,5 @@ void fuse_flush_writepages(struct inode *inode); void fuse_set_nowrite(struct inode *inode); void fuse_release_nowrite(struct inode *inode); + +u64 fuse_get_attr_version(struct fuse_conn *fc); -- cgit v1.2.3 From e5d9a0df07484d6d191756878c974e4307fb24ce Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:44 -0700 Subject: fuse: fix max i/o size calculation Fix a bug that Werner Baumann reported: fuse can send a bigger write request than the maximum specified. This only affected direct_io operation. In addition set a sane minimum for the max_read and max_write tunables, so I/O always makes some progress. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/file.c | 7 ++++--- fs/fuse/inode.c | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a02418c89d4..2d3649e4259 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -966,14 +966,15 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, while (count) { size_t nres; - size_t nbytes = min(count, nmax); - int err = fuse_get_user_pages(req, buf, nbytes, !write); + size_t nbytes_limit = min(count, nmax); + size_t nbytes; + int err = fuse_get_user_pages(req, buf, nbytes_limit, !write); if (err) { res = err; break; } nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; - nbytes = min(count, nbytes); + nbytes = min(nbytes_limit, nbytes); if (write) nres = fuse_send_write(req, file, inode, pos, nbytes, current->files); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 7d01c68852a..0cef5ea319f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -584,6 +584,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); fc->minor = arg->minor; fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; + fc->max_write = min_t(unsigned, 4096, fc->max_write); fc->conn_init = 1; } fuse_put_request(fc, req); @@ -658,7 +659,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->flags = d.flags; fc->user_id = d.user_id; fc->group_id = d.group_id; - fc->max_read = d.max_read; + fc->max_read = min_t(unsigned, 4096, d.max_read); /* Used by get_root_inode() */ sb->s_fs_info = fc; -- cgit v1.2.3 From b48badf013018ef2aa4a46416454bdb18f77fb01 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:44 -0700 Subject: fuse: fix node ID type Node ID is 64bit but it is passed as unsigned long to some functions. This breakage wasn't noticed, because libfuse uses unsigned long too. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/fuse_i.h | 4 ++-- fs/fuse/inode.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 934dd819a4e..dadffa21a20 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -464,7 +464,7 @@ extern const struct file_operations fuse_dev_operations; /** * Get a filled in inode */ -struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, +struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int generation, struct fuse_attr *attr, u64 attr_valid, u64 attr_version); @@ -472,7 +472,7 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, * Send FORGET command */ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, - unsigned long nodeid, u64 nlookup); + u64 nodeid, u64 nlookup); /** * Initialize READ or READDIR request diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 0cef5ea319f..79b61587383 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -84,7 +84,7 @@ static void fuse_destroy_inode(struct inode *inode) } void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, - unsigned long nodeid, u64 nlookup) + u64 nodeid, u64 nlookup) { struct fuse_forget_in *inarg = &req->misc.forget_in; inarg->nlookup = nlookup; @@ -207,7 +207,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) static int fuse_inode_eq(struct inode *inode, void *_nodeidp) { - unsigned long nodeid = *(unsigned long *) _nodeidp; + u64 nodeid = *(u64 *) _nodeidp; if (get_node_id(inode) == nodeid) return 1; else @@ -216,12 +216,12 @@ static int fuse_inode_eq(struct inode *inode, void *_nodeidp) static int fuse_inode_set(struct inode *inode, void *_nodeidp) { - unsigned long nodeid = *(unsigned long *) _nodeidp; + u64 nodeid = *(u64 *) _nodeidp; get_fuse_inode(inode)->nodeid = nodeid; return 0; } -struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, +struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int generation, struct fuse_attr *attr, u64 attr_valid, u64 attr_version) { -- cgit v1.2.3 From 5559b8f4d1f630b8614b6c8e13b8bf6c9c45d7d7 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:45 -0700 Subject: fuse: fix race in llseek Fuse doesn't use i_mutex to protect setting i_size, and so generic_file_llseek() can be racy: it doesn't use i_size_read(). So do a fuse specific llseek method, which does use i_size_read(). [akpm@linux-foundation.org: make `retval' loff_t] Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/file.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2d3649e4259..9ced35b0068 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1431,8 +1431,33 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) return err ? 0 : outarg.block; } +static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin) +{ + loff_t retval; + struct inode *inode = file->f_path.dentry->d_inode; + + mutex_lock(&inode->i_mutex); + switch (origin) { + case SEEK_END: + offset += i_size_read(inode); + break; + case SEEK_CUR: + offset += file->f_pos; + } + retval = -EINVAL; + if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) { + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_version = 0; + } + retval = offset; + } + mutex_unlock(&inode->i_mutex); + return retval; +} + static const struct file_operations fuse_file_operations = { - .llseek = generic_file_llseek, + .llseek = fuse_file_llseek, .read = do_sync_read, .aio_read = fuse_file_aio_read, .write = do_sync_write, @@ -1448,7 +1473,7 @@ static const struct file_operations fuse_file_operations = { }; static const struct file_operations fuse_direct_io_file_operations = { - .llseek = generic_file_llseek, + .llseek = fuse_file_llseek, .read = fuse_direct_read, .write = fuse_direct_write, .open = fuse_open, -- cgit v1.2.3 From 4dbf930ed6c1f8aa992937d0461f8f70d4004aad Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:45 -0700 Subject: fuse: fix sparse warnings fs/fuse/dev.c:306:2: warning: context imbalance in 'wait_answer_interruptible' - unexpected unlock fs/fuse/dev.c:361:2: warning: context imbalance in 'request_wait_answer' - unexpected unlock fs/fuse/dev.c:1002:4: warning: context imbalance in 'end_io_requests' - unexpected unlock Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index bba83762c48..87250b6a868 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -299,6 +299,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) static void wait_answer_interruptible(struct fuse_conn *fc, struct fuse_req *req) + __releases(fc->lock) __acquires(fc->lock) { if (signal_pending(current)) return; @@ -315,8 +316,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req) kill_fasync(&fc->fasync, SIGIO, POLL_IN); } -/* Called with fc->lock held. Releases, and then reacquires it. */ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) + __releases(fc->lock) __acquires(fc->lock) { if (!fc->no_interrupt) { /* Any signal may interrupt this */ @@ -987,6 +988,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head) * locked). */ static void end_io_requests(struct fuse_conn *fc) + __releases(fc->lock) __acquires(fc->lock) { while (!list_empty(&fc->io)) { struct fuse_req *req = -- cgit v1.2.3 From 86098fa0115358abf5159093d11ddb306ce4b0da Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Apr 2008 00:54:46 -0700 Subject: reiserfs: use open_bdev_excl Use the proper helper to open a blockdevice by name for filesystem use, this makes sure it's properly claimed (also added for open-by-number) and gets rid of the struct file abuse. Tested by mounting a reiserfs filesystem with external journal. Signed-off-by: Christoph Hellwig Cc: Chris Mason Cc: Jeff Mahoney Acked-by: Edward Shishkin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/journal.c | 50 +++++++++++++++++++++++--------------------------- 1 file changed, 23 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index da86042b3e0..e396b2fa474 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2574,11 +2574,9 @@ static int release_journal_dev(struct super_block *super, result = 0; - if (journal->j_dev_file != NULL) { - result = filp_close(journal->j_dev_file, NULL); - journal->j_dev_file = NULL; - journal->j_dev_bd = NULL; - } else if (journal->j_dev_bd != NULL) { + if (journal->j_dev_bd != NULL) { + if (journal->j_dev_bd->bd_dev != super->s_dev) + bd_release(journal->j_dev_bd); result = blkdev_put(journal->j_dev_bd); journal->j_dev_bd = NULL; } @@ -2603,7 +2601,6 @@ static int journal_init_dev(struct super_block *super, result = 0; journal->j_dev_bd = NULL; - journal->j_dev_file = NULL; jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; @@ -2620,35 +2617,34 @@ static int journal_init_dev(struct super_block *super, "cannot init journal device '%s': %i", __bdevname(jdev, b), result); return result; - } else if (jdev != super->s_dev) + } else if (jdev != super->s_dev) { + result = bd_claim(journal->j_dev_bd, journal); + if (result) { + blkdev_put(journal->j_dev_bd); + return result; + } + set_blocksize(journal->j_dev_bd, super->s_blocksize); + } + return 0; } - journal->j_dev_file = filp_open(jdev_name, 0, 0); - if (!IS_ERR(journal->j_dev_file)) { - struct inode *jdev_inode = journal->j_dev_file->f_mapping->host; - if (!S_ISBLK(jdev_inode->i_mode)) { - reiserfs_warning(super, "journal_init_dev: '%s' is " - "not a block device", jdev_name); - result = -ENOTBLK; - release_journal_dev(super, journal); - } else { - /* ok */ - journal->j_dev_bd = I_BDEV(jdev_inode); - set_blocksize(journal->j_dev_bd, super->s_blocksize); - reiserfs_info(super, - "journal_init_dev: journal device: %s\n", - bdevname(journal->j_dev_bd, b)); - } - } else { - result = PTR_ERR(journal->j_dev_file); - journal->j_dev_file = NULL; + journal->j_dev_bd = open_bdev_excl(jdev_name, 0, journal); + if (IS_ERR(journal->j_dev_bd)) { + result = PTR_ERR(journal->j_dev_bd); + journal->j_dev_bd = NULL; reiserfs_warning(super, "journal_init_dev: Cannot open '%s': %i", jdev_name, result); + return result; } - return result; + + set_blocksize(journal->j_dev_bd, super->s_blocksize); + reiserfs_info(super, + "journal_init_dev: journal device: %s\n", + bdevname(journal->j_dev_bd, b)); + return 0; } /** -- cgit v1.2.3 From 6369a4abb486692cd0f5fe592b48ec7419b7976c Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Wed, 30 Apr 2008 00:54:47 -0700 Subject: affs: be*_add_cpu conversion replace all: big_endian_variable = cpu_to_beX(beX_to_cpu(big_endian_variable) + expression_in_cpu_byteorder); with: beX_add_cpu(&big_endian_variable, expression_in_cpu_byteorder); generated with semantic patch Signed-off-by: Marcin Slusarz Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/affs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/affs/file.c b/fs/affs/file.c index e87ede608f7..1a4f092f24e 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -539,7 +539,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) tmp = min(bsize - boff, newsize - size); BUG_ON(boff + tmp > bsize || tmp > bsize); memset(AFFS_DATA(bh) + boff, 0, tmp); - AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); + be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp); affs_fix_checksum(sb, bh); mark_buffer_dirty_inode(bh, inode); size += tmp; @@ -680,7 +680,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, tmp = min(bsize - boff, to - from); BUG_ON(boff + tmp > bsize || tmp > bsize); memcpy(AFFS_DATA(bh) + boff, data + from, tmp); - AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); + be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp); affs_fix_checksum(sb, bh); mark_buffer_dirty_inode(bh, inode); written += tmp; -- cgit v1.2.3 From 20c79e785ae3f813310261dde81b29ab0c3e28b4 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Wed, 30 Apr 2008 00:54:47 -0700 Subject: hfs/hfsplus: be*_add_cpu conversion replace all: big_endian_variable = cpu_to_beX(beX_to_cpu(big_endian_variable) + expression_in_cpu_byteorder); with: beX_add_cpu(&big_endian_variable, expression_in_cpu_byteorder); generated with semantic patch Signed-off-by: Marcin Slusarz Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfs/mdb.c | 2 +- fs/hfsplus/super.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index b4651e128d7..36ca2e1a4fa 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -215,7 +215,7 @@ int hfs_mdb_get(struct super_block *sb) attrib &= cpu_to_be16(~HFS_SB_ATTRIB_UNMNT); attrib |= cpu_to_be16(HFS_SB_ATTRIB_INCNSTNT); mdb->drAtrb = attrib; - mdb->drWrCnt = cpu_to_be32(be32_to_cpu(mdb->drWrCnt) + 1); + be32_add_cpu(&mdb->drWrCnt, 1); mdb->drLsMod = hfs_mtime(); mark_buffer_dirty(HFS_SB(sb)->mdb_bh); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 946466cd9f2..ce97a54518d 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -423,7 +423,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) */ vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); vhdr->modify_date = hfsp_now2mt(); - vhdr->write_count = cpu_to_be32(be32_to_cpu(vhdr->write_count) + 1); + be32_add_cpu(&vhdr->write_count, 1); vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); -- cgit v1.2.3 From e3592b12f507d2c12c883d9c18084b72a5710db3 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Wed, 30 Apr 2008 00:54:48 -0700 Subject: quota: le*_add_cpu conversion replace all: little_endian_variable = cpu_to_leX(leX_to_cpu(little_endian_variable) + expression_in_cpu_byteorder); with: leX_add_cpu(&little_endian_variable, expression_in_cpu_byteorder); generated with semantic patch Signed-off-by: Marcin Slusarz Acked-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/quota_v2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/quota_v2.c b/fs/quota_v2.c index 23b647f25d0..234ada90363 100644 --- a/fs/quota_v2.c +++ b/fs/quota_v2.c @@ -306,7 +306,7 @@ static uint find_free_dqentry(struct dquot *dquot, int *err) printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk); goto out_buf; } - dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1); + le16_add_cpu(&dh->dqdh_entries, 1); memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk)); /* Find free structure in block */ for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++); @@ -448,7 +448,7 @@ static int free_dqentry(struct dquot *dquot, uint blk) goto out_buf; } dh = (struct v2_disk_dqdbheader *)buf; - dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1); + le16_add_cpu(&dh->dqdh_entries, -1); if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */ if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 || (ret = put_free_dqblk(sb, type, buf, blk)) < 0) { -- cgit v1.2.3 From 07132922aac0caf807c56b9c2a388954b357a8c4 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Wed, 30 Apr 2008 00:54:49 -0700 Subject: sysv: [bl]e*_add_cpu conversion replace all: big/little_endian_variable = cpu_to_[bl]eX([bl]eX_to_cpu(big/little_endian_variable) + expression_in_cpu_byteorder); with: [bl]eX_add_cpu(&big/little_endian_variable, expression_in_cpu_byteorder); generated with semantic patch Signed-off-by: Marcin Slusarz Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/sysv/sysv.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 42d51d1c05c..38ebe3f85b3 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -217,9 +217,9 @@ static inline __fs32 fs32_add(struct sysv_sb_info *sbi, __fs32 *n, int d) if (sbi->s_bytesex == BYTESEX_PDP) *(__u32*)n = PDP_swab(PDP_swab(*(__u32*)n)+d); else if (sbi->s_bytesex == BYTESEX_LE) - *(__le32*)n = cpu_to_le32(le32_to_cpu(*(__le32*)n)+d); + le32_add_cpu((__le32 *)n, d); else - *(__be32*)n = cpu_to_be32(be32_to_cpu(*(__be32*)n)+d); + be32_add_cpu((__be32 *)n, d); return *n; } @@ -242,9 +242,9 @@ static inline __fs16 cpu_to_fs16(struct sysv_sb_info *sbi, __u16 n) static inline __fs16 fs16_add(struct sysv_sb_info *sbi, __fs16 *n, int d) { if (sbi->s_bytesex != BYTESEX_BE) - *(__le16*)n = cpu_to_le16(le16_to_cpu(*(__le16 *)n)+d); + le16_add_cpu((__le16 *)n, d); else - *(__be16*)n = cpu_to_be16(be16_to_cpu(*(__be16 *)n)+d); + be16_add_cpu((__be16 *)n, d); return *n; } -- cgit v1.2.3 From 3e5a5097303eedb4ffae2719843eb064221b1db4 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 30 Apr 2008 00:54:53 -0700 Subject: hfs: fix warning with 64k PAGE_SIZE fs/hfs/btree.c: In function 'hfs_bmap_alloc': fs/hfs/btree.c:263: warning: comparison is always false due to limited range of data type The patch makes the warning go away, but the code might actually be buggy? Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfs/btree.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 24cf6fc4302..f6621a78520 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -208,7 +208,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) struct hfs_bnode *node, *next_node; struct page **pagep; u32 nidx, idx; - u16 off, len; + unsigned off; + u16 off16; + u16 len; u8 *data, byte, m; int i; @@ -235,7 +237,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) node = hfs_bnode_find(tree, nidx); if (IS_ERR(node)) return node; - len = hfs_brec_lenoff(node, 2, &off); + len = hfs_brec_lenoff(node, 2, &off16); + off = off16; off += node->page_offset; pagep = node->page + (off >> PAGE_CACHE_SHIFT); @@ -280,7 +283,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) return next_node; node = next_node; - len = hfs_brec_lenoff(node, 0, &off); + len = hfs_brec_lenoff(node, 0, &off16); + off = off16; off += node->page_offset; pagep = node->page + (off >> PAGE_CACHE_SHIFT); data = kmap(*pagep); -- cgit v1.2.3 From 487798df6d25e76ed6558b3e17c44cf0458cc6f3 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 30 Apr 2008 00:54:54 -0700 Subject: hfsplus: fix warning with 64k PAGE_SIZE fs/hfsplus/btree.c: In function 'hfsplus_bmap_alloc': fs/hfsplus/btree.c:239: warning: comparison is always false due to limited range of data type But this might hide a real bug? Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/btree.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index bb5433608a4..e49fcee1e29 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -184,7 +184,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) struct hfs_bnode *node, *next_node; struct page **pagep; u32 nidx, idx; - u16 off, len; + unsigned off; + u16 off16; + u16 len; u8 *data, byte, m; int i; @@ -211,7 +213,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) node = hfs_bnode_find(tree, nidx); if (IS_ERR(node)) return node; - len = hfs_brec_lenoff(node, 2, &off); + len = hfs_brec_lenoff(node, 2, &off16); + off = off16; off += node->page_offset; pagep = node->page + (off >> PAGE_CACHE_SHIFT); @@ -256,7 +259,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) return next_node; node = next_node; - len = hfs_brec_lenoff(node, 0, &off); + len = hfs_brec_lenoff(node, 0, &off16); + off = off16; off += node->page_offset; pagep = node->page + (off >> PAGE_CACHE_SHIFT); data = kmap(*pagep); -- cgit v1.2.3 From c6f3a97f86a5c97be0ca255976110bb9c3cfe669 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Apr 2008 00:55:03 -0700 Subject: debugobjects: add timer specific object debugging code Add calls to the generic object debugging infrastructure and provide fixup functions which allow to keep the system alive when recoverable problems have been detected by the object debugging core code. Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Greg KH Cc: Randy Dunlap Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 99c2352906a..b5253e77eb2 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1078,9 +1078,7 @@ static void timeout_func(unsigned long data) static inline void init_timeout(struct aio_timeout *to) { - init_timer(&to->timer); - to->timer.data = (unsigned long)to; - to->timer.function = timeout_func; + setup_timer_on_stack(&to->timer, timeout_func, (unsigned long) to); to->timed_out = 0; to->p = current; } @@ -1213,6 +1211,7 @@ retry: if (timeout) clear_timeout(&to); out: + destroy_timer_on_stack(&to.timer); return i ? i : ret; } -- cgit v1.2.3 From 530b6412786d7f83592c1a8e2445541ed73fca76 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 30 Apr 2008 00:55:09 -0700 Subject: afs: replace remaining __FUNCTION__ occurrences __FUNCTION__ is gcc-specific, use __func__ Signed-off-by: Harvey Harrison Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/afs/dir.c | 4 ++-- fs/afs/internal.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/afs/dir.c b/fs/afs/dir.c index b58af8f18bc..dfda03d4397 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -140,7 +140,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page) if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) { printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n", - __FUNCTION__, dir->i_ino, qty, + __func__, dir->i_ino, qty, ntohs(dbuf->blocks[0].pagehdr.npages)); goto error; } @@ -159,7 +159,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page) for (tmp = 0; tmp < qty; tmp++) { if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n", - __FUNCTION__, dir->i_ino, tmp, qty, + __func__, dir->i_ino, tmp, qty, ntohs(dbuf->blocks[tmp].pagehdr.magic)); goto error; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index eec41c76de7..7102824ba84 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -757,8 +757,8 @@ void _dbprintk(const char *fmt, ...) { } -#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) -#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) +#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) +#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) #define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) @@ -791,8 +791,8 @@ do { \ } while (0) #else -#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) -#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) +#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) +#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) #define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__) #endif -- cgit v1.2.3 From 8e24eea728068bbeb6a3c500b848f883a20bf225 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 30 Apr 2008 00:55:09 -0700 Subject: fs: replace remaining __FUNCTION__ occurrences __FUNCTION__ is gcc-specific, use __func__ Signed-off-by: Harvey Harrison Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/adfs/adfs.h | 2 +- fs/autofs4/autofs_i.h | 2 +- fs/bfs/bfs.h | 2 +- fs/buffer.c | 2 +- fs/configfs/file.c | 2 +- fs/configfs/mount.c | 2 +- fs/configfs/symlink.c | 4 ++-- fs/dlm/lockspace.c | 2 +- fs/exportfs/expfs.c | 10 +++++----- fs/fat/cache.c | 6 +++--- fs/fat/fatent.c | 2 +- fs/fat/file.c | 2 +- fs/gfs2/locking/dlm/sysfs.c | 2 +- fs/gfs2/util.h | 18 +++++++++--------- fs/jffs2/debug.h | 8 ++++---- fs/jffs2/xattr.c | 4 ++-- fs/lockd/clntproc.c | 2 +- fs/lockd/svclock.c | 2 +- fs/msdos/namei.c | 2 +- fs/namespace.c | 4 ++-- fs/nfsd/nfs4callback.c | 4 ++-- fs/ntfs/debug.h | 6 +++--- fs/partitions/ldm.c | 8 ++++---- fs/smbfs/smb_debug.h | 6 +++--- fs/sysfs/file.c | 2 +- fs/sysfs/mount.c | 2 +- fs/udf/super.c | 4 ++-- fs/vfat/namei.c | 2 +- 28 files changed, 57 insertions(+), 57 deletions(-) (limited to 'fs') diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index 936f2af39c4..831157502d5 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -75,7 +75,7 @@ extern unsigned int adfs_map_free(struct super_block *sb); /* Misc */ void __adfs_error(struct super_block *sb, const char *function, const char *fmt, ...); -#define adfs_error(sb, fmt...) __adfs_error(sb, __FUNCTION__, fmt) +#define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt) /* super.c */ diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 2d4ae40718d..c3d352d7fa9 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -35,7 +35,7 @@ /* #define DEBUG */ #ifdef DEBUG -#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __FUNCTION__ , ##args); } while(0) +#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __func__ , ##args); } while(0) #else #define DPRINTK(fmt,args...) do {} while(0) #endif diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index 71faf4d2390..70f5d3a8eed 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h @@ -42,7 +42,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode) #define printf(format, args...) \ - printk(KERN_ERR "BFS-fs: %s(): " format, __FUNCTION__, ## args) + printk(KERN_ERR "BFS-fs: %s(): " format, __func__, ## args) /* inode.c */ extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino); diff --git a/fs/buffer.c b/fs/buffer.c index 189efa4efc6..a073f3f4f01 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1101,7 +1101,7 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) printk(KERN_ERR "%s: requested out-of-range block %llu for " "device %s\n", - __FUNCTION__, (unsigned long long)block, + __func__, (unsigned long long)block, bdevname(bdev, b)); return -EIO; } diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 397cb503a18..2b6cb23dd14 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -115,7 +115,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp goto out; } pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", - __FUNCTION__, count, *ppos, buffer->page); + __func__, count, *ppos, buffer->page); retval = simple_read_from_buffer(buf, count, ppos, buffer->page, buffer->count); out: diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index de3b31d0a37..8421cea7d8c 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -92,7 +92,7 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent) root = d_alloc_root(inode); if (!root) { - pr_debug("%s: could not get root dentry!\n",__FUNCTION__); + pr_debug("%s: could not get root dentry!\n",__func__); iput(inode); return -ENOMEM; } diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 78929ea84ff..2a731ef5f30 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -210,13 +210,13 @@ static int configfs_get_target_path(struct config_item * item, struct config_ite if (size > PATH_MAX) return -ENAMETOOLONG; - pr_debug("%s: depth = %d, size = %d\n", __FUNCTION__, depth, size); + pr_debug("%s: depth = %d, size = %d\n", __func__, depth, size); for (s = path; depth--; s += 3) strcpy(s,"../"); fill_item_path(target, path, size); - pr_debug("%s: path = '%s'\n", __FUNCTION__, path); + pr_debug("%s: path = '%s'\n", __func__, path); return 0; } diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index b64e55e0515..499e16759e9 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -200,7 +200,7 @@ int __init dlm_lockspace_init(void) dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj); if (!dlm_kset) { - printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); + printk(KERN_WARNING "%s: can not create kset\n", __func__); return -ENOMEM; } return 0; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 109ab5e44ec..cc91227d3bb 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -150,12 +150,12 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir) if (IS_ERR(ppd)) { err = PTR_ERR(ppd); dprintk("%s: get_parent of %ld failed, err %d\n", - __FUNCTION__, pd->d_inode->i_ino, err); + __func__, pd->d_inode->i_ino, err); dput(pd); break; } - dprintk("%s: find name of %lu in %lu\n", __FUNCTION__, + dprintk("%s: find name of %lu in %lu\n", __func__, pd->d_inode->i_ino, ppd->d_inode->i_ino); err = exportfs_get_name(mnt, ppd, nbuf, pd); if (err) { @@ -168,14 +168,14 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir) continue; break; } - dprintk("%s: found name: %s\n", __FUNCTION__, nbuf); + dprintk("%s: found name: %s\n", __func__, nbuf); mutex_lock(&ppd->d_inode->i_mutex); npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); mutex_unlock(&ppd->d_inode->i_mutex); if (IS_ERR(npd)) { err = PTR_ERR(npd); dprintk("%s: lookup failed: %d\n", - __FUNCTION__, err); + __func__, err); dput(ppd); dput(pd); break; @@ -188,7 +188,7 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir) if (npd == pd) noprogress = 0; else - printk("%s: npd != pd\n", __FUNCTION__); + printk("%s: npd != pd\n", __func__); dput(npd); dput(ppd); if (IS_ROOT(pd)) { diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 639b3b4f86d..fda25479af2 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -242,7 +242,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) /* prevent the infinite loop of cluster chain */ if (*fclus > limit) { fat_fs_panic(sb, "%s: detected the cluster chain loop" - " (i_pos %lld)", __FUNCTION__, + " (i_pos %lld)", __func__, MSDOS_I(inode)->i_pos); nr = -EIO; goto out; @@ -253,7 +253,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) goto out; else if (nr == FAT_ENT_FREE) { fat_fs_panic(sb, "%s: invalid cluster chain" - " (i_pos %lld)", __FUNCTION__, + " (i_pos %lld)", __func__, MSDOS_I(inode)->i_pos); nr = -EIO; goto out; @@ -286,7 +286,7 @@ static int fat_bmap_cluster(struct inode *inode, int cluster) return ret; else if (ret == FAT_ENT_EOF) { fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)", - __FUNCTION__, MSDOS_I(inode)->i_pos); + __func__, MSDOS_I(inode)->i_pos); return -EIO; } return dclus; diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 13ab763cc51..302e95c4af7 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -546,7 +546,7 @@ int fat_free_clusters(struct inode *inode, int cluster) goto error; } else if (cluster == FAT_ENT_FREE) { fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF", - __FUNCTION__); + __func__); err = -EIO; goto error; } diff --git a/fs/fat/file.c b/fs/fat/file.c index d604bb13242..27cc1164ec3 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -208,7 +208,7 @@ static int fat_free(struct inode *inode, int skip) } else if (ret == FAT_ENT_FREE) { fat_fs_panic(sb, "%s: invalid cluster chain (i_pos %lld)", - __FUNCTION__, MSDOS_I(inode)->i_pos); + __func__, MSDOS_I(inode)->i_pos); ret = -EIO; } else if (ret > 0) { err = fat_ent_write(inode, &fatent, FAT_ENT_EOF, wait); diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c index 8479da47049..a4ff271df9e 100644 --- a/fs/gfs2/locking/dlm/sysfs.c +++ b/fs/gfs2/locking/dlm/sysfs.c @@ -212,7 +212,7 @@ int gdlm_sysfs_init(void) { gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj); if (!gdlm_kset) { - printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); + printk(KERN_WARNING "%s: can not create kset\n", __func__); return -ENOMEM; } return 0; diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 509c5d60bd8..7f48576289c 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -41,7 +41,7 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, #define gfs2_assert_withdraw(sdp, assertion) \ ((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \ - __FUNCTION__, __FILE__, __LINE__)) + __func__, __FILE__, __LINE__)) int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, @@ -49,28 +49,28 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, #define gfs2_assert_warn(sdp, assertion) \ ((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \ - __FUNCTION__, __FILE__, __LINE__)) + __func__, __FILE__, __LINE__)) int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function, char *file, unsigned int line); #define gfs2_consist(sdp) \ -gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__) +gfs2_consist_i((sdp), 0, __func__, __FILE__, __LINE__) int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, const char *function, char *file, unsigned int line); #define gfs2_consist_inode(ip) \ -gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__) +gfs2_consist_inode_i((ip), 0, __func__, __FILE__, __LINE__) int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, const char *function, char *file, unsigned int line); #define gfs2_consist_rgrpd(rgd) \ -gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__) +gfs2_consist_rgrpd_i((rgd), 0, __func__, __FILE__, __LINE__) int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, @@ -91,7 +91,7 @@ static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp, } #define gfs2_meta_check(sdp, bh) \ -gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__) +gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__) int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, @@ -118,7 +118,7 @@ static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp, } #define gfs2_metatype_check(sdp, bh, type) \ -gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__) +gfs2_metatype_check_i((sdp), (bh), (type), __func__, __FILE__, __LINE__) static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type, u16 format) @@ -134,14 +134,14 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, unsigned int line); #define gfs2_io_error(sdp) \ -gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__); +gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__); int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, const char *function, char *file, unsigned int line); #define gfs2_io_error_bh(sdp, bh) \ -gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__); +gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__); extern struct kmem_cache *gfs2_glock_cachep; diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h index 9645275023e..a113ecc3baf 100644 --- a/fs/jffs2/debug.h +++ b/fs/jffs2/debug.h @@ -82,28 +82,28 @@ do { \ printk(JFFS2_ERR_MSG_PREFIX \ " (%d) %s: " fmt, task_pid_nr(current), \ - __FUNCTION__ , ##__VA_ARGS__); \ + __func__ , ##__VA_ARGS__); \ } while(0) #define JFFS2_WARNING(fmt, ...) \ do { \ printk(JFFS2_WARN_MSG_PREFIX \ " (%d) %s: " fmt, task_pid_nr(current), \ - __FUNCTION__ , ##__VA_ARGS__); \ + __func__ , ##__VA_ARGS__); \ } while(0) #define JFFS2_NOTICE(fmt, ...) \ do { \ printk(JFFS2_NOTICE_MSG_PREFIX \ " (%d) %s: " fmt, task_pid_nr(current), \ - __FUNCTION__ , ##__VA_ARGS__); \ + __func__ , ##__VA_ARGS__); \ } while(0) #define JFFS2_DEBUG(fmt, ...) \ do { \ printk(JFFS2_DBG_MSG_PREFIX \ " (%d) %s: " fmt, task_pid_nr(current), \ - __FUNCTION__ , ##__VA_ARGS__); \ + __func__ , ##__VA_ARGS__); \ } while(0) /* diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index e48665984cb..574cb7532d6 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -82,7 +82,7 @@ static int is_xattr_datum_unchecked(struct jffs2_sb_info *c, struct jffs2_xattr_ static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) { /* must be called under down_write(xattr_sem) */ - D1(dbg_xattr("%s: xid=%u, version=%u\n", __FUNCTION__, xd->xid, xd->version)); + D1(dbg_xattr("%s: xid=%u, version=%u\n", __func__, xd->xid, xd->version)); if (xd->xname) { c->xdatum_mem_usage -= (xd->name_len + 1 + xd->value_len); kfree(xd->xname); @@ -1252,7 +1252,7 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE); if (rc) { JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n", - __FUNCTION__, rc, totlen); + __func__, rc, totlen); rc = rc ? rc : -EBADFD; goto out; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 40b16f23e49..5df517b81f3 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -573,7 +573,7 @@ again: /* Ensure the resulting lock will get added to granted list */ fl->fl_flags |= FL_SLEEP; if (do_vfs_lock(fl) < 0) - printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); + printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__); up_read(&host->h_rwsem); fl->fl_flags = fl_flags; status = 0; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 4d81553d294..81aca859bfd 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -752,7 +752,7 @@ nlmsvc_grant_blocked(struct nlm_block *block) return; default: printk(KERN_WARNING "lockd: unexpected error %d in %s!\n", - -error, __FUNCTION__); + -error, __func__); nlmsvc_insert_block(block, 10 * HZ); nlmsvc_release_block(block); return; diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 2d4358c59f6..05ff4f1d702 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -609,7 +609,7 @@ error_inode: if (corrupt < 0) { fat_fs_panic(new_dir->i_sb, "%s: Filesystem corrupted (i_pos %lld)", - __FUNCTION__, sinfo.i_pos); + __func__, sinfo.i_pos); } goto out; } diff --git a/fs/namespace.c b/fs/namespace.c index 061e5edb4d2..4fc302c2a0e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2329,10 +2329,10 @@ void __init mnt_init(void) err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", - __FUNCTION__, err); + __func__, err); fs_kobj = kobject_create_and_add("fs", NULL); if (!fs_kobj) - printk(KERN_WARNING "%s: kobj create error\n", __FUNCTION__); + printk(KERN_WARNING "%s: kobj create error\n", __func__); init_rootfs(); init_mount_tree(); } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 562abf3380d..0b3ffa9840c 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -104,7 +104,7 @@ xdr_writemem(__be32 *p, const void *ptr, int nbytes) } while (0) #define RESERVE_SPACE(nbytes) do { \ p = xdr_reserve_space(xdr, nbytes); \ - if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \ + if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __func__); \ BUG_ON(!p); \ } while (0) @@ -134,7 +134,7 @@ xdr_error: \ p = xdr_inline_decode(xdr, nbytes); \ if (!p) { \ dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \ - __FUNCTION__, __LINE__); \ + __func__, __LINE__); \ return -EIO; \ } \ } while (0) diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h index 8ac37c33d12..5e6724c1afd 100644 --- a/fs/ntfs/debug.h +++ b/fs/ntfs/debug.h @@ -45,7 +45,7 @@ static void ntfs_debug(const char *f, ...); extern void __ntfs_debug (const char *file, int line, const char *function, const char *format, ...) __attribute__ ((format (printf, 4, 5))); #define ntfs_debug(f, a...) \ - __ntfs_debug(__FILE__, __LINE__, __FUNCTION__, f, ##a) + __ntfs_debug(__FILE__, __LINE__, __func__, f, ##a) extern void ntfs_debug_dump_runlist(const runlist_element *rl); @@ -58,10 +58,10 @@ extern void ntfs_debug_dump_runlist(const runlist_element *rl); extern void __ntfs_warning(const char *function, const struct super_block *sb, const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); -#define ntfs_warning(sb, f, a...) __ntfs_warning(__FUNCTION__, sb, f, ##a) +#define ntfs_warning(sb, f, a...) __ntfs_warning(__func__, sb, f, ##a) extern void __ntfs_error(const char *function, const struct super_block *sb, const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); -#define ntfs_error(sb, f, a...) __ntfs_error(__FUNCTION__, sb, f, ##a) +#define ntfs_error(sb, f, a...) __ntfs_error(__func__, sb, f, ##a) #endif /* _LINUX_NTFS_DEBUG_H */ diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index e7dd1d4e347..0fdda2e8a4c 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c @@ -41,12 +41,12 @@ #ifndef CONFIG_LDM_DEBUG #define ldm_debug(...) do {} while (0) #else -#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __FUNCTION__, f, ##a) +#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __func__, f, ##a) #endif -#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __FUNCTION__, f, ##a) -#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __FUNCTION__, f, ##a) -#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __FUNCTION__, f, ##a) +#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __func__, f, ##a) +#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a) +#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a) __attribute__ ((format (printf, 3, 4))) static void _ldm_printk (const char *level, const char *function, diff --git a/fs/smbfs/smb_debug.h b/fs/smbfs/smb_debug.h index 734972b9269..fc4b1a5dd75 100644 --- a/fs/smbfs/smb_debug.h +++ b/fs/smbfs/smb_debug.h @@ -11,14 +11,14 @@ * these are normally enabled. */ #ifdef SMBFS_PARANOIA -# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __FUNCTION__ , ## a) +# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __func__ , ## a) #else # define PARANOIA(f, a...) do { ; } while(0) #endif /* lots of debug messages */ #ifdef SMBFS_DEBUG_VERBOSE -# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a) +# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a) #else # define VERBOSE(f, a...) do { ; } while(0) #endif @@ -28,7 +28,7 @@ * too common name. */ #ifdef SMBFS_DEBUG -#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a) +#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a) #else #define DEBUG1(f, a...) do { ; } while(0) #endif diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index dbdfabbfd60..e7735f643cd 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -135,7 +135,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) goto out; } pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", - __FUNCTION__, count, *ppos, buffer->page); + __func__, count, *ppos, buffer->page); retval = simple_read_from_buffer(buf, count, ppos, buffer->page, buffer->count); out: diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 74168266cd5..14f0023984d 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -61,7 +61,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent) /* instantiate and link root dentry */ root = d_alloc_root(inode); if (!root) { - pr_debug("%s: could not get root dentry!\n",__FUNCTION__); + pr_debug("%s: could not get root dentry!\n",__func__); iput(inode); return -ENOMEM; } diff --git a/fs/udf/super.c b/fs/udf/super.c index b564fc140fe..9fb18a340fc 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -240,7 +240,7 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count) sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map), GFP_KERNEL); if (!sbi->s_partmaps) { - udf_error(sb, __FUNCTION__, + udf_error(sb, __func__, "Unable to allocate space for %d partition maps", count); sbi->s_partitions = 0; @@ -1086,7 +1086,7 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index) bitmap = vmalloc(size); /* TODO: get rid of vmalloc */ if (bitmap == NULL) { - udf_error(sb, __FUNCTION__, + udf_error(sb, __func__, "Unable to allocate space for bitmap " "and %d buffer_head pointers", nr_groups); return NULL; diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index 5b66162d074..a3522727ea5 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -986,7 +986,7 @@ error_inode: if (corrupt < 0) { fat_fs_panic(new_dir->i_sb, "%s: Filesystem corrupted (i_pos %lld)", - __FUNCTION__, sinfo.i_pos); + __func__, sinfo.i_pos); } goto out; } -- cgit v1.2.3 From 40a2159abf3d0107bba359246554bd7d56f2171b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 28 Apr 2008 15:59:58 +0100 Subject: sysfs: Disallow truncation of files in sysfs sysfs allows attribute files to be truncated, e.g. using ftruncate(), with the expected effect on their inode. For most attributes, this doesn't change the "real" size of the file i.e. how much can be read from it. However, the parameter validation for reading and writing binary attribute files is based on the inode size and not the size specified in the file's bin_attribute, so it can be broken by this. For example, if we try using dd to write to such a file: # pwd /sys/bus/pci/devices/0000:08:00.0 # ls -l config -rw-r--r-- 1 root root 4096 Feb 1 17:35 config # dd if=/dev/zero of=config bs=4 count=1 1+0 records in 1+0 records out # ls -l config -rw-r--r-- 1 root root 0 Feb 1 17:50 config # dd if=/dev/zero of=config bs=4 count=1 seek=128 dd: writing `config': No space left on device 1+0 records in 0+0 records out Also, after truncation to 0, parameter validation for read and write is disabled. Most bin_attribute read and write methods also validate the size and offset, but for some this will allow out-of-range access. This may be a security issue, though access to such files is often limited to root. In any case, the validation should remain for safety's sake!) This was previously reported in Bugzilla as bug 9867. sysfs should ignore size changes or else refuse them (by returning -EINVAL). This patch makes it ignore them. Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index f8b82e73b3b..eb53c632f85 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -59,6 +59,8 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) if (error) return error; + iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */ + error = inode_setattr(inode, iattr); if (error) return error; -- cgit v1.2.3 From 883ce42ec45c2dbef5be7c133ade9741ac978329 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Fri, 25 Apr 2008 08:52:51 -0400 Subject: DEBUGFS: Correct location of debugfs API documentation. Signed-off-by: Robert P. J. Day Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index fddffe4851f..159a5efd6a8 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -9,7 +9,7 @@ * 2 as published by the Free Software Foundation. * * debugfs is for people to use instead of /proc or /sys. - * See Documentation/DocBook/kernel-api for more details. + * See Documentation/DocBook/filesystems for more details. * */ -- cgit v1.2.3 From 9d80f7539a91c0154e40fc9e4ae5e818dd8f102e Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 22 Apr 2008 11:46:44 -0700 Subject: ocfs2: Correct merge of 52f7c21 (Move /sys/o2cb to /sys/fs/o2cb) Commit 52f7c21b613f80cb425d115c9e5b4ed958a133c0 was intended to move /sys/o2cb to /sys/fs/o2cb, providing /sys/o2cb as a symlink for backwards compatibility. However, the merge apparently added the symlink but failed to move the directory, resulting in a duplicate filename error. It's a one-line change that was missing. Signed-off-by: Joel Becker Acked-by: Randy Dunlap Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c index 98429fd6849..bc702dab5d1 100644 --- a/fs/ocfs2/cluster/sys.c +++ b/fs/ocfs2/cluster/sys.c @@ -65,7 +65,7 @@ int o2cb_sys_init(void) { int ret; - o2cb_kset = kset_create_and_add("o2cb", NULL, NULL); + o2cb_kset = kset_create_and_add("o2cb", NULL, fs_kobj); if (!o2cb_kset) return -ENOMEM; -- cgit v1.2.3 From 4d8755b5e667df8f01647773ba744a5ac97e68e6 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 21 Apr 2008 11:49:26 +0300 Subject: ocfs2: make struct ocfs2_control_device static This patch makes the needlessly global struct ocfs2_control_device static. Signed-off-by: Adrian Bunk Acked-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stack_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 7428663f9cb..b503772cd0e 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -635,7 +635,7 @@ static const struct file_operations ocfs2_control_fops = { .owner = THIS_MODULE, }; -struct miscdevice ocfs2_control_device = { +static struct miscdevice ocfs2_control_device = { .minor = MISC_DYNAMIC_MINOR, .name = "ocfs2_control", .fops = &ocfs2_control_fops, -- cgit v1.2.3 From 4af694e672aaa85940d6e29d27b7eeea5f6eb258 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 21 Apr 2008 11:49:31 +0300 Subject: ocfs2: make struct o2cb_stack_ops static This patch makes the needlessly global struct o2cb_stack_ops static. Signed-off-by: Adrian Bunk Acked-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stack_o2cb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index ac1d74c63bf..bbd1667aa7d 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -385,7 +385,7 @@ static int o2cb_cluster_this_node(unsigned int *node) return 0; } -struct ocfs2_stack_operations o2cb_stack_ops = { +static struct ocfs2_stack_operations o2cb_stack_ops = { .connect = o2cb_cluster_connect, .disconnect = o2cb_cluster_disconnect, .hangup = o2cb_cluster_hangup, -- cgit v1.2.3 From 95642e56647d84963428a1168baa8a73cb782ac3 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 21 Apr 2008 11:49:37 +0300 Subject: ocfs2/dlm: dlmdebug.c: make 2 functions static This patch makes the following needlessly global functions static: - stringify_lockname() - dlm_debug_put() Signed-off-by: Adrian Bunk Acked-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmdebug.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 5f6d858770a..1b81dcba175 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -44,7 +44,8 @@ #define MLOG_MASK_PREFIX ML_DLM #include "cluster/masklog.h" -int stringify_lockname(const char *lockname, int locklen, char *buf, int len); +static int stringify_lockname(const char *lockname, int locklen, char *buf, + int len); void dlm_print_one_lock_resource(struct dlm_lock_resource *res) { @@ -251,7 +252,8 @@ EXPORT_SYMBOL_GPL(dlm_errname); * * For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h. */ -int stringify_lockname(const char *lockname, int locklen, char *buf, int len) +static int stringify_lockname(const char *lockname, int locklen, char *buf, + int len) { int out = 0; __be64 inode_blkno_be; @@ -368,7 +370,7 @@ static void dlm_debug_free(struct kref *kref) kfree(dc); } -void dlm_debug_put(struct dlm_debug_ctxt *dc) +static void dlm_debug_put(struct dlm_debug_ctxt *dc) { if (dc) kref_put(&dc->debug_refcnt, dlm_debug_free); -- cgit v1.2.3 From bc535809c06ada210d89f5a43b335c68ecbb8e1b Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Fri, 18 Apr 2008 10:23:53 -0700 Subject: ocfs2: Allow uid/gid/perm changes of symlinks This patch adds the ability to change attributes of a symlink. Fixes oss bugzilla#963 http://oss.oracle.com/bugzilla/show_bug.cgi?id=963 Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/file.c | 4 ++++ fs/ocfs2/symlink.c | 2 ++ 2 files changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9154c82d325..57e0d30cde9 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1048,6 +1048,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) mlog_entry("(0x%p, '%.*s')\n", dentry, dentry->d_name.len, dentry->d_name.name); + /* ensuring we don't even attempt to truncate a symlink */ + if (S_ISLNK(inode->i_mode)) + attr->ia_valid &= ~ATTR_SIZE; + if (attr->ia_valid & ATTR_MODE) mlog(0, "mode change: %d\n", attr->ia_mode); if (attr->ia_valid & ATTR_UID) diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 7134007ba22..ba9dbb51d25 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -167,9 +167,11 @@ const struct inode_operations ocfs2_symlink_inode_operations = { .readlink = page_readlink, .follow_link = ocfs2_follow_link, .getattr = ocfs2_getattr, + .setattr = ocfs2_setattr, }; const struct inode_operations ocfs2_fast_symlink_inode_operations = { .readlink = ocfs2_readlink, .follow_link = ocfs2_follow_link, .getattr = ocfs2_getattr, + .setattr = ocfs2_setattr, }; -- cgit v1.2.3 From 4ba1c5bfd2e5a6c9528eb7777b66c297e70f61ca Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Fri, 18 Apr 2008 15:03:59 -0700 Subject: ocfs2: Use GFP_NOFS in kmalloc during localalloc window move kmalloc() during a localalloc window move can trigger the mm to prune the dcache which inturn can trigger the fs to delete an inode causing it start a recursive transaction. The fix also makes the change in kmalloc during localalloc shutdown just to be safe. Fixes oss bugzilla#901 http://oss.oracle.com/bugzilla/show_bug.cgi?id=901 Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/localalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index ce0dc147602..be774bdc8b3 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -260,7 +260,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) bh = osb->local_alloc_bh; alloc = (struct ocfs2_dinode *) bh->b_data; - alloc_copy = kmalloc(bh->b_size, GFP_KERNEL); + alloc_copy = kmalloc(bh->b_size, GFP_NOFS); if (!alloc_copy) { status = -ENOMEM; goto out_commit; @@ -931,7 +931,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, * local alloc shutdown won't try to double free main bitmap * bits. Make a copy so the sync function knows which bits to * free. */ - alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_KERNEL); + alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS); if (!alloc_copy) { status = -ENOMEM; mlog_errno(status); -- cgit v1.2.3 From 214b7049a7929f03bbd2786aaef04b8b79db34e2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 1 May 2008 03:52:22 +0100 Subject: Fix dnotify/close race We have a race between fcntl() and close() that can lead to dnotify_struct inserted into inode's list *after* the last descriptor had been gone from current->files. Since that's the only point where dnotify_struct gets evicted, we are screwed - it will stick around indefinitely. Even after struct file in question is gone and freed. Worse, we can trigger send_sigio() on it at any later point, which allows to send an arbitrary signal to arbitrary process if we manage to apply enough memory pressure to get the page that used to host that struct file and fill it with the right pattern... Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/dnotify.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs') diff --git a/fs/dnotify.c b/fs/dnotify.c index 28d01ed66de..eaecc4cfe54 100644 --- a/fs/dnotify.c +++ b/fs/dnotify.c @@ -20,6 +20,7 @@ #include #include #include +#include int dir_notify_enable __read_mostly = 1; @@ -66,6 +67,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) struct dnotify_struct **prev; struct inode *inode; fl_owner_t id = current->files; + struct file *f; int error = 0; if ((arg & ~DN_MULTISHOT) == 0) { @@ -92,6 +94,15 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) prev = &odn->dn_next; } + rcu_read_lock(); + f = fcheck(fd); + rcu_read_unlock(); + /* we'd lost the race with close(), sod off silently */ + /* note that inode->i_lock prevents reordering problems + * between accesses to descriptor table and ->i_dnotify */ + if (f != filp) + goto out_free; + error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); if (error) goto out_free; -- cgit v1.2.3 From 4e571aba7bb25a3a069a7b88c0f63fe5a14c05c6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 1 May 2008 12:28:04 +0100 Subject: [JFFS2] Clean up jffs2_alloc_inode() and jffs2_i_init_once() Ditch a couple of pointless casts from void *, and use the normal variable name 'f' for jffs2_inode_info pointers -- especially since it actually shows up in lockdep reports. Signed-off-by: David Woodhouse --- fs/jffs2/super.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index f3353df178e..7da69eae49e 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -31,11 +31,12 @@ static struct kmem_cache *jffs2_inode_cachep; static struct inode *jffs2_alloc_inode(struct super_block *sb) { - struct jffs2_inode_info *ei; - ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL); - if (!ei) + struct jffs2_inode_info *f; + + f = kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL); + if (!f) return NULL; - return &ei->vfs_inode; + return &f->vfs_inode; } static void jffs2_destroy_inode(struct inode *inode) @@ -45,10 +46,10 @@ static void jffs2_destroy_inode(struct inode *inode) static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo) { - struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo; + struct jffs2_inode_info *f = foo; - mutex_init(&ei->sem); - inode_init_once(&ei->vfs_inode); + mutex_init(&f->sem); + inode_init_once(&f->vfs_inode); } static int jffs2_sync_fs(struct super_block *sb, int wait) -- cgit v1.2.3 From 590fe34c47cb5c2d836ac76fabc5f160bf31a3f1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 1 May 2008 15:53:28 +0100 Subject: [JFFS2] Quiet lockdep false positive. Don't hold f->sem while calling into jffs2_do_create(). It makes lockdep unhappy, and we don't really need it -- the _reason_ it's a false positive is because nobody else can see this inode yet and so nobody will be trying to lock it anyway. Signed-off-by: David Woodhouse --- fs/jffs2/dir.c | 7 +++++++ fs/jffs2/write.c | 6 +++--- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index c63e7a96af0..2bba3d3435b 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -208,6 +208,13 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode, f = JFFS2_INODE_INFO(inode); dir_f = JFFS2_INODE_INFO(dir_i); + /* jffs2_do_create() will want to lock it, _after_ reserving + space and taking c-alloc_sem. If we keep it locked here, + lockdep gets unhappy (although it's a false positive; + nothing else will be looking at this inode yet so there's + no chance of AB-BA deadlock involving its f->sem). */ + mutex_unlock(&f->sem); + ret = jffs2_do_create(c, dir_f, f, ri, dentry->d_name.name, dentry->d_name.len); if (ret) diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c index 665fce9797d..87891bdd791 100644 --- a/fs/jffs2/write.c +++ b/fs/jffs2/write.c @@ -438,10 +438,10 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); D1(printk(KERN_DEBUG "jffs2_do_create(): reserved 0x%x bytes\n", alloclen)); - if (ret) { - mutex_unlock(&f->sem); + if (ret) return ret; - } + + mutex_lock(&f->sem); ri->data_crc = cpu_to_je32(0); ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); -- cgit v1.2.3 From 02c6be615f1fcd37ac5ed93a3ad6692ad8991cd9 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 May 2008 04:34:45 -0700 Subject: vfs: fix permission checking in sys_utimensat If utimensat() is called with both times set to UTIME_NOW or one of them to UTIME_NOW and the other to UTIME_OMIT, then it will update the file time without any permission checking. I don't think this can be used for anything other than a local DoS, but could be quite bewildering at that (e.g. "Why was that large source tree rebuilt when I didn't modify anything???") This affects all kernels from 2.6.22, when the utimensat() syscall was introduced. Fix by doing the same permission checking as for the "times == NULL" case. Thanks to Michael Kerrisk, whose utimensat-non-conformances-and-fixes.patch in -mm also fixes this (and breaks other stuff), only he didn't realize the security implications of this bug. Signed-off-by: Miklos Szeredi Cc: Ulrich Drepper Cc: Michael Kerrisk Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/utimes.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/utimes.c b/fs/utimes.c index a2bef77dc9c..af059d5cb48 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -40,9 +40,14 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times) #endif +static bool nsec_special(long nsec) +{ + return nsec == UTIME_OMIT || nsec == UTIME_NOW; +} + static bool nsec_valid(long nsec) { - if (nsec == UTIME_OMIT || nsec == UTIME_NOW) + if (nsec_special(nsec)) return true; return nsec >= 0 && nsec <= 999999999; @@ -119,7 +124,15 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; newattrs.ia_valid |= ATTR_MTIME_SET; } - } else { + } + + /* + * If times is NULL or both times are either UTIME_OMIT or + * UTIME_NOW, then need to check permissions, because + * inode_change_ok() won't do it. + */ + if (!times || (nsec_special(times[0].tv_nsec) && + nsec_special(times[1].tv_nsec))) { error = -EACCES; if (IS_IMMUTABLE(inode)) goto mnt_drop_write_and_out; -- cgit v1.2.3 From afec570c32a0d116e3c68af583ed1d11110f12fc Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Thu, 1 May 2008 04:35:06 -0700 Subject: autofs4: fix sparse warning in waitq.c:autofs4_expire_indirect() Re-order some code in expire.c:autofs4_expire_indirect() to avoid compile warning, reported by Harvey Harrison: CHECK fs/autofs4/expire.c fs/autofs4/expire.c:383:2: warning: context imbalance in 'autofs4_expire_indirect' - unexpected unlock Signed-off-by: Ian Kent Reviewed-by: Harvey Harrison Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/expire.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index d96e5c14a9c..cfa12db8f41 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -333,7 +333,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb, /* Can we expire this guy */ if (autofs4_can_expire(dentry, timeout, do_now)) { expired = dentry; - break; + goto found; } goto next; } @@ -352,7 +352,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb, inf->flags |= AUTOFS_INF_EXPIRING; spin_unlock(&sbi->fs_lock); expired = dentry; - break; + goto found; } spin_unlock(&sbi->fs_lock); /* @@ -363,7 +363,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb, expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); if (expired) { dput(dentry); - break; + goto found; } } next: @@ -371,18 +371,16 @@ next: spin_lock(&dcache_lock); next = next->next; } - - if (expired) { - DPRINTK("returning %p %.*s", - expired, (int)expired->d_name.len, expired->d_name.name); - spin_lock(&dcache_lock); - list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); - spin_unlock(&dcache_lock); - return expired; - } spin_unlock(&dcache_lock); - return NULL; + +found: + DPRINTK("returning %p %.*s", + expired, (int)expired->d_name.len, expired->d_name.name); + spin_lock(&dcache_lock); + list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); + spin_unlock(&dcache_lock); + return expired; } /* Perform an expiry operation */ -- cgit v1.2.3 From cab0936aac8aa907c6bb814c2cf26385478f254b Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Thu, 1 May 2008 04:35:07 -0700 Subject: autofs4: check for invalid dentry in getpath Catch invalid dentry when calculating its path. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/waitq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 1fe28e4754c..75e5955c3f6 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -171,7 +171,7 @@ static int autofs4_getpath(struct autofs_sb_info *sbi, for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) len += tmp->d_name.len + 1; - if (--len > NAME_MAX) { + if (!len || --len > NAME_MAX) { spin_unlock(&dcache_lock); return 0; } -- cgit v1.2.3 From 033790449ba9c4dcf8478a87693d33df625c23b5 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Thu, 1 May 2008 04:35:08 -0700 Subject: autofs4: fix execution order race in mount request code Jeff Moyer has identified a race in due to an execution order dependency in the autofs4 function root.c:try_to_fill_dentry(). Jeff's description of this race is: "P1 does a lookup of /mount/submount/foo. Since the VFS can't find an entry for "foo" under /mount/submount, it calls into the autofs4 kernel module to allocate a new dentry, D1. The kernel creates a new waitq for this lookup and calls the daemon to perform the mount. The daemon performs a mkdir of the "foo" directory under /mount/submount, which ends up creating a *new* dentry, D2. Then, P2 does a lookup of /mount/submount/foo. The VFS path walking logic finds a dentry in the dcache, D2, and calls the revalidate function with this. In the autofs4 revalidate code, we then trigger a mount, since the dentry is an empty directory that isn't a mountpoint, and so set DCACHE_AUTOFS_PENDING and call into the wait code to trigger the mount. The wait code finds our existing waitq entry (since it is keyed off of the directory name) and adds itself to the list of waiters. After the daemon finishes the mount, it calls back into the kernel to release the waiters. When this happens, P1 is woken up and goes about clearing the DCACHE_AUTOFS_PENDING flag, but it does this in D1! So, given that P1 in our case is a program that will immediately try to access a file under /mount/submount/foo, we end up finding the dentry D2 which still has the pending flag set, and we set out to wait for a mount *again*! So, one way to address this is to re-do the lookup at the end of try_to_fill_dentry, and to clear the pending flag on the hashed dentry. This seems a sane approach to me." And Jeff's patch does this. Signed-off-by: Jeff Moyer Signed-off-by-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'fs') diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index aa4c5ff8a40..0533d37c73a 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -242,6 +242,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); + struct dentry *new; int status = 0; /* Block on any pending expiry here; invalidate the dentry @@ -318,6 +319,27 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; spin_unlock(&dentry->d_lock); + + /* + * The dentry that is passed in from lookup may not be the one + * we end up using, as mkdir can create a new one. If this + * happens, and another process tries the lookup at the same time, + * it will set the PENDING flag on this new dentry, but add itself + * to our waitq. Then, if after the lookup succeeds, the first + * process that requested the mount performs another lookup of the + * same directory, it will show up as still pending! So, we need + * to redo the lookup here and clear pending on that dentry. + */ + if (d_unhashed(dentry)) { + new = d_lookup(dentry->d_parent, &dentry->d_name); + if (new) { + spin_lock(&new->d_lock); + new->d_flags &= ~DCACHE_AUTOFS_PENDING; + spin_unlock(&new->d_lock); + dput(new); + } + } + return status; } -- cgit v1.2.3 From 9d2de6ad2a78bb8b60bf7a54e6043dca44e9a801 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Thu, 1 May 2008 04:35:09 -0700 Subject: autofs4: fix incorrect return from root.c:try_to_fill_dentry() Jeff Moyer has identified a case where the autofs4 function root.c:try_to_fill_dentry() can return -EBUSY when it should return 0. Jeff's description of the way this happens is: "automount starts an expire for directory d. after the callout to the daemon, but before the rmdir, another process tries to walk into the same directory. It puts itself onto the waitq, pending the expiration. When the expire finishes, the second process is woken up. In try_to_fill_dentry, it does this check: status = d_invalidate(dentry); if (status != -EBUSY) return -EAGAIN; And status is EBUSY. The dentry still has a non-zero d_inode, and the flags do not contain LOOKUP_CONTINUE or LOOKUP_DIRECTORY So, we fall through and return -EBUSY to the caller." Signed-off-by: Jeff Moyer Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 0533d37c73a..6e250030a69 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -243,7 +243,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); struct dentry *new; - int status = 0; + int status; /* Block on any pending expiry here; invalidate the dentry when expiration is done to trigger mount request with a new @@ -340,7 +340,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) } } - return status; + return 0; } /* For autofs direct mounts the follow link triggers the mount */ -- cgit v1.2.3 From 868eb7a8539d3e8c494209be2b1f4084a274dfef Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Thu, 1 May 2008 04:35:10 -0700 Subject: autofs: path_{get,put}() cleanups Here are some more places where path_{get,put}() can be used instead of dput()/mntput() pair. Besides that it fixes a bug in autofs4_mount_busy() where mntput() was called before dput(). Signed-off-by: Jan Blunck Cc: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/expire.c | 2 +- fs/autofs4/root.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index cfa12db8f41..894fee54d4d 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -73,8 +73,8 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) status = 0; done: DPRINTK("returning = %d", status); - mntput(mnt); dput(dentry); + mntput(mnt); return status; } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 6e250030a69..edf5b6bddb5 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -146,17 +146,17 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) if (d_mountpoint(dentry)) { struct file *fp = NULL; - struct vfsmount *fp_mnt = mntget(mnt); - struct dentry *fp_dentry = dget(dentry); + struct path fp_path = { .dentry = dentry, .mnt = mnt }; - if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) { - dput(fp_dentry); - mntput(fp_mnt); + path_get(&fp_path); + + if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) { + path_put(&fp_path); dcache_dir_close(inode, file); goto out; } - fp = dentry_open(fp_dentry, fp_mnt, file->f_flags); + fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags); status = PTR_ERR(fp); if (IS_ERR(fp)) { dcache_dir_close(inode, file); -- cgit v1.2.3 From bd7309677c937bf23296f6c81027123c84c5cc5c Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 1 May 2008 04:35:15 -0700 Subject: fuse: use clamp() rather than nested min/max clamp() exists for this use. Signed-off-by: Harvey Harrison Cc: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 9ced35b0068..f28cf8b46f8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -934,7 +934,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; - npages = min(max(npages, 1), FUSE_MAX_PAGES_PER_REQ); + npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); down_read(¤t->mm->mmap_sem); npages = get_user_pages(current, current->mm, user_addr, npages, write, 0, req->pages, NULL); -- cgit v1.2.3 From 1b690b48786229571e590dd22fe01ecc22a8746b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 1 May 2008 16:59:24 +0100 Subject: [JFFS2] Invert last argument of jffs2_gc_fetch_inode(), make it boolean. We don't actually care about nlink; we only care whether the inode in question is unlinked or not. Signed-off-by: David Woodhouse --- fs/jffs2/fs.c | 9 +++++---- fs/jffs2/gc.c | 2 +- fs/jffs2/os-linux.h | 2 +- fs/jffs2/wbuf.c | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 3eb1c84b0a3..e14b185a80d 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -586,11 +586,12 @@ void jffs2_gc_release_inode(struct jffs2_sb_info *c, } struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, - int inum, int nlink) + int inum, int unlinked) { struct inode *inode; struct jffs2_inode_cache *ic; - if (!nlink) { + + if (unlinked) { /* The inode has zero nlink but its nodes weren't yet marked obsolete. This has to be because we're still waiting for the final (close() and) iput() to happen. @@ -638,8 +639,8 @@ struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, return ERR_CAST(inode); } if (is_bad_inode(inode)) { - printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. nlink %d\n", - inum, nlink); + printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. unlinked %d\n", + inum, unlinked); /* NB. This will happen again. We need to do something appropriate here. */ iput(inode); return ERR_PTR(-EIO); diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index bad005664e3..d4db0efecea 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c @@ -401,7 +401,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) nlink = ic->nlink; spin_unlock(&c->inocache_lock); - f = jffs2_gc_fetch_inode(c, inum, nlink); + f = jffs2_gc_fetch_inode(c, inum, !nlink); if (IS_ERR(f)) { ret = PTR_ERR(f); goto release_sem; diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 1b10d259409..2cc866cf134 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -187,7 +187,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent); void jffs2_gc_release_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f); struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, - int inum, int nlink); + int inum, int unlinked); unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c, struct jffs2_inode_info *f, diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 8de52b60767..92ab32a987b 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -494,7 +494,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c) /* If it's an in-core inode, then we have to adjust any full_dirent or full_dnode structure to point to the new version instead of the old */ - f = jffs2_gc_fetch_inode(c, ic->ino, ic->nlink); + f = jffs2_gc_fetch_inode(c, ic->ino, !ic->nlink); if (IS_ERR(f)) { /* Should never happen; it _must_ be present */ JFFS2_ERROR("Failed to iget() ino #%u, err %ld\n", -- cgit v1.2.3 From a2dcb44c3c5a8151d2d9f6ac8ad0789efcdbe184 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 Apr 2008 14:05:15 -0400 Subject: [PATCH] make osf_select() use core_sys_select() ... instead of open-coding it Signed-off-by: Al Viro --- fs/select.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/select.c b/fs/select.c index 2c292146e24..80d70387e79 100644 --- a/fs/select.c +++ b/fs/select.c @@ -298,7 +298,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) #define MAX_SELECT_SECONDS \ ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) -static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, +int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s64 *timeout) { fd_set_bits fds; -- cgit v1.2.3 From 9f3acc3140444a900ab280de942291959f0f615d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Apr 2008 07:44:08 -0400 Subject: [PATCH] split linux/file.h Initial splitoff of the low-level stuff; taken to fdtable.h Signed-off-by: Al Viro --- fs/compat.c | 1 + fs/dnotify.c | 2 +- fs/exec.c | 1 + fs/fcntl.c | 1 + fs/file.c | 1 + fs/file_table.c | 1 + fs/locks.c | 1 + fs/open.c | 1 + fs/proc/array.c | 1 + fs/proc/base.c | 1 + fs/select.c | 1 + 11 files changed, 11 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index 139dc93c092..332a869d2c5 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/dnotify.c b/fs/dnotify.c index eaecc4cfe54..676073b8dda 100644 --- a/fs/dnotify.c +++ b/fs/dnotify.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include int dir_notify_enable __read_mostly = 1; diff --git a/fs/exec.c b/fs/exec.c index 9f9f931ef94..aeaa9791d8b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include diff --git a/fs/fcntl.c b/fs/fcntl.c index 3f3ac630ccd..bfd776509a7 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/file.c b/fs/file.c index 5110acb1c9e..f6fbcb49faf 100644 --- a/fs/file.c +++ b/fs/file.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/file_table.c b/fs/file_table.c index 7a0a9b87225..83084225b4c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/locks.c b/fs/locks.c index 44d9a6a7ec5..663c069b59b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -116,6 +116,7 @@ #include #include +#include #include #include #include diff --git a/fs/open.c b/fs/open.c index 7af1f05d597..a1450086e92 100644 --- a/fs/open.c +++ b/fs/open.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/array.c b/fs/proc/array.c index c135cbdd912..dca997a93bf 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/base.c b/fs/proc/base.c index fcf02f2deeb..808cbdc193d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/select.c b/fs/select.c index 80d70387e79..8dda969614a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -21,6 +21,7 @@ #include #include /* for STICKY_TIMEOUTS */ #include +#include #include #include -- cgit v1.2.3 From 2030a42cecd4dd1985a2ab03e25f3cd6106a5ca8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Feb 2008 06:46:49 -0500 Subject: [PATCH] sanitize anon_inode_getfd() a) none of the callers even looks at inode or file returned by anon_inode_getfd() b) any caller that would try to look at those would be racy, since by the time it returns we might have raced with close() from another thread and that file would be pining for fjords. Signed-off-by: Al Viro --- fs/anon_inodes.c | 13 +++---------- fs/eventfd.c | 15 +++++---------- fs/eventpoll.c | 23 ++++++++--------------- fs/signalfd.c | 17 ++++------------- fs/timerfd.c | 11 +++-------- 5 files changed, 23 insertions(+), 56 deletions(-) (limited to 'fs') diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index f42be069e08..977ef208c05 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -57,9 +57,6 @@ static struct dentry_operations anon_inodefs_dentry_operations = { * anonymous inode, and a dentry that describe the "class" * of the file * - * @pfd: [out] pointer to the file descriptor - * @dpinode: [out] pointer to the inode - * @pfile: [out] pointer to the file struct * @name: [in] name of the "class" of the new file * @fops [in] file operations for the new file * @priv [in] private data for the new file (will be file's private_data) @@ -68,10 +65,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = { * that do not need to have a full-fledged inode in order to operate correctly. * All the files created with anon_inode_getfd() will share a single inode, * hence saving memory and avoiding code duplication for the file/inode/dentry - * setup. + * setup. Returns new descriptor or -error. */ -int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile, - const char *name, const struct file_operations *fops, +int anon_inode_getfd(const char *name, const struct file_operations *fops, void *priv) { struct qstr this; @@ -125,10 +121,7 @@ int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile, fd_install(fd, file); - *pfd = fd; - *pinode = anon_inode_inode; - *pfile = file; - return 0; + return fd; err_dput: dput(dentry); diff --git a/fs/eventfd.c b/fs/eventfd.c index a9f130cd50a..343942deeec 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -200,10 +200,8 @@ struct file *eventfd_fget(int fd) asmlinkage long sys_eventfd(unsigned int count) { - int error, fd; + int fd; struct eventfd_ctx *ctx; - struct file *file; - struct inode *inode; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -216,12 +214,9 @@ asmlinkage long sys_eventfd(unsigned int count) * When we call this, the initialization must be complete, since * anon_inode_getfd() will install the fd. */ - error = anon_inode_getfd(&fd, &inode, &file, "[eventfd]", - &eventfd_fops, ctx); - if (!error) - return fd; - - kfree(ctx); - return error; + fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx); + if (fd < 0) + kfree(ctx); + return fd; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 221086fef17..990c01d2d66 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1050,8 +1050,6 @@ asmlinkage long sys_epoll_create(int size) { int error, fd = -1; struct eventpoll *ep; - struct inode *inode; - struct file *file; DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", current, size)); @@ -1061,29 +1059,24 @@ asmlinkage long sys_epoll_create(int size) * structure ( "struct eventpoll" ). */ error = -EINVAL; - if (size <= 0 || (error = ep_alloc(&ep)) != 0) + if (size <= 0 || (error = ep_alloc(&ep)) < 0) { + fd = error; goto error_return; + } /* * Creates all the items needed to setup an eventpoll file. That is, - * a file structure, and inode and a free file descriptor. + * a file structure and a free file descriptor. */ - error = anon_inode_getfd(&fd, &inode, &file, "[eventpoll]", - &eventpoll_fops, ep); - if (error) - goto error_free; + fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep); + if (fd < 0) + ep_free(ep); +error_return: DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", current, size, fd)); return fd; - -error_free: - ep_free(ep); -error_return: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", - current, size, error)); - return error; } /* diff --git a/fs/signalfd.c b/fs/signalfd.c index 8ead0db3593..619725644c7 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -207,11 +207,8 @@ static const struct file_operations signalfd_fops = { asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) { - int error; sigset_t sigmask; struct signalfd_ctx *ctx; - struct file *file; - struct inode *inode; if (sizemask != sizeof(sigset_t) || copy_from_user(&sigmask, user_mask, sizeof(sigmask))) @@ -230,12 +227,11 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas * When we call this, the initialization must be complete, since * anon_inode_getfd() will install the fd. */ - error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]", - &signalfd_fops, ctx); - if (error) - goto err_fdalloc; + ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx); + if (ufd < 0) + kfree(ctx); } else { - file = fget(ufd); + struct file *file = fget(ufd); if (!file) return -EBADF; ctx = file->private_data; @@ -252,9 +248,4 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas } return ufd; - -err_fdalloc: - kfree(ctx); - return error; } - diff --git a/fs/timerfd.c b/fs/timerfd.c index 5400524e9cb..d87d354ec42 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -181,10 +181,8 @@ static struct file *timerfd_fget(int fd) asmlinkage long sys_timerfd_create(int clockid, int flags) { - int error, ufd; + int ufd; struct timerfd_ctx *ctx; - struct file *file; - struct inode *inode; if (flags) return -EINVAL; @@ -200,12 +198,9 @@ asmlinkage long sys_timerfd_create(int clockid, int flags) ctx->clockid = clockid; hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); - error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]", - &timerfd_fops, ctx); - if (error) { + ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx); + if (ufd < 0) kfree(ctx); - return error; - } return ufd; } -- cgit v1.2.3 From 5c598b3428c372a1209597cee99a70da20625876 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 27 Apr 2008 20:04:15 -0400 Subject: [PATCH] fix sysctl_nr_open bugs * if luser with root sets it to something that is not a multiple of BITS_PER_LONG, the system is screwed. * if it gets decreased at the wrong time, we can get expand_files() returning success and _not_ increasing the size of table as asked. Signed-off-by: Al Viro --- fs/file.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index f6fbcb49faf..4c6f0ea12c4 100644 --- a/fs/file.c +++ b/fs/file.c @@ -150,8 +150,16 @@ static struct fdtable * alloc_fdtable(unsigned int nr) nr /= (1024 / sizeof(struct file *)); nr = roundup_pow_of_two(nr + 1); nr *= (1024 / sizeof(struct file *)); - if (nr > sysctl_nr_open) - nr = sysctl_nr_open; + /* + * Note that this can drive nr *below* what we had passed if sysctl_nr_open + * had been set lower between the check in expand_files() and here. Deal + * with that in caller, it's cheaper that way. + * + * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise + * bitmaps handling below becomes unpleasant, to put it mildly... + */ + if (unlikely(nr > sysctl_nr_open)) + nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1; fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); if (!fdt) @@ -199,6 +207,16 @@ static int expand_fdtable(struct files_struct *files, int nr) spin_lock(&files->file_lock); if (!new_fdt) return -ENOMEM; + /* + * extremely unlikely race - sysctl_nr_open decreased between the check in + * caller and alloc_fdtable(). Cheaper to catch it here... + */ + if (unlikely(new_fdt->max_fds <= nr)) { + free_fdarr(new_fdt); + free_fdset(new_fdt); + kfree(new_fdt); + return -EMFILE; + } /* * Check again since another task may have expanded the fd table while * we dropped the lock -- cgit v1.2.3 From 27c72b040c0be8f3704ed0b6b84c12cbba24a7e8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 1 May 2008 18:47:17 +0100 Subject: [JFFS2] Track parent inode for directories (for NFS export) To support NFS export, we need to know the parent inode of directories. Rather than growing the jffs2_inode_cache structure, share space with the nlink field -- which was always set to 1 for directories anyway. Signed-off-by: David Woodhouse --- fs/jffs2/build.c | 31 ++++++++++++++++++++----------- fs/jffs2/dir.c | 35 +++++++++++++++++++++++++---------- fs/jffs2/erase.c | 2 +- fs/jffs2/fs.c | 5 ++--- fs/jffs2/gc.c | 6 +++--- fs/jffs2/nodelist.h | 5 ++++- fs/jffs2/nodemgmt.c | 2 +- fs/jffs2/readinode.c | 7 ++++--- fs/jffs2/scan.c | 2 +- fs/jffs2/wbuf.c | 2 +- fs/jffs2/write.c | 11 ++++++----- fs/jffs2/xattr.c | 4 ++-- 12 files changed, 70 insertions(+), 42 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index d58f845ccb8..c5e1450d79f 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -46,7 +46,7 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c) static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, - struct jffs2_inode_cache *ic) + struct jffs2_inode_cache *ic) { struct jffs2_full_dirent *fd; @@ -68,11 +68,17 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, continue; } - if (child_ic->nlink++ && fd->type == DT_DIR) { - JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", - fd->name, fd->ino, ic->ino); - /* TODO: What do we do about it? */ - } + if (fd->type == DT_DIR) { + if (child_ic->pino_nlink) { + JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", + fd->name, fd->ino, ic->ino); + /* TODO: What do we do about it? */ + } else { + child_ic->pino_nlink = ic->ino; + } + } else + child_ic->pino_nlink++; + dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); /* Can't free scan_dents so far. We might need them in pass 2 */ } @@ -125,7 +131,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) dbg_fsbuild("pass 2 starting\n"); for_each_inode(i, c, ic) { - if (ic->nlink) + if (ic->pino_nlink) continue; jffs2_build_remove_unlinked_inode(c, ic, &dead_fds); @@ -232,16 +238,19 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c, /* Reduce nlink of the child. If it's now zero, stick it on the dead_fds list to be cleaned up later. Else just free the fd */ - child_ic->nlink--; + if (fd->type == DT_DIR) + child_ic->pino_nlink = 0; + else + child_ic->pino_nlink--; - if (!child_ic->nlink) { - dbg_fsbuild("inode #%u (\"%s\") has now got zero nlink, adding to dead_fds list.\n", + if (!child_ic->pino_nlink) { + dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n", fd->ino, fd->name); fd->next = *dead_fds; *dead_fds = fd; } else { dbg_fsbuild("inode #%u (\"%s\") has now got nlink %d. Ignoring.\n", - fd->ino, fd->name, child_ic->nlink); + fd->ino, fd->name, child_ic->pino_nlink); jffs2_free_full_dirent(fd); } } diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 2bba3d3435b..c0c141f6fde 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -226,7 +226,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode, d_instantiate(dentry, inode); D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n", - inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->nlink, inode->i_mapping->nrpages)); + inode->i_ino, inode->i_mode, inode->i_nlink, + f->inocache->pino_nlink, inode->i_mapping->nrpages)); return 0; fail: @@ -250,7 +251,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry) ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name, dentry->d_name.len, dead_f, now); if (dead_f->inocache) - dentry->d_inode->i_nlink = dead_f->inocache->nlink; + dentry->d_inode->i_nlink = dead_f->inocache->pino_nlink; if (!ret) dir_i->i_mtime = dir_i->i_ctime = ITIME(now); return ret; @@ -283,7 +284,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de if (!ret) { mutex_lock(&f->sem); - old_dentry->d_inode->i_nlink = ++f->inocache->nlink; + old_dentry->d_inode->i_nlink = ++f->inocache->pino_nlink; mutex_unlock(&f->sem); d_instantiate(dentry, old_dentry->d_inode); dir_i->i_mtime = dir_i->i_ctime = ITIME(now); @@ -500,11 +501,14 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) inode->i_op = &jffs2_dir_inode_operations; inode->i_fop = &jffs2_dir_operations; - /* Directories get nlink 2 at start */ - inode->i_nlink = 2; f = JFFS2_INODE_INFO(inode); + /* Directories get nlink 2 at start */ + inode->i_nlink = 2; + /* but ic->pino_nlink is the parent ino# */ + f->inocache->pino_nlink = dir_i->i_ino; + ri->data_crc = cpu_to_je32(0); ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); @@ -601,17 +605,25 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) { + struct jffs2_sb_info *c = JFFS2_SB_INFO(dir_i->i_sb); + struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i); struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode); struct jffs2_full_dirent *fd; int ret; + uint32_t now = get_seconds(); for (fd = f->dents ; fd; fd = fd->next) { if (fd->ino) return -ENOTEMPTY; } - ret = jffs2_unlink(dir_i, dentry); - if (!ret) + + ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name, + dentry->d_name.len, f, now); + if (!ret) { + dir_i->i_mtime = dir_i->i_ctime = ITIME(now); + clear_nlink(dentry->d_inode); drop_nlink(dir_i); + } return ret; } @@ -824,7 +836,10 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, inode which didn't exist. */ if (victim_f->inocache) { mutex_lock(&victim_f->sem); - victim_f->inocache->nlink--; + if (S_ISDIR(new_dentry->d_inode->i_mode)) + victim_f->inocache->pino_nlink = 0; + else + victim_f->inocache->pino_nlink--; mutex_unlock(&victim_f->sem); } } @@ -845,8 +860,8 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode); mutex_lock(&f->sem); inc_nlink(old_dentry->d_inode); - if (f->inocache) - f->inocache->nlink++; + if (f->inocache && !S_ISDIR(old_dentry->d_inode->i_mode)) + f->inocache->pino_nlink++; mutex_unlock(&f->sem); printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret); diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index 25a640e566d..5e86f43616a 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -294,7 +294,7 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c, break; #endif default: - if (ic->nodes == (void *)ic && ic->nlink == 0) + if (ic->nodes == (void *)ic && ic->pino_nlink == 0) jffs2_del_ino_cache(c, ic); } } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index e14b185a80d..086c4383022 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -273,7 +273,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino) inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime)); - inode->i_nlink = f->inocache->nlink; + inode->i_nlink = f->inocache->pino_nlink; inode->i_blocks = (inode->i_size + 511) >> 9; @@ -286,13 +286,12 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino) case S_IFDIR: { struct jffs2_full_dirent *fd; + inode->i_nlink = 2; /* parent and '.' */ for (fd=f->dents; fd; fd = fd->next) { if (fd->type == DT_DIR && fd->ino) inc_nlink(inode); } - /* and '..' */ - inc_nlink(inode); /* Root dir gets i_nlink 3 for some reason */ if (inode->i_ino == 1) inc_nlink(inode); diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index d4db0efecea..090c556ffed 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c @@ -161,8 +161,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) continue; } - if (!ic->nlink) { - D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n", + if (!ic->pino_nlink) { + D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n", ic->ino)); spin_unlock(&c->inocache_lock); jffs2_xattr_delete_inode(c, ic); @@ -398,7 +398,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) it's vaguely possible. */ inum = ic->ino; - nlink = ic->nlink; + nlink = ic->pino_nlink; spin_unlock(&c->inocache_lock); f = jffs2_gc_fetch_inode(c, inum, !nlink); diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index 8219df6eb6d..1750445556c 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -177,7 +177,10 @@ struct jffs2_inode_cache { #ifdef CONFIG_JFFS2_FS_XATTR struct jffs2_xattr_ref *xref; #endif - int nlink; + uint32_t pino_nlink; /* Directories store parent inode + here; other inodes store nlink. + Zero always means that it's + completely unlinked. */ }; /* Inode states for 'state' above. We need the 'GC' state to prevent diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 9df8f3ef20d..a9bf9603c1b 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -709,7 +709,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref break; #endif default: - if (ic->nodes == (void *)ic && ic->nlink == 0) + if (ic->nodes == (void *)ic && ic->pino_nlink == 0) jffs2_del_ino_cache(c, ic); break; } diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 4cb4d76de07..9fc4833c117 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -1123,7 +1123,8 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c, size_t retlen; int ret; - dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink); + dbg_readinode("ino #%u pino/nlink is %d\n", f->inocache->ino, + f->inocache->pino_nlink); memset(&rii, 0, sizeof(rii)); @@ -1358,7 +1359,7 @@ int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, } dbg_readinode("creating inocache for root inode\n"); memset(f->inocache, 0, sizeof(struct jffs2_inode_cache)); - f->inocache->ino = f->inocache->nlink = 1; + f->inocache->ino = f->inocache->pino_nlink = 1; f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache; f->inocache->state = INO_STATE_READING; jffs2_add_ino_cache(c, f->inocache); @@ -1401,7 +1402,7 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f) jffs2_clear_acl(f); jffs2_xattr_delete_inode(c, f->inocache); mutex_lock(&f->sem); - deleted = f->inocache && !f->inocache->nlink; + deleted = f->inocache && !f->inocache->pino_nlink; if (f->inocache && f->inocache->state != INO_STATE_CHECKING) jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING); diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 272872d27fd..8c1e692bef7 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -940,7 +940,7 @@ struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uin ic->nodes = (void *)ic; jffs2_add_ino_cache(c, ic); if (ino == 1) - ic->nlink = 1; + ic->pino_nlink = 1; return ic; } diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 92ab32a987b..0e78b00035e 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -494,7 +494,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c) /* If it's an in-core inode, then we have to adjust any full_dirent or full_dnode structure to point to the new version instead of the old */ - f = jffs2_gc_fetch_inode(c, ic->ino, !ic->nlink); + f = jffs2_gc_fetch_inode(c, ic->ino, !ic->pino_nlink); if (IS_ERR(f)) { /* Should never happen; it _must_ be present */ JFFS2_ERROR("Failed to iget() ino #%u, err %ld\n", diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c index 87891bdd791..ca29440e943 100644 --- a/fs/jffs2/write.c +++ b/fs/jffs2/write.c @@ -19,7 +19,8 @@ #include "compr.h" -int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint32_t mode, struct jffs2_raw_inode *ri) +int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, + uint32_t mode, struct jffs2_raw_inode *ri) { struct jffs2_inode_cache *ic; @@ -31,7 +32,7 @@ int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint memset(ic, 0, sizeof(*ic)); f->inocache = ic; - f->inocache->nlink = 1; + f->inocache->pino_nlink = 1; /* Will be overwritten shortly for directories */ f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache; f->inocache->state = INO_STATE_PRESENT; @@ -635,9 +636,9 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, jffs2_mark_node_obsolete(c, fd->raw); jffs2_free_full_dirent(fd); } - } - - dead_f->inocache->nlink--; + dead_f->inocache->pino_nlink = 0; + } else + dead_f->inocache->pino_nlink--; /* NB: Caller must set inode nlink if appropriate */ mutex_unlock(&dead_f->sem); } diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index e48665984cb..05531f291bf 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -592,7 +592,7 @@ void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache When an inode with XATTR is removed, those XATTRs must be removed. */ struct jffs2_xattr_ref *ref, *_ref; - if (!ic || ic->nlink > 0) + if (!ic || ic->pino_nlink > 0) return; down_write(&c->xattr_sem); @@ -829,7 +829,7 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) ref->xd and ref->ic are not valid yet. */ xd = jffs2_find_xattr_datum(c, ref->xid); ic = jffs2_get_ino_cache(c, ref->ino); - if (!xd || !ic || !ic->nlink) { + if (!xd || !ic || !ic->pino_nlink) { dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n", ref->ino, ref->xid, ref->xseqno); ref->xseqno |= XREF_DELETE_MARKER; -- cgit v1.2.3 From a98889f3d8882995b5aa2255b931cf0202325cc0 Mon Sep 17 00:00:00 2001 From: Jared Hulbert Date: Tue, 29 Apr 2008 23:26:49 -0700 Subject: [MTD][NOR] Add physical address to point() method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding the ability to get a physical address from point() in addition to virtual address. This physical address is required for XIP of userspace code from flash. Signed-off-by: Jared Hulbert Reviewed-by: Jörn Engel Acked-by: Nicolas Pitre Acked-by: Greg Ungerer Signed-off-by: David Woodhouse --- fs/jffs2/erase.c | 7 ++++--- fs/jffs2/readinode.c | 9 +++++---- fs/jffs2/scan.c | 7 ++++--- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index 5e86f43616a..dddb2a6c9e2 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -332,7 +332,8 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl if (c->mtd->point) { unsigned long *wordebuf; - ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size, &retlen, (unsigned char **)&ebuf); + ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size, + &retlen, &ebuf, NULL); if (ret) { D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); goto do_flash_read; @@ -340,7 +341,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl if (retlen < c->sector_size) { /* Don't muck about if it won't let us point to the whole erase sector */ D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen)); - c->mtd->unpoint(c->mtd, ebuf, jeb->offset, retlen); + c->mtd->unpoint(c->mtd, jeb->offset, retlen); goto do_flash_read; } wordebuf = ebuf-sizeof(*wordebuf); @@ -349,7 +350,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl if (*++wordebuf != ~0) break; } while(--retlen); - c->mtd->unpoint(c->mtd, ebuf, jeb->offset, c->sector_size); + c->mtd->unpoint(c->mtd, jeb->offset, c->sector_size); if (retlen) { printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n", *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf)); diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 9fc4833c117..6ca08ad887c 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -63,10 +63,11 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(), * adding and jffs2_flash_read_end() interface. */ if (c->mtd->point) { - err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer); + err = c->mtd->point(c->mtd, ofs, len, &retlen, + (void **)&buffer, NULL); if (!err && retlen < len) { JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); - c->mtd->unpoint(c->mtd, buffer, ofs, retlen); + c->mtd->unpoint(c->mtd, ofs, retlen); } else if (err) JFFS2_WARNING("MTD point failed: error code %d.\n", err); else @@ -100,7 +101,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info kfree(buffer); #ifndef __ECOS else - c->mtd->unpoint(c->mtd, buffer, ofs, len); + c->mtd->unpoint(c->mtd, ofs, len); #endif if (crc != tn->data_crc) { @@ -136,7 +137,7 @@ free_out: kfree(buffer); #ifndef __ECOS else - c->mtd->unpoint(c->mtd, buffer, ofs, len); + c->mtd->unpoint(c->mtd, ofs, len); #endif return err; } diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 8c1e692bef7..1d437de1e9a 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -97,11 +97,12 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) size_t pointlen; if (c->mtd->point) { - ret = c->mtd->point (c->mtd, 0, c->mtd->size, &pointlen, &flashbuf); + ret = c->mtd->point(c->mtd, 0, c->mtd->size, &pointlen, + (void **)&flashbuf, NULL); if (!ret && pointlen < c->mtd->size) { /* Don't muck about if it won't let us point to the whole flash */ D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen)); - c->mtd->unpoint(c->mtd, flashbuf, 0, pointlen); + c->mtd->unpoint(c->mtd, 0, pointlen); flashbuf = NULL; } if (ret) @@ -267,7 +268,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) kfree(flashbuf); #ifndef __ECOS else - c->mtd->unpoint(c->mtd, flashbuf, 0, c->mtd->size); + c->mtd->unpoint(c->mtd, 0, c->mtd->size); #endif if (s) kfree(s); -- cgit v1.2.3 From 78e92b99ec4eb73755abd4e357b0b211eadafd88 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Fri, 2 May 2008 04:12:41 -0700 Subject: netns: assign PDE->data before gluing entry into /proc tree In this unfortunate case, proc_mkdir_mode wrapper can't be used anymore and this is no way to reuse proc_create_data due to nlinks assignment. So, copy the code from proc_mkdir and assign PDE->data at the appropriate moment. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- fs/proc/generic.c | 17 +++++++++++++++++ fs/proc/proc_net.c | 11 ----------- 2 files changed, 17 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 9d53b39a9cf..43e54e86cef 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -641,6 +641,23 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, return ent; } +struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, + struct proc_dir_entry *parent) +{ + struct proc_dir_entry *ent; + + ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2); + if (ent) { + ent->data = net; + if (proc_register(parent, ent) < 0) { + kfree(ent); + ent = NULL; + } + } + return ent; +} +EXPORT_SYMBOL_GPL(proc_net_mkdir); + struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) { diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 13cd7835d0d..83f357b30d7 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -159,17 +159,6 @@ struct net *get_proc_net(const struct inode *inode) } EXPORT_SYMBOL_GPL(get_proc_net); -struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, - struct proc_dir_entry *parent) -{ - struct proc_dir_entry *pde; - pde = proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); - if (pde != NULL) - pde->data = net; - return pde; -} -EXPORT_SYMBOL_GPL(proc_net_mkdir); - static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *netd, *net_statd; -- cgit v1.2.3 From 5ade9deaaa3e1f7291467d97b238648e43eae15e Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 2 May 2008 20:56:23 +0000 Subject: [CIFS] fix typo Signed-off-by: Steve French --- fs/cifs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a1f24bdb656..0d9d2e6d7af 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -389,7 +389,7 @@ int cifs_get_inode_info(struct inode **pinode, struct cifs_sb_info *cifs_sb = CIFS_SB(sb); const unsigned char *full_path = NULL; char *buf = NULL; - bool adjustTZ = bool; + bool adjustTZ = false; bool is_dfs_referral = false; pTcon = cifs_sb->tcon; -- cgit v1.2.3 From d35c7b0e54a596c5a8134d75999b7f391a9c6550 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sat, 3 May 2008 15:10:37 -0400 Subject: unified (weak) sys_pipe implementation This replaces the duplicated arch-specific versions of "sys_pipe()" with one unified implementation. This removes almost 250 lines of duplicated code. It's marked __weak, so that *if* an architecture wants to override the default implementation it can do so by simply having its own replacement version, since many architectures use alternate calling conventions for the 'pipe()' system call for legacy reasons (ie traditional UNIX implementations often return the two file descriptors in registers) I still haven't changed the cris version even though Linus says the BKL isn't needed. The arch maintainer can easily do it if there are really no obstacles. Signed-off-by: Ulrich Drepper Signed-off-by: Linus Torvalds --- fs/pipe.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'fs') diff --git a/fs/pipe.c b/fs/pipe.c index f73492b6817..3499f9ff631 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1075,6 +1075,23 @@ int do_pipe(int *fd) return error; } +/* + * sys_pipe() is the normal C calling standard for creating + * a pipe. It's not the way Unix traditionally does this, though. + */ +asmlinkage long __weak sys_pipe(int __user *fildes) +{ + int fd[2]; + int error; + + error = do_pipe(fd); + if (!error) { + if (copy_to_user(fildes, fd, sizeof(fd))) + error = -EFAULT; + } + return error; +} + /* * pipefs should _never_ be mounted by userland - too much of security hassle, * no real gain from having the whole whorehouse mounted. So we don't need -- cgit v1.2.3 From eb28062f131b0a1da32b2554fd819af5221040de Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Sun, 4 May 2008 23:12:55 +0800 Subject: task_nommu: fix compile failing bug because of spilt file.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CC fs/proc/task_nommu.o fs/proc/task_nommu.c: In function ‘task_mem’: fs/proc/task_nommu.c:55: error: dereferencing pointer to incomplete type make[2]: *** [fs/proc/task_nommu.o] Error 1 make[1]: *** [fs/proc] Error 2 make: *** [fs] Error 2 Signed-off-by: Bryan Wu Signed-off-by: Linus Torvalds --- fs/proc/task_nommu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 4b733f10845..4b4f9cc2f18 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -1,6 +1,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From dca3c33652e437ed02c30ed3eca3cecd0cc00838 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Tue, 29 Apr 2008 17:02:20 +0200 Subject: [PATCH] fix reservation discarding in affs - remove affs_put_inode, so preallocations aren't discared unnecessarily often. - remove affs_drop_inode, it's called with a spinlock held, so it can't use a mutex. - make i_opencnt atomic - avoid direct b_count manipulations - a few allocation failure fixes, so that these are more gracefully handled now. Signed-off-by: Roman Zippel Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/affs/affs.h | 4 +--- fs/affs/file.c | 25 +++++++++++++++++-------- fs/affs/inode.c | 34 ++++++++-------------------------- fs/affs/namei.c | 6 ++++-- fs/affs/super.c | 18 +++++++++++------- 5 files changed, 41 insertions(+), 46 deletions(-) (limited to 'fs') diff --git a/fs/affs/affs.h b/fs/affs/affs.h index d5bd497ab9c..223b1917093 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -48,7 +48,7 @@ struct affs_ext_key { * affs fs inode data in memory */ struct affs_inode_info { - u32 i_opencnt; + atomic_t i_opencnt; struct semaphore i_link_lock; /* Protects internal inode access. */ struct semaphore i_ext_lock; /* Protects internal inode access. */ #define i_hash_lock i_ext_lock @@ -170,8 +170,6 @@ extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry, extern unsigned long affs_parent_ino(struct inode *dir); extern struct inode *affs_new_inode(struct inode *dir); extern int affs_notify_change(struct dentry *dentry, struct iattr *attr); -extern void affs_put_inode(struct inode *inode); -extern void affs_drop_inode(struct inode *inode); extern void affs_delete_inode(struct inode *inode); extern void affs_clear_inode(struct inode *inode); extern struct inode *affs_iget(struct super_block *sb, diff --git a/fs/affs/file.c b/fs/affs/file.c index 1a4f092f24e..6eac7bdeec9 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -48,8 +48,9 @@ affs_file_open(struct inode *inode, struct file *filp) { if (atomic_read(&filp->f_count) != 1) return 0; - pr_debug("AFFS: open(%d)\n", AFFS_I(inode)->i_opencnt); - AFFS_I(inode)->i_opencnt++; + pr_debug("AFFS: open(%lu,%d)\n", + inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); + atomic_inc(&AFFS_I(inode)->i_opencnt); return 0; } @@ -58,10 +59,16 @@ affs_file_release(struct inode *inode, struct file *filp) { if (atomic_read(&filp->f_count) != 0) return 0; - pr_debug("AFFS: release(%d)\n", AFFS_I(inode)->i_opencnt); - AFFS_I(inode)->i_opencnt--; - if (!AFFS_I(inode)->i_opencnt) + pr_debug("AFFS: release(%lu, %d)\n", + inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); + + if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) { + mutex_lock(&inode->i_mutex); + if (inode->i_size != AFFS_I(inode)->mmu_private) + affs_truncate(inode); affs_free_prealloc(inode); + mutex_unlock(&inode->i_mutex); + } return 0; } @@ -180,7 +187,7 @@ affs_get_extblock(struct inode *inode, u32 ext) /* inline the simplest case: same extended block as last time */ struct buffer_head *bh = AFFS_I(inode)->i_ext_bh; if (ext == AFFS_I(inode)->i_ext_last) - atomic_inc(&bh->b_count); + get_bh(bh); else /* we have to do more (not inlined) */ bh = affs_get_extblock_slow(inode, ext); @@ -306,7 +313,7 @@ store_ext: affs_brelse(AFFS_I(inode)->i_ext_bh); AFFS_I(inode)->i_ext_last = ext; AFFS_I(inode)->i_ext_bh = bh; - atomic_inc(&bh->b_count); + get_bh(bh); return bh; @@ -324,7 +331,6 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block); - BUG_ON(block > (sector_t)0x7fffffffUL); if (block >= AFFS_I(inode)->i_blkcnt) { @@ -827,6 +833,8 @@ affs_truncate(struct inode *inode) res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata); if (!res) res = mapping->a_ops->write_end(NULL, mapping, size, 0, 0, page, fsdata); + else + inode->i_size = AFFS_I(inode)->mmu_private; mark_inode_dirty(inode); return; } else if (inode->i_size == AFFS_I(inode)->mmu_private) @@ -862,6 +870,7 @@ affs_truncate(struct inode *inode) blk++; } else AFFS_HEAD(ext_bh)->first_data = 0; + AFFS_HEAD(ext_bh)->block_count = cpu_to_be32(i); size = AFFS_SB(sb)->s_hashsize; if (size > blkcnt - blk + i) size = blkcnt - blk + i; diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 27fe6cbe43a..a13b334a391 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -58,7 +58,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino) AFFS_I(inode)->i_extcnt = 1; AFFS_I(inode)->i_ext_last = ~1; AFFS_I(inode)->i_protect = prot; - AFFS_I(inode)->i_opencnt = 0; + atomic_set(&AFFS_I(inode)->i_opencnt, 0); AFFS_I(inode)->i_blkcnt = 0; AFFS_I(inode)->i_lc = NULL; AFFS_I(inode)->i_lc_size = 0; @@ -108,8 +108,6 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino) inode->i_mode |= S_IFDIR; } else inode->i_mode = S_IRUGO | S_IXUGO | S_IWUSR | S_IFDIR; - if (tail->link_chain) - inode->i_nlink = 2; /* Maybe it should be controlled by mount parameter? */ //inode->i_mode |= S_ISVTX; inode->i_op = &affs_dir_inode_operations; @@ -244,32 +242,13 @@ out: return error; } -void -affs_put_inode(struct inode *inode) -{ - pr_debug("AFFS: put_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); - affs_free_prealloc(inode); -} - -void -affs_drop_inode(struct inode *inode) -{ - mutex_lock(&inode->i_mutex); - if (inode->i_size != AFFS_I(inode)->mmu_private) - affs_truncate(inode); - mutex_unlock(&inode->i_mutex); - - generic_drop_inode(inode); -} - void affs_delete_inode(struct inode *inode) { pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); truncate_inode_pages(&inode->i_data, 0); inode->i_size = 0; - if (S_ISREG(inode->i_mode)) - affs_truncate(inode); + affs_truncate(inode); clear_inode(inode); affs_free_block(inode->i_sb, inode->i_ino); } @@ -277,9 +256,12 @@ affs_delete_inode(struct inode *inode) void affs_clear_inode(struct inode *inode) { - unsigned long cache_page = (unsigned long) AFFS_I(inode)->i_lc; + unsigned long cache_page; pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); + + affs_free_prealloc(inode); + cache_page = (unsigned long)AFFS_I(inode)->i_lc; if (cache_page) { pr_debug("AFFS: freeing ext cache\n"); AFFS_I(inode)->i_lc = NULL; @@ -316,7 +298,7 @@ affs_new_inode(struct inode *dir) inode->i_ino = block; inode->i_nlink = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - AFFS_I(inode)->i_opencnt = 0; + atomic_set(&AFFS_I(inode)->i_opencnt, 0); AFFS_I(inode)->i_blkcnt = 0; AFFS_I(inode)->i_lc = NULL; AFFS_I(inode)->i_lc_size = 0; @@ -369,12 +351,12 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3 switch (type) { case ST_LINKFILE: case ST_LINKDIR: - inode_bh = bh; retval = -ENOSPC; block = affs_alloc_block(dir, dir->i_ino); if (!block) goto err; retval = -EIO; + inode_bh = bh; bh = affs_getzeroblk(sb, block); if (!bh) goto err; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 2218f1ee71c..cfcf1b6cf82 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -234,7 +234,8 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) int affs_unlink(struct inode *dir, struct dentry *dentry) { - pr_debug("AFFS: unlink(dir=%d, \"%.*s\")\n", (u32)dir->i_ino, + pr_debug("AFFS: unlink(dir=%d, %lu \"%.*s\")\n", (u32)dir->i_ino, + dentry->d_inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name); return affs_remove_header(dentry); @@ -302,7 +303,8 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, int mode) int affs_rmdir(struct inode *dir, struct dentry *dentry) { - pr_debug("AFFS: rmdir(dir=%u, \"%.*s\")\n", (u32)dir->i_ino, + pr_debug("AFFS: rmdir(dir=%u, %lu \"%.*s\")\n", (u32)dir->i_ino, + dentry->d_inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name); return affs_remove_header(dentry); diff --git a/fs/affs/super.c b/fs/affs/super.c index 01d25d53254..d214837d5e4 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -71,12 +71,18 @@ static struct kmem_cache * affs_inode_cachep; static struct inode *affs_alloc_inode(struct super_block *sb) { - struct affs_inode_info *ei; - ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL); - if (!ei) + struct affs_inode_info *i; + + i = kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL); + if (!i) return NULL; - ei->vfs_inode.i_version = 1; - return &ei->vfs_inode; + + i->vfs_inode.i_version = 1; + i->i_lc = NULL; + i->i_ext_bh = NULL; + i->i_pa_cnt = 0; + + return &i->vfs_inode; } static void affs_destroy_inode(struct inode *inode) @@ -114,8 +120,6 @@ static const struct super_operations affs_sops = { .alloc_inode = affs_alloc_inode, .destroy_inode = affs_destroy_inode, .write_inode = affs_write_inode, - .put_inode = affs_put_inode, - .drop_inode = affs_drop_inode, .delete_inode = affs_delete_inode, .clear_inode = affs_clear_inode, .put_super = affs_put_super, -- cgit v1.2.3 From 33dcdac2df54e66c447ae03f58c95c7251aa5649 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Apr 2008 17:46:26 +0200 Subject: [PATCH] kill ->put_inode And with that last patch to affs killing the last put_inode instance we can finally, after many years of transition kill this racy and awkward interface. (It's kinda funny that even the description in Documentation/filesystems/vfs.txt was entirely wrong..) Also remove a very misleading comment above the defintion of struct super_operations. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index bf647813042..18bdce14b70 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1153,9 +1153,6 @@ void iput(struct inode *inode) BUG_ON(inode->i_state == I_CLEAR); - if (op && op->put_inode) - op->put_inode(inode); - if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) iput_final(inode); } -- cgit v1.2.3 From 0b2bac2f1ea0d33a3621b27ca68b9ae760fca2e9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 6 May 2008 13:58:34 -0400 Subject: [PATCH] fix SMP ordering hole in fcntl_setlk() fcntl_setlk()/close() race prevention has a subtle hole - we need to make sure that if we *do* have an fcntl/close race on SMP box, the access to descriptor table and inode->i_flock won't get reordered. As it is, we get STORE inode->i_flock, LOAD descriptor table entry vs. STORE descriptor table entry, LOAD inode->i_flock with not a single lock in common on both sides. We do have BKL around the first STORE, but check in locks_remove_posix() is outside of BKL and for a good reason - we don't want BKL on common path of close(2). Solution is to hold ->file_lock around fcheck() in there; that orders us wrt removal from descriptor table that preceded locks_remove_posix() on close path and we either come first (in which case eviction will be handled by the close side) or we'll see the effect of close and do eviction ourselves. Note that even though it's read-only access, we do need ->file_lock here - rcu_read_lock() won't be enough to order the things. Signed-off-by: Al Viro --- fs/locks.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 663c069b59b..0ac6b92cb0b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1753,6 +1753,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, struct file_lock *file_lock = locks_alloc_lock(); struct flock flock; struct inode *inode; + struct file *f; int error; if (file_lock == NULL) @@ -1825,7 +1826,15 @@ again: * Attempt to detect a close/fcntl race and recover by * releasing the lock that was just acquired. */ - if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) { + /* + * we need that spin_lock here - it prevents reordering between + * update of inode->i_flock and check for it done in close(). + * rcu_read_lock() wouldn't do. + */ + spin_lock(¤t->files->file_lock); + f = fcheck(fd); + spin_unlock(¤t->files->file_lock); + if (!error && f != filp && flock.l_type != F_UNLCK) { flock.l_type = F_UNLCK; goto again; } @@ -1881,6 +1890,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, struct file_lock *file_lock = locks_alloc_lock(); struct flock64 flock; struct inode *inode; + struct file *f; int error; if (file_lock == NULL) @@ -1953,7 +1963,10 @@ again: * Attempt to detect a close/fcntl race and recover by * releasing the lock that was just acquired. */ - if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) { + spin_lock(¤t->files->file_lock); + f = fcheck(fd); + spin_unlock(¤t->files->file_lock); + if (!error && f != filp && flock.l_type != F_UNLCK) { flock.l_type = F_UNLCK; goto again; } -- cgit v1.2.3 From 6ce07c7b61e74af35a05060a2d6341f68fd92c9e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 6 May 2008 13:13:37 -0700 Subject: VFS: fix unused variable warning Commit 33dcdac2df54e66c447ae03f58c95c7251aa5649 ("kill ->put_inode") removed the final use of i_op->put_inode, but left the now totally unused "op" variable in iput(). Get rid of it. Signed-off-by: Linus Torvalds --- fs/inode.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index 18bdce14b70..c36d9480335 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1149,8 +1149,6 @@ static inline void iput_final(struct inode *inode) void iput(struct inode *inode) { if (inode) { - const struct super_operations *op = inode->i_sb->s_op; - BUG_ON(inode->i_state == I_CLEAR); if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) -- cgit v1.2.3 From dea570e08a69b14808b2cab56d6b0dda72145fa6 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 6 May 2008 22:05:51 +0000 Subject: [CIFS] Remove over-indented code in find_unc(). Signed-off-by: Cyrill Gorcunov Signed-off-by: Steve French --- fs/cifs/connect.c | 64 +++++++++++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 6c4332f8da6..f2259db075c 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1362,45 +1362,43 @@ find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) { struct list_head *tmp; struct cifsTconInfo *tcon; + __be32 old_ip; read_lock(&GlobalSMBSeslock); + list_for_each(tmp, &GlobalTreeConnectionList) { cFYI(1, ("Next tcon")); tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); - if (tcon->ses) { - if (tcon->ses->server) { - cFYI(1, - ("old ip addr: %x == new ip %x ?", - tcon->ses->server->addr.sockAddr.sin_addr. - s_addr, new_target_ip_addr)); - if (tcon->ses->server->addr.sockAddr.sin_addr. - s_addr == new_target_ip_addr) { - /* BB lock tcon, server and tcp session and increment use count here? */ - /* found a match on the TCP session */ - /* BB check if reconnection needed */ - cFYI(1, - ("IP match, old UNC: %s new: %s", - tcon->treeName, uncName)); - if (strncmp - (tcon->treeName, uncName, - MAX_TREE_SIZE) == 0) { - cFYI(1, - ("and old usr: %s new: %s", - tcon->treeName, uncName)); - if (strncmp - (tcon->ses->userName, - userName, - MAX_USERNAME_SIZE) == 0) { - read_unlock(&GlobalSMBSeslock); - /* matched smb session - (user name */ - return tcon; - } - } - } - } - } + if (!tcon->ses || !tcon->ses->server) + continue; + + old_ip = tcon->ses->server->addr.sockAddr.sin_addr.s_addr; + cFYI(1, ("old ip addr: %x == new ip %x ?", + old_ip, new_target_ip_addr)); + + if (old_ip != new_target_ip_addr) + continue; + + /* BB lock tcon, server, tcp session and increment use count? */ + /* found a match on the TCP session */ + /* BB check if reconnection needed */ + cFYI(1, ("IP match, old UNC: %s new: %s", + tcon->treeName, uncName)); + + if (strncmp(tcon->treeName, uncName, MAX_TREE_SIZE)) + continue; + + cFYI(1, ("and old usr: %s new: %s", + tcon->treeName, uncName)); + + if (strncmp(tcon->ses->userName, userName, MAX_USERNAME_SIZE)) + continue; + + /* matched smb session (user name) */ + read_unlock(&GlobalSMBSeslock); + return tcon; } + read_unlock(&GlobalSMBSeslock); return NULL; } -- cgit v1.2.3 From cf432eb50ffd03572c08a006f44e0069957cf300 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 6 May 2008 22:27:16 +0000 Subject: [CIFS] cleanup cifsd completion Was a holdover from the old kernel_thread based cifsd code. We needed to know that the thread had set the task variable before proceeding. Now that kthread_run returns the new task, this doesn't appear to be needed anymore. As best I can tell, this sleep was intended to try to prevent cifs_umount from freeing the cifsSesInfo struct before cifsd had exited. Now that cifsd is using the kthread API, we know that when kthread_stop returns that cifsd has exited, so I don't think this is needed any longer. Signed-off-by: Jeff Layton Acked-by: Christop Hellwig Signed-off-by: Steve French --- fs/cifs/connect.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f2259db075c..957998e8477 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -49,8 +49,6 @@ #define CIFS_PORT 445 #define RFC1001_PORT 139 -static DECLARE_COMPLETION(cifsd_complete); - extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24); @@ -356,7 +354,6 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) atomic_inc(&tcpSesAllocCount); length = tcpSesAllocCount.counter; write_unlock(&GlobalSMBSeslock); - complete(&cifsd_complete); if (length > 1) mempool_resize(cifs_req_poolp, length + cifs_min_rcv, GFP_KERNEL); @@ -1980,7 +1977,6 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, kfree(srvTcp->hostname); goto out; } - wait_for_completion(&cifsd_complete); rc = 0; memcpy(srvTcp->workstation_RFC1001_name, volume_info.source_rfc1001_name, 16); @@ -3553,8 +3549,6 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) cifs_sb->prepathlen = 0; cifs_sb->prepath = NULL; kfree(tmp); - if (ses) - schedule_timeout_interruptible(msecs_to_jiffies(500)); if (ses) sesInfoFree(ses); -- cgit v1.2.3 From 7f3d4ee108c184ab215036051087aaaaa8de7661 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 7 May 2008 09:22:39 +0200 Subject: vfs: splice remove_suid() cleanup generic_file_splice_write() duplicates remove_suid() just because it doesn't hold i_mutex. But it grabs i_mutex inside splice_from_pipe() anyway, so this is rather pointless. Move locking to generic_file_splice_write() and call remove_suid() and __splice_from_pipe() instead. Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- fs/splice.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index 633f58ebfb7..cece15b4ef7 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -811,24 +811,19 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, { struct address_space *mapping = out->f_mapping; struct inode *inode = mapping->host; - int killsuid, killpriv; + struct splice_desc sd = { + .total_len = len, + .flags = flags, + .pos = *ppos, + .u.file = out, + }; ssize_t ret; - int err = 0; - - killpriv = security_inode_need_killpriv(out->f_path.dentry); - killsuid = should_remove_suid(out->f_path.dentry); - if (unlikely(killsuid || killpriv)) { - mutex_lock(&inode->i_mutex); - if (killpriv) - err = security_inode_killpriv(out->f_path.dentry); - if (!err && killsuid) - err = __remove_suid(out->f_path.dentry, killsuid); - mutex_unlock(&inode->i_mutex); - if (err) - return err; - } - ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); + inode_double_lock(inode, pipe->inode); + ret = remove_suid(out->f_path.dentry); + if (likely(!ret)) + ret = __splice_from_pipe(pipe, &sd, pipe_to_file); + inode_double_unlock(inode, pipe->inode); if (ret > 0) { unsigned long nr_pages; @@ -840,6 +835,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, * sync it. */ if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { + int err; + mutex_lock(&inode->i_mutex); err = generic_osync_inode(inode, mapping, OSYNC_METADATA|OSYNC_DATA); -- cgit v1.2.3 From 221e583a735fc5d879d83c2a76b8ee5afcbdf146 Mon Sep 17 00:00:00 2001 From: Rasmus Rohde Date: Wed, 30 Apr 2008 17:22:06 +0200 Subject: udf: Make udf exportable Cc: Christoph Hellwig Signed-off-by: Rasmus Rohde Signed-off-by: Jan Kara --- fs/udf/namei.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/udf/super.c | 1 + fs/udf/udfdecl.h | 1 + 3 files changed, 140 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/udf/namei.c b/fs/udf/namei.c index ba5537d4bc1..47a6589e10b 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -32,6 +32,7 @@ #include #include #include +#include static inline int udf_match(int len1, const char *name1, int len2, const char *name2) @@ -158,6 +159,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, sector_t offset; struct extent_position epos = {}; struct udf_inode_info *dinfo = UDF_I(dir); + int isdotdot = dentry->d_name.len == 2 && + dentry->d_name.name[0] == '.' && dentry->d_name.name[1] == '.'; size = udf_ext0_offset(dir) + dir->i_size; f_pos = udf_ext0_offset(dir); @@ -225,6 +228,12 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, continue; } + if ((cfi->fileCharacteristics & FID_FILE_CHAR_PARENT) && + isdotdot) { + brelse(epos.bh); + return fi; + } + if (!lfi) continue; @@ -286,9 +295,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, } } unlock_kernel(); - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); } static struct fileIdentDesc *udf_add_entry(struct inode *dir, @@ -1232,6 +1240,134 @@ end_rename: return retval; } +static struct dentry *udf_get_parent(struct dentry *child) +{ + struct dentry *parent; + struct inode *inode = NULL; + struct dentry dotdot; + struct fileIdentDesc cfi; + struct udf_fileident_bh fibh; + + dotdot.d_name.name = ".."; + dotdot.d_name.len = 2; + + lock_kernel(); + if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi)) + goto out_unlock; + + if (fibh.sbh != fibh.ebh) + brelse(fibh.ebh); + brelse(fibh.sbh); + + inode = udf_iget(child->d_inode->i_sb, + lelb_to_cpu(cfi.icb.extLocation)); + if (!inode) + goto out_unlock; + unlock_kernel(); + + parent = d_alloc_anon(inode); + if (!parent) { + iput(inode); + parent = ERR_PTR(-ENOMEM); + } + + return parent; +out_unlock: + unlock_kernel(); + return ERR_PTR(-EACCES); +} + + +static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block, + u16 partref, __u32 generation) +{ + struct inode *inode; + struct dentry *result; + kernel_lb_addr loc; + + if (block == 0) + return ERR_PTR(-ESTALE); + + loc.logicalBlockNum = block; + loc.partitionReferenceNum = partref; + inode = udf_iget(sb, loc); + + if (inode == NULL) + return ERR_PTR(-ENOMEM); + + if (generation && inode->i_generation != generation) { + iput(inode); + return ERR_PTR(-ESTALE); + } + result = d_alloc_anon(inode); + if (!result) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + return result; +} + +static struct dentry *udf_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + if ((fh_len != 3 && fh_len != 5) || + (fh_type != FILEID_UDF_WITH_PARENT && + fh_type != FILEID_UDF_WITHOUT_PARENT)) + return NULL; + + return udf_nfs_get_inode(sb, fid->udf.block, fid->udf.partref, + fid->udf.generation); +} + +static struct dentry *udf_fh_to_parent(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + if (fh_len != 5 || fh_type != FILEID_UDF_WITH_PARENT) + return NULL; + + return udf_nfs_get_inode(sb, fid->udf.parent_block, + fid->udf.parent_partref, + fid->udf.parent_generation); +} +static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, + int connectable) +{ + int len = *lenp; + struct inode *inode = de->d_inode; + kernel_lb_addr location = UDF_I(inode)->i_location; + struct fid *fid = (struct fid *)fh; + int type = FILEID_UDF_WITHOUT_PARENT; + + if (len < 3 || (connectable && len < 5)) + return 255; + + *lenp = 3; + fid->udf.block = location.logicalBlockNum; + fid->udf.partref = location.partitionReferenceNum; + fid->udf.generation = inode->i_generation; + + if (connectable && !S_ISDIR(inode->i_mode)) { + spin_lock(&de->d_lock); + inode = de->d_parent->d_inode; + location = UDF_I(inode)->i_location; + fid->udf.parent_block = location.logicalBlockNum; + fid->udf.parent_partref = location.partitionReferenceNum; + fid->udf.parent_generation = inode->i_generation; + spin_unlock(&de->d_lock); + *lenp = 5; + type = FILEID_UDF_WITH_PARENT; + } + + return type; +} + +const struct export_operations udf_export_ops = { + .encode_fh = udf_encode_fh, + .fh_to_dentry = udf_fh_to_dentry, + .fh_to_parent = udf_fh_to_parent, + .get_parent = udf_get_parent, +}; + const struct inode_operations udf_dir_inode_operations = { .lookup = udf_lookup, .create = udf_create, diff --git a/fs/udf/super.c b/fs/udf/super.c index b564fc140fe..260f4b82c79 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1933,6 +1933,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) /* Fill in the rest of the superblock */ sb->s_op = &udf_sb_ops; + sb->s_export_op = &udf_export_ops; sb->dq_op = NULL; sb->s_dirt = 0; sb->s_magic = UDF_SUPER_MAGIC; diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index f3f45d02927..8fa9c2d7091 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -73,6 +73,7 @@ struct task_struct; struct buffer_head; struct super_block; +extern const struct export_operations udf_export_ops; extern const struct inode_operations udf_dir_inode_operations; extern const struct file_operations udf_dir_operations; extern const struct inode_operations udf_file_inode_operations; -- cgit v1.2.3 From 9afadc4b1fd25337003832c9a4668f9bd42cdda9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 6 May 2008 18:26:17 +0200 Subject: udf: Fix memory corruption when fs mounted with noadinicb option When UDF filesystem is mounted with noadinicb mount option, it happens that we extend an empty directory with a block. A code in udf_add_entry() didn't count with this possibility and used uninitialized data leading to memory and filesystem corruption. Add a check whether file already has some extents before operating on them. Signed-off-by: Jan Kara --- fs/udf/namei.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 47a6589e10b..3d94bc1cfba 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -315,7 +315,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, uint16_t liu; int block; kernel_lb_addr eloc; - uint32_t elen; + uint32_t elen = 0; sector_t offset; struct extent_position epos = {}; struct udf_inode_info *dinfo; @@ -406,7 +406,8 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, } add: - if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { + /* Is there any extent whose size we need to round up? */ + if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && elen) { elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1); if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) epos.offset -= sizeof(short_ad); -- cgit v1.2.3 From eeae1d48c011839d9e1cdc1e8aacf0193c9d8197 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 7 May 2008 13:26:27 +0200 Subject: block: use unitialized_var() in bio_alloc_bioset() Better than setting idx to some random value and it silences the same bogus gcc warning. Signed-off-by: Jens Axboe --- fs/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 799f86deff2..2fa04ff8670 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -158,7 +158,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) bio_init(bio); if (likely(nr_iovecs)) { - unsigned long idx = 0; /* shut up gcc */ + unsigned long uninitialized_var(idx); bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); if (unlikely(!bvl)) { -- cgit v1.2.3 From ffee0259c9edcc4c0f06b60df51c461eeecad4c0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 30 Apr 2008 09:08:54 +0200 Subject: docbook: fix bio missing parameter Fix fs/bio.c kernel-doc parameter warning: Warning(linux-2.6.25-git14//fs/bio.c:972): No description found for parameter 'reading' Signed-off-by: Randy Dunlap Signed-off-by: Jens Axboe --- fs/bio.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 2fa04ff8670..78562574cb5 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -963,6 +963,7 @@ static void bio_copy_kern_endio(struct bio *bio, int err) * @data: pointer to buffer to copy * @len: length in bytes * @gfp_mask: allocation flags for bio and page allocation + * @reading: data direction is READ * * copy the kernel address into a bio suitable for io to a block * device. Returns an error pointer in case of error. -- cgit v1.2.3 From 75065ff619e42fe35178eda863cbcddd57776794 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 8 May 2008 14:06:19 +0200 Subject: Revert "relay: fix splice problem" This reverts commit c3270e577c18b3d0e984c3371493205a4807db9d. --- fs/splice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index cece15b4ef7..78150038b58 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1072,7 +1072,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, ret = splice_direct_to_actor(in, &sd, direct_splice_actor); if (ret > 0) - *ppos = sd.pos; + *ppos += ret; return ret; } -- cgit v1.2.3 From ba719baeabbff5476eeb91c223e6921ba29e1490 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 6 May 2008 20:42:38 -0700 Subject: sys_pipe(): fix file descriptor leaks Remember to close the files if copy_to_user() failed. Spotted by dm.n9107@gmail.com. Signed-off-by: Ulrich Drepper Cc: DM Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/pipe.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/pipe.c b/fs/pipe.c index 3499f9ff631..ec228bc9f88 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -1086,8 +1087,11 @@ asmlinkage long __weak sys_pipe(int __user *fildes) error = do_pipe(fd); if (!error) { - if (copy_to_user(fildes, fd, sizeof(fd))) + if (copy_to_user(fildes, fd, sizeof(fd))) { + sys_close(fd[0]); + sys_close(fd[1]); error = -EFAULT; + } } return error; } -- cgit v1.2.3 From 19566ca6dc26600bae4b75701d4dced8d8540f16 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Thu, 8 May 2008 22:36:27 +0800 Subject: fs/proc/task_mmu.c: remove duplicated include files Removed duplicated include files and in fs/proc/task_mmu.c. Signed-off-by: Huang Weiyi Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e2b8e769f51..88717c0f941 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -5,11 +5,9 @@ #include #include #include -#include #include #include #include -#include #include #include -- cgit v1.2.3 From 7c5e628f95b440b69332b1ed3eb112648fc8f7ff Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Thu, 8 May 2008 20:48:42 +0000 Subject: [CIFS] Fixed build warning in is_ip Signed-off-by: Igor Mammedov Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 2 +- fs/cifs/connect.c | 3 +++ fs/cifs/netmisc.c | 32 +------------------------------- 3 files changed, 5 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index a249a29109a..d481f6c5a2b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -70,7 +70,7 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr); extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); extern int decode_negTokenInit(unsigned char *security_blob, int length, enum securityEnum *secType); -extern int cifs_inet_pton(int, char *source, void *dst); +extern int cifs_inet_pton(const int, const char *source, void *dst); extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); extern void header_assemble(struct smb_hdr *, char /* command */ , const struct cifsTconInfo *, int /* length of diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 957998e8477..791ca5c1a11 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1302,6 +1302,9 @@ cifs_parse_mount_options(char *options, const char *devname, "begin with // or \\\\ \n"); return 1; } + value = strpbrk(vol->UNC+2, "/\\"); + if (value) + *value = '\\'; } else { printk(KERN_WARNING "CIFS: UNC name too long\n"); return 1; diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 3b5a5ce882b..00f4cff400b 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -132,47 +132,17 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = { {0, 0} }; - -/* if the mount helper is missing we need to reverse the 1st slash - from '/' to backslash in order to format the UNC properly for - ip address parsing and for tree connect (unless the user - remembered to put the UNC name in properly). Fortunately we do - not have to call this twice (we check for IPv4 addresses - first, so it is already converted by the time we - try IPv6 addresses */ -static int canonicalize_unc(char *cp) -{ - int i; - - for (i = 0; i <= 46 /* INET6_ADDRSTRLEN */ ; i++) { - if (cp[i] == 0) - break; - if (cp[i] == '\\') - break; - if (cp[i] == '/') { - cFYI(DBG2, ("change slash to \\ in malformed UNC")); - cp[i] = '\\'; - return 1; - } - } - return 0; -} - /* Convert string containing dotted ip address to binary form */ /* returns 0 if invalid address */ int -cifs_inet_pton(int address_family, char *cp, void *dst) +cifs_inet_pton(const int address_family, const char *cp, void *dst) { int ret = 0; /* calculate length by finding first slash or NULL */ if (address_family == AF_INET) { ret = in4_pton(cp, -1 /* len */, dst, '\\', NULL); - if (ret == 0) { - if (canonicalize_unc(cp)) - ret = in4_pton(cp, -1, dst, '\\', NULL); - } } else if (address_family == AF_INET6) { ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL); } -- cgit v1.2.3 From af4b3c355cbd38703471e55d11f42d8640db4118 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 9 May 2008 03:48:05 +0000 Subject: [CIFS] fix build warning Signed-off-by: Steve French --- fs/cifs/cifsacl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 6fe1bc5bb36..34902cff540 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -589,7 +589,7 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode, unlock_file = true; fid = open_file->netfid; } else if (pfid == NULL) { - bool oplock = false; + int oplock = 0; /* open file */ rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, READ_CONTROL, 0, &fid, &oplock, NULL, -- cgit v1.2.3 From 1b20d672188bf80baef60d515a123f556871a5ce Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 9 May 2008 18:17:21 +0000 Subject: [CIFS] cifs_find_tcp_session cleanup This patch cleans up cifs_find_tcp_session so it become less indented. Also the error of skipping IPv6 matched addresses fixed. Signed-off-by: Cyrill Gorcunov Signed-off-by: Steve French --- fs/cifs/connect.c | 51 ++++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 791ca5c1a11..6b520aad7af 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1318,42 +1318,43 @@ cifs_parse_mount_options(char *options, const char *devname, static struct cifsSesInfo * cifs_find_tcp_session(struct in_addr *target_ip_addr, - struct in6_addr *target_ip6_addr, - char *userName, struct TCP_Server_Info **psrvTcp) + struct in6_addr *target_ip6_addr, + char *userName, struct TCP_Server_Info **psrvTcp) { struct list_head *tmp; struct cifsSesInfo *ses; + *psrvTcp = NULL; - read_lock(&GlobalSMBSeslock); + read_lock(&GlobalSMBSeslock); list_for_each(tmp, &GlobalSMBSessionList) { ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); - if (ses->server) { - if ((target_ip_addr && - (ses->server->addr.sockAddr.sin_addr.s_addr - == target_ip_addr->s_addr)) || (target_ip6_addr - && memcmp(&ses->server->addr.sockAddr6.sin6_addr, - target_ip6_addr, sizeof(*target_ip6_addr)))) { - /* BB lock server and tcp session and increment - use count here?? */ - - /* found a match on the TCP session */ - *psrvTcp = ses->server; - - /* BB check if reconnection needed */ - if (strncmp - (ses->userName, userName, - MAX_USERNAME_SIZE) == 0){ - read_unlock(&GlobalSMBSeslock); - /* Found exact match on both TCP and - SMB sessions */ - return ses; - } - } + if (!ses->server) + continue; + + if (target_ip_addr && + ses->server->addr.sockAddr.sin_addr.s_addr != target_ip_addr->s_addr) + continue; + else if (target_ip6_addr && + memcmp(&ses->server->addr.sockAddr6.sin6_addr, + target_ip6_addr, sizeof(*target_ip6_addr))) + continue; + /* BB lock server and tcp session and increment use count here?? */ + + /* found a match on the TCP session */ + *psrvTcp = ses->server; + + /* BB check if reconnection needed */ + if (strncmp(ses->userName, userName, MAX_USERNAME_SIZE) == 0) { + read_unlock(&GlobalSMBSeslock); + /* Found exact match on both TCP and + SMB sessions */ + return ses; } /* else tcp and smb sessions need reconnection */ } read_unlock(&GlobalSMBSeslock); + return NULL; } -- cgit v1.2.3 From 02eadeffda169a45946c79270ec19f45eeafb8e7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 9 May 2008 21:26:11 +0000 Subject: [CIFS] add local struct inode pointer to cifs_setattr Clean up cifs_setattr a bit by adding a local inode pointer, and changing all of the direntry->d_inode references to it. This also adds a bit of micro-optimization. d_inode shouldn't change over the life of this function, so we only need to dereference it once. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 2 +- fs/cifs/inode.c | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 6b520aad7af..96505086759 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1339,7 +1339,7 @@ cifs_find_tcp_session(struct in_addr *target_ip_addr, memcmp(&ses->server->addr.sockAddr6.sin6_addr, target_ip6_addr, sizeof(*target_ip6_addr))) continue; - /* BB lock server and tcp session and increment use count here?? */ + /* BB lock server and tcp session; increment use count here?? */ /* found a match on the TCP session */ *psrvTcp = ses->server; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 0d9d2e6d7af..d904a037c83 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1408,18 +1408,19 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) __u64 uid = 0xFFFFFFFFFFFFFFFFULL; __u64 gid = 0xFFFFFFFFFFFFFFFFULL; struct cifsInodeInfo *cifsInode; + struct inode *inode = direntry->d_inode; xid = GetXid(); cFYI(1, ("setattr on file %s attrs->iavalid 0x%x", direntry->d_name.name, attrs->ia_valid)); - cifs_sb = CIFS_SB(direntry->d_inode->i_sb); + cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { /* check if we have permission to change attrs */ - rc = inode_change_ok(direntry->d_inode, attrs); + rc = inode_change_ok(inode, attrs); if (rc < 0) { FreeXid(xid); return rc; @@ -1432,7 +1433,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) FreeXid(xid); return -ENOMEM; } - cifsInode = CIFS_I(direntry->d_inode); + cifsInode = CIFS_I(inode); if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) { /* @@ -1443,9 +1444,9 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) will be truncated anyway? Also, should we error out here if the flush returns error? */ - rc = filemap_write_and_wait(direntry->d_inode->i_mapping); + rc = filemap_write_and_wait(inode->i_mapping); if (rc != 0) { - CIFS_I(direntry->d_inode)->write_behind_rc = rc; + cifsInode->write_behind_rc = rc; rc = 0; } } @@ -1521,9 +1522,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) */ if (rc == 0) { - rc = cifs_vmtruncate(direntry->d_inode, attrs->ia_size); - cifs_truncate_page(direntry->d_inode->i_mapping, - direntry->d_inode->i_size); + rc = cifs_vmtruncate(inode, attrs->ia_size); + cifs_truncate_page(inode->i_mapping, inode->i_size); } else goto cifs_setattr_exit; } @@ -1557,7 +1557,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) rc = 0; #ifdef CONFIG_CIFS_EXPERIMENTAL if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) - rc = mode_to_acl(direntry->d_inode, full_path, mode); + rc = mode_to_acl(inode, full_path, mode); else if ((mode & S_IWUGO) == 0) { #else if ((mode & S_IWUGO) == 0) { @@ -1665,7 +1665,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) /* do not need local check to inode_check_ok since the server does that */ if (!rc) - rc = inode_setattr(direntry->d_inode, attrs); + rc = inode_setattr(inode, attrs); cifs_setattr_exit: kfree(full_path); FreeXid(xid); -- cgit v1.2.3 From 67750fb9e07940c078d1edb16fd736ccc92a4a4e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 9 May 2008 22:28:02 +0000 Subject: [CIFS] when not using unix extensions, check for and set ATTR_READONLY on create and mkdir When creating a directory on a CIFS share without POSIX extensions, and the given mode has no write bits set, set the ATTR_READONLY bit. When creating a file, set ATTR_READONLY if the create mode has no write bits set and we're not using unix extensions. There are some comments about this being problematic due to the VFS splitting creates into 2 parts. I'm not sure what that's actually talking about, but I'm assuming that it has something to do with how mknod is implemented. In the simple case where we have no unix extensions and we're just creating a regular file, there's no reason we can't set ATTR_READONLY. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifspdu.h | 1 + fs/cifs/cifssmb.c | 16 ++++++---------- fs/cifs/dir.c | 16 +++++++++++++--- fs/cifs/inode.c | 15 +++++++++++---- 4 files changed, 31 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index a0d26b540d4..c43bf4b7a55 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -340,6 +340,7 @@ #define OPEN_NO_RECALL 0x00400000 #define OPEN_FREE_SPACE_QUERY 0x00800000 /* should be zero */ #define CREATE_OPTIONS_MASK 0x007FFFFF +#define CREATE_OPTION_READONLY 0x10000000 #define CREATE_OPTION_SPECIAL 0x20000000 /* system. NB not sent over wire */ /* ImpersonationLevel flags */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index cfd9750852b..95fbba4ea7d 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1224,11 +1224,8 @@ OldOpenRetry: else /* BB FIXME BB */ pSMB->FileAttributes = cpu_to_le16(0/*ATTR_NORMAL*/); - /* if ((omode & S_IWUGO) == 0) - pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY);*/ - /* Above line causes problems due to vfs splitting create into two - pieces - need to set mode after file created not while it is - being created */ + if (create_options & CREATE_OPTION_READONLY) + pSMB->FileAttributes |= cpu_to_le16(ATTR_READONLY); /* BB FIXME BB */ /* pSMB->CreateOptions = cpu_to_le32(create_options & @@ -1331,17 +1328,16 @@ openRetry: pSMB->FileAttributes = cpu_to_le32(ATTR_SYSTEM); else pSMB->FileAttributes = cpu_to_le32(ATTR_NORMAL); + /* XP does not handle ATTR_POSIX_SEMANTICS */ /* but it helps speed up case sensitive checks for other servers such as Samba */ if (tcon->ses->capabilities & CAP_UNIX) pSMB->FileAttributes |= cpu_to_le32(ATTR_POSIX_SEMANTICS); - /* if ((omode & S_IWUGO) == 0) - pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY);*/ - /* Above line causes problems due to vfs splitting create into two - pieces - need to set mode after file created not while it is - being created */ + if (create_options & CREATE_OPTION_READONLY) + pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY); + pSMB->ShareAccess = cpu_to_le32(FILE_SHARE_ALL); pSMB->CreateDisposition = cpu_to_le32(openDisposition); pSMB->CreateOptions = cpu_to_le32(create_options & CREATE_OPTIONS_MASK); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 6ed775986be..e4e0078a052 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -119,6 +119,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, { int rc = -ENOENT; int xid; + int create_options = CREATE_NOT_DIR; int oplock = 0; int desiredAccess = GENERIC_READ | GENERIC_WRITE; __u16 fileHandle; @@ -176,9 +177,19 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, FreeXid(xid); return -ENOMEM; } + + mode &= ~current->fs->umask; + + /* + * if we're not using unix extensions, see if we need to set + * ATTR_READONLY on the create call + */ + if (!pTcon->unix_ext && (mode & S_IWUGO) == 0) + create_options |= CREATE_OPTION_READONLY; + if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, - desiredAccess, CREATE_NOT_DIR, + desiredAccess, create_options, &fileHandle, &oplock, buf, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); else @@ -187,7 +198,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, if (rc == -EIO) { /* old server, retry the open legacy style */ rc = SMBLegacyOpen(xid, pTcon, full_path, disposition, - desiredAccess, CREATE_NOT_DIR, + desiredAccess, create_options, &fileHandle, &oplock, buf, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } @@ -197,7 +208,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, /* If Open reported that we actually created a file then we now have to set the mode if possible */ if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { - mode &= ~current->fs->umask; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, (__u64)current->fsuid, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d904a037c83..fcbdbb6ad7b 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -974,8 +974,8 @@ mkdir_get_info: * failed to get it from the server or was set bogus */ if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) direntry->d_inode->i_nlink = 2; + mode &= ~current->fs->umask; if (pTcon->unix_ext) { - mode &= ~current->fs->umask; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, @@ -994,9 +994,16 @@ mkdir_get_info: CIFS_MOUNT_MAP_SPECIAL_CHR); } } else { - /* BB to be implemented via Windows secrty descriptors - eg CIFSSMBWinSetPerms(xid, pTcon, full_path, mode, - -1, -1, local_nls); */ + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && + (mode & S_IWUGO) == 0) { + FILE_BASIC_INFO pInfo; + memset(&pInfo, 0, sizeof(pInfo)); + pInfo.Attributes = cpu_to_le32(ATTR_READONLY); + CIFSSMBSetTimes(xid, pTcon, full_path, + &pInfo, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + } if (direntry->d_inode) { direntry->d_inode->i_mode = mode; direntry->d_inode->i_mode |= S_IFDIR; -- cgit v1.2.3 From e691b9d1a096fbaaff9d6d6aef1adc593b786e62 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 11 May 2008 15:53:33 +0000 Subject: [CIFS] don't allow demultiplex thread to exit until kthread_stop is called cifs_demultiplex_thread can exit under several conditions: 1) if it's signaled 2) if there's a problem with session setup 3) if kthread_stop is called on it The first two are problems. If kthread_stop is called on the thread, there is no guarantee that it will still be up. We need to have the thread stay up until kthread_stop is called on it. One option would be to not even try to tear things down until after kthread_stop is called. However, in the case where there is a problem setting up the session, there's no real reason to try continuing the loop. This patch allows the thread to clean up and prepare for exit under all three conditions, but it has the thread go to sleep until kthread_stop is called. This allows us to simplify the shutdown code somewhat since we can be reasonably sure that the thread won't exit after being signaled but before kthread_stop is called. It also removes the places where the thread itself set the tsk variable since it appeared that it could have a potential race where the thread might never be shut down. Signed-off-by: Jeff Layton Acked-by: Christoph Hellwig Signed-off-by: Steve French --- fs/cifs/connect.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 96505086759..f428bf3bf1a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -348,7 +348,6 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) int reconnect; current->flags |= PF_MEMALLOC; - server->tsk = current; /* save process info to wake at shutdown */ cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current))); write_lock(&GlobalSMBSeslock); atomic_inc(&tcpSesAllocCount); @@ -651,10 +650,20 @@ multi_t2_fnd: spin_lock(&GlobalMid_Lock); server->tcpStatus = CifsExiting; - server->tsk = NULL; + spin_unlock(&GlobalMid_Lock); + + /* don't exit until kthread_stop is called */ + set_current_state(TASK_UNINTERRUPTIBLE); + while (!kthread_should_stop()) { + schedule(); + set_current_state(TASK_UNINTERRUPTIBLE); + } + set_current_state(TASK_RUNNING); + /* check if we have blocked requests that need to free */ /* Note that cifs_max_pending is normally 50, but can be set at module install time to as little as two */ + spin_lock(&GlobalMid_Lock); if (atomic_read(&server->inFlight) >= cifs_max_pending) atomic_set(&server->inFlight, cifs_max_pending - 1); /* We do not want to set the max_pending too low or we @@ -2187,15 +2196,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, srvTcp->tcpStatus = CifsExiting; spin_unlock(&GlobalMid_Lock); if (srvTcp->tsk) { - struct task_struct *tsk; /* If we could verify that kthread_stop would always wake up processes blocked in tcp in recv_mesg then we could remove the send_sig call */ force_sig(SIGKILL, srvTcp->tsk); - tsk = srvTcp->tsk; - if (tsk) - kthread_stop(tsk); + kthread_stop(srvTcp->tsk); } } /* If find_unc succeeded then rc == 0 so we can not end */ @@ -2211,23 +2217,17 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, if ((temp_rc == -ESHUTDOWN) && (pSesInfo->server) && (pSesInfo->server->tsk)) { - struct task_struct *tsk; force_sig(SIGKILL, pSesInfo->server->tsk); - tsk = pSesInfo->server->tsk; - if (tsk) - kthread_stop(tsk); + kthread_stop(pSesInfo->server->tsk); } } else { cFYI(1, ("No session or bad tcon")); if ((pSesInfo->server) && (pSesInfo->server->tsk)) { - struct task_struct *tsk; force_sig(SIGKILL, pSesInfo->server->tsk); - tsk = pSesInfo->server->tsk; - if (tsk) - kthread_stop(tsk); + kthread_stop(pSesInfo->server->tsk); } } sesInfoFree(pSesInfo); -- cgit v1.2.3 From c3921ab71507b108d51a0f1ee960f80cd668a93d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 11 May 2008 16:04:48 -0700 Subject: Add new 'cond_resched_bkl()' helper function It acts exactly like a regular 'cond_resched()', but will not get optimized away when CONFIG_PREEMPT is set. Normal kernel code is already preemptable in the presense of CONFIG_PREEMPT, so cond_resched() is optimized away (see commit 02b67cc3ba36bdba351d6c3a00593f4ec550d9d3 "sched: do not do cond_resched() when CONFIG_PREEMPT"). But when wanting to conditionally reschedule while holding a lock, you need to use "cond_sched_lock(lock)", and the new function is the BKL equivalent of that. Also make fs/locks.c use it. Signed-off-by: Linus Torvalds --- fs/locks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 0ac6b92cb0b..11dbf08651b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -773,7 +773,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) * give it the opportunity to lock the file. */ if (found) - cond_resched(); + cond_resched_bkl(); find_conflict: for_each_lock(inode, before) { -- cgit v1.2.3 From a0612b1f0b3d851458dafe5886e33d58c1967440 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 12 May 2008 14:01:49 -0700 Subject: uml: hppfs fixes hppfs tidying and fixes noticed during hch's get_inode work - style fixes a copy_to_user got its return value checked hppfs_write no longer fiddles file->f_pos because it gets and returns pos in its arguments hppfs_delete_inode dputs the underlyng procfs dentry stored in its private data and mntputs the vfsmnt stashed in s_fs_info hppfs_put_super no longer needs to mntput the s_fs_info, so it no longer needs to exist hppfs_readlink and hppfs_follow_link were doing a bunch of stuff with a struct file which they didn't use there is now a ->permission which calls generic_permission get_inode was always returning 0 for some reason - it now returns an inode if nothing bad happened Signed-off-by: Jeff Dike Cc: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hppfs/hppfs_kern.c | 82 +++++++++++++++++++-------------------------------- 1 file changed, 30 insertions(+), 52 deletions(-) (limited to 'fs') diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c index 8601d8ef3b5..65077aa90f0 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs_kern.c @@ -33,7 +33,7 @@ struct hppfs_private { }; struct hppfs_inode_info { - struct dentry *proc_dentry; + struct dentry *proc_dentry; struct inode vfs_inode; }; @@ -52,7 +52,7 @@ static int is_pid(struct dentry *dentry) int i; sb = dentry->d_sb; - if ((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root)) + if (dentry->d_parent != sb->s_root) return 0; for (i = 0; i < dentry->d_name.len; i++) { @@ -136,7 +136,7 @@ static int file_removed(struct dentry *dentry, const char *file) } static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, - struct nameidata *nd) + struct nameidata *nd) { struct dentry *proc_dentry, *new, *parent; struct inode *inode; @@ -254,6 +254,8 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count, int err; if (hppfs->contents != NULL) { + int rem; + if (*ppos >= hppfs->len) return 0; @@ -267,8 +269,10 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count, if (off + count > hppfs->len) count = hppfs->len - off; - copy_to_user(buf, &data->contents[off], count); - *ppos += count; + rem = copy_to_user(buf, &data->contents[off], count); + *ppos += count - rem; + if (rem > 0) + return -EFAULT; } else if (hppfs->host_fd != -1) { err = os_seek_file(hppfs->host_fd, *ppos); if (err) { @@ -285,21 +289,15 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count, return count; } -static ssize_t hppfs_write(struct file *file, const char __user *buf, size_t len, - loff_t *ppos) +static ssize_t hppfs_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) { struct hppfs_private *data = file->private_data; struct file *proc_file = data->proc_file; ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); - int err; write = proc_file->f_path.dentry->d_inode->i_fop->write; - - proc_file->f_pos = file->f_pos; - err = (*write)(proc_file, buf, len, &proc_file->f_pos); - file->f_pos = proc_file->f_pos; - - return err; + return (*write)(proc_file, buf, len, ppos); } static int open_host_sock(char *host_file, int *filter_out) @@ -357,7 +355,7 @@ static struct hppfs_data *hppfs_get_data(int fd, int filter, if (filter) { while ((n = read_proc(proc_file, data->contents, - sizeof(data->contents), NULL, 0)) > 0) + sizeof(data->contents), NULL, 0)) > 0) os_write_file(fd, data->contents, n); err = os_shutdown_socket(fd, 0, 1); if (err) { @@ -429,8 +427,8 @@ static int file_mode(int fmode) static int hppfs_open(struct inode *inode, struct file *file) { struct hppfs_private *data; - struct dentry *proc_dentry; struct vfsmount *proc_mnt; + struct dentry *proc_dentry; char *host_file; int err, fd, type, filter; @@ -492,8 +490,8 @@ static int hppfs_open(struct inode *inode, struct file *file) static int hppfs_dir_open(struct inode *inode, struct file *file) { struct hppfs_private *data; - struct dentry *proc_dentry; struct vfsmount *proc_mnt; + struct dentry *proc_dentry; int err; err = -ENOMEM; @@ -620,6 +618,9 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb) void hppfs_delete_inode(struct inode *ino) { + dput(HPPFS_I(ino)->proc_dentry); + mntput(ino->i_sb->s_fs_info); + clear_inode(ino); } @@ -628,69 +629,46 @@ static void hppfs_destroy_inode(struct inode *inode) kfree(HPPFS_I(inode)); } -static void hppfs_put_super(struct super_block *sb) -{ - mntput(sb->s_fs_info); -} - static const struct super_operations hppfs_sbops = { .alloc_inode = hppfs_alloc_inode, .destroy_inode = hppfs_destroy_inode, .delete_inode = hppfs_delete_inode, .statfs = hppfs_statfs, - .put_super = hppfs_put_super, }; static int hppfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) { - struct file *proc_file; struct dentry *proc_dentry; - struct vfsmount *proc_mnt; - int ret; proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; - proc_mnt = dentry->d_sb->s_fs_info; - - proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), O_RDONLY); - if (IS_ERR(proc_file)) - return PTR_ERR(proc_file); - - ret = proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, buflen); - - fput(proc_file); - - return ret; + return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, + buflen); } -static void* hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) { - struct file *proc_file; struct dentry *proc_dentry; - struct vfsmount *proc_mnt; - void *ret; proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; - proc_mnt = dentry->d_sb->s_fs_info; - - proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), O_RDONLY); - if (IS_ERR(proc_file)) - return proc_file; - - ret = proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); - fput(proc_file); + return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); +} - return ret; +int hppfs_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + return generic_permission(inode, mask, NULL); } static const struct inode_operations hppfs_dir_iops = { .lookup = hppfs_lookup, + .permission = hppfs_permission, }; static const struct inode_operations hppfs_link_iops = { .readlink = hppfs_readlink, .follow_link = hppfs_follow_link, + .permission = hppfs_permission, }; static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) @@ -712,7 +690,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) inode->i_fop = &hppfs_file_fops; } - HPPFS_I(inode)->proc_dentry = dentry; + HPPFS_I(inode)->proc_dentry = dget(dentry); inode->i_uid = proc_ino->i_uid; inode->i_gid = proc_ino->i_gid; @@ -725,7 +703,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) inode->i_size = proc_ino->i_size; inode->i_blocks = proc_ino->i_blocks; - return 0; + return inode; } static int hppfs_fill_super(struct super_block *sb, void *d, int silent) -- cgit v1.2.3 From 46d7b522ebf486edbd096965d534cc6465e9e309 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 12 May 2008 14:01:50 -0700 Subject: uml: move hppfs_kern.c to hppfs.c There's no reason for the _kern in hppfs_kern.c, so move it to hppfs.c. Signed-off-by: Jeff Dike Cc: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hppfs/Makefile | 6 +- fs/hppfs/hppfs.c | 771 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/hppfs/hppfs_kern.c | 771 -------------------------------------------------- 3 files changed, 774 insertions(+), 774 deletions(-) create mode 100644 fs/hppfs/hppfs.c delete mode 100644 fs/hppfs/hppfs_kern.c (limited to 'fs') diff --git a/fs/hppfs/Makefile b/fs/hppfs/Makefile index 6890433f759..8a1f5034436 100644 --- a/fs/hppfs/Makefile +++ b/fs/hppfs/Makefile @@ -1,9 +1,9 @@ # -# Copyright (C) 2002, 2003 Jeff Dike (jdike@karaya.com) +# Copyright (C) 2002 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) # Licensed under the GPL # -hppfs-objs := hppfs_kern.o +hppfs-objs := hppfs.o obj-y = -obj-$(CONFIG_HPPFS) += hppfs.o +obj-$(CONFIG_HPPFS) += $(hppfs-objs) diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c new file mode 100644 index 00000000000..65077aa90f0 --- /dev/null +++ b/fs/hppfs/hppfs.c @@ -0,0 +1,771 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "os.h" + +static struct inode *get_inode(struct super_block *, struct dentry *); + +struct hppfs_data { + struct list_head list; + char contents[PAGE_SIZE - sizeof(struct list_head)]; +}; + +struct hppfs_private { + struct file *proc_file; + int host_fd; + loff_t len; + struct hppfs_data *contents; +}; + +struct hppfs_inode_info { + struct dentry *proc_dentry; + struct inode vfs_inode; +}; + +static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode) +{ + return container_of(inode, struct hppfs_inode_info, vfs_inode); +} + +#define HPPFS_SUPER_MAGIC 0xb00000ee + +static const struct super_operations hppfs_sbops; + +static int is_pid(struct dentry *dentry) +{ + struct super_block *sb; + int i; + + sb = dentry->d_sb; + if (dentry->d_parent != sb->s_root) + return 0; + + for (i = 0; i < dentry->d_name.len; i++) { + if (!isdigit(dentry->d_name.name[i])) + return 0; + } + return 1; +} + +static char *dentry_name(struct dentry *dentry, int extra) +{ + struct dentry *parent; + char *root, *name; + const char *seg_name; + int len, seg_len; + + len = 0; + parent = dentry; + while (parent->d_parent != parent) { + if (is_pid(parent)) + len += strlen("pid") + 1; + else len += parent->d_name.len + 1; + parent = parent->d_parent; + } + + root = "proc"; + len += strlen(root); + name = kmalloc(len + extra + 1, GFP_KERNEL); + if (name == NULL) + return NULL; + + name[len] = '\0'; + parent = dentry; + while (parent->d_parent != parent) { + if (is_pid(parent)) { + seg_name = "pid"; + seg_len = strlen("pid"); + } + else { + seg_name = parent->d_name.name; + seg_len = parent->d_name.len; + } + + len -= seg_len + 1; + name[len] = '/'; + strncpy(&name[len + 1], seg_name, seg_len); + parent = parent->d_parent; + } + strncpy(name, root, strlen(root)); + return name; +} + +static int file_removed(struct dentry *dentry, const char *file) +{ + char *host_file; + int extra, fd; + + extra = 0; + if (file != NULL) + extra += strlen(file) + 1; + + host_file = dentry_name(dentry, extra + strlen("/remove")); + if (host_file == NULL) { + printk(KERN_ERR "file_removed : allocation failed\n"); + return -ENOMEM; + } + + if (file != NULL) { + strcat(host_file, "/"); + strcat(host_file, file); + } + strcat(host_file, "/remove"); + + fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); + kfree(host_file); + if (fd > 0) { + os_close_file(fd); + return 1; + } + return 0; +} + +static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, + struct nameidata *nd) +{ + struct dentry *proc_dentry, *new, *parent; + struct inode *inode; + int err, deleted; + + deleted = file_removed(dentry, NULL); + if (deleted < 0) + return ERR_PTR(deleted); + else if (deleted) + return ERR_PTR(-ENOENT); + + err = -ENOMEM; + parent = HPPFS_I(ino)->proc_dentry; + mutex_lock(&parent->d_inode->i_mutex); + proc_dentry = d_lookup(parent, &dentry->d_name); + if (proc_dentry == NULL) { + proc_dentry = d_alloc(parent, &dentry->d_name); + if (proc_dentry == NULL) { + mutex_unlock(&parent->d_inode->i_mutex); + goto out; + } + new = (*parent->d_inode->i_op->lookup)(parent->d_inode, + proc_dentry, NULL); + if (new) { + dput(proc_dentry); + proc_dentry = new; + } + } + mutex_unlock(&parent->d_inode->i_mutex); + + if (IS_ERR(proc_dentry)) + return proc_dentry; + + err = -ENOMEM; + inode = get_inode(ino->i_sb, proc_dentry); + if (!inode) + goto out_dput; + + d_add(dentry, inode); + return NULL; + + out_dput: + dput(proc_dentry); + out: + return ERR_PTR(err); +} + +static const struct inode_operations hppfs_file_iops = { +}; + +static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count, + loff_t *ppos, int is_user) +{ + ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); + ssize_t n; + + read = file->f_path.dentry->d_inode->i_fop->read; + + if (!is_user) + set_fs(KERNEL_DS); + + n = (*read)(file, buf, count, &file->f_pos); + + if (!is_user) + set_fs(USER_DS); + + if (ppos) + *ppos = file->f_pos; + return n; +} + +static ssize_t hppfs_read_file(int fd, char __user *buf, ssize_t count) +{ + ssize_t n; + int cur, err; + char *new_buf; + + n = -ENOMEM; + new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (new_buf == NULL) { + printk(KERN_ERR "hppfs_read_file : kmalloc failed\n"); + goto out; + } + n = 0; + while (count > 0) { + cur = min_t(ssize_t, count, PAGE_SIZE); + err = os_read_file(fd, new_buf, cur); + if (err < 0) { + printk(KERN_ERR "hppfs_read : read failed, " + "errno = %d\n", err); + n = err; + goto out_free; + } else if (err == 0) + break; + + if (copy_to_user(buf, new_buf, err)) { + n = -EFAULT; + goto out_free; + } + n += err; + count -= err; + } + out_free: + kfree(new_buf); + out: + return n; +} + +static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct hppfs_private *hppfs = file->private_data; + struct hppfs_data *data; + loff_t off; + int err; + + if (hppfs->contents != NULL) { + int rem; + + if (*ppos >= hppfs->len) + return 0; + + data = hppfs->contents; + off = *ppos; + while (off >= sizeof(data->contents)) { + data = list_entry(data->list.next, struct hppfs_data, + list); + off -= sizeof(data->contents); + } + + if (off + count > hppfs->len) + count = hppfs->len - off; + rem = copy_to_user(buf, &data->contents[off], count); + *ppos += count - rem; + if (rem > 0) + return -EFAULT; + } else if (hppfs->host_fd != -1) { + err = os_seek_file(hppfs->host_fd, *ppos); + if (err) { + printk(KERN_ERR "hppfs_read : seek failed, " + "errno = %d\n", err); + return err; + } + count = hppfs_read_file(hppfs->host_fd, buf, count); + if (count > 0) + *ppos += count; + } + else count = read_proc(hppfs->proc_file, buf, count, ppos, 1); + + return count; +} + +static ssize_t hppfs_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + struct hppfs_private *data = file->private_data; + struct file *proc_file = data->proc_file; + ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); + + write = proc_file->f_path.dentry->d_inode->i_fop->write; + return (*write)(proc_file, buf, len, ppos); +} + +static int open_host_sock(char *host_file, int *filter_out) +{ + char *end; + int fd; + + end = &host_file[strlen(host_file)]; + strcpy(end, "/rw"); + *filter_out = 1; + fd = os_connect_socket(host_file); + if (fd > 0) + return fd; + + strcpy(end, "/r"); + *filter_out = 0; + fd = os_connect_socket(host_file); + return fd; +} + +static void free_contents(struct hppfs_data *head) +{ + struct hppfs_data *data; + struct list_head *ele, *next; + + if (head == NULL) + return; + + list_for_each_safe(ele, next, &head->list) { + data = list_entry(ele, struct hppfs_data, list); + kfree(data); + } + kfree(head); +} + +static struct hppfs_data *hppfs_get_data(int fd, int filter, + struct file *proc_file, + struct file *hppfs_file, + loff_t *size_out) +{ + struct hppfs_data *data, *new, *head; + int n, err; + + err = -ENOMEM; + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) { + printk(KERN_ERR "hppfs_get_data : head allocation failed\n"); + goto failed; + } + + INIT_LIST_HEAD(&data->list); + + head = data; + *size_out = 0; + + if (filter) { + while ((n = read_proc(proc_file, data->contents, + sizeof(data->contents), NULL, 0)) > 0) + os_write_file(fd, data->contents, n); + err = os_shutdown_socket(fd, 0, 1); + if (err) { + printk(KERN_ERR "hppfs_get_data : failed to shut down " + "socket\n"); + goto failed_free; + } + } + while (1) { + n = os_read_file(fd, data->contents, sizeof(data->contents)); + if (n < 0) { + err = n; + printk(KERN_ERR "hppfs_get_data : read failed, " + "errno = %d\n", err); + goto failed_free; + } else if (n == 0) + break; + + *size_out += n; + + if (n < sizeof(data->contents)) + break; + + new = kmalloc(sizeof(*data), GFP_KERNEL); + if (new == 0) { + printk(KERN_ERR "hppfs_get_data : data allocation " + "failed\n"); + err = -ENOMEM; + goto failed_free; + } + + INIT_LIST_HEAD(&new->list); + list_add(&new->list, &data->list); + data = new; + } + return head; + + failed_free: + free_contents(head); + failed: + return ERR_PTR(err); +} + +static struct hppfs_private *hppfs_data(void) +{ + struct hppfs_private *data; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return data; + + *data = ((struct hppfs_private ) { .host_fd = -1, + .len = -1, + .contents = NULL } ); + return data; +} + +static int file_mode(int fmode) +{ + if (fmode == (FMODE_READ | FMODE_WRITE)) + return O_RDWR; + if (fmode == FMODE_READ) + return O_RDONLY; + if (fmode == FMODE_WRITE) + return O_WRONLY; + return 0; +} + +static int hppfs_open(struct inode *inode, struct file *file) +{ + struct hppfs_private *data; + struct vfsmount *proc_mnt; + struct dentry *proc_dentry; + char *host_file; + int err, fd, type, filter; + + err = -ENOMEM; + data = hppfs_data(); + if (data == NULL) + goto out; + + host_file = dentry_name(file->f_path.dentry, strlen("/rw")); + if (host_file == NULL) + goto out_free2; + + proc_dentry = HPPFS_I(inode)->proc_dentry; + proc_mnt = inode->i_sb->s_fs_info; + + /* XXX This isn't closed anywhere */ + data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), + file_mode(file->f_mode)); + err = PTR_ERR(data->proc_file); + if (IS_ERR(data->proc_file)) + goto out_free1; + + type = os_file_type(host_file); + if (type == OS_TYPE_FILE) { + fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); + if (fd >= 0) + data->host_fd = fd; + else + printk(KERN_ERR "hppfs_open : failed to open '%s', " + "errno = %d\n", host_file, -fd); + + data->contents = NULL; + } else if (type == OS_TYPE_DIR) { + fd = open_host_sock(host_file, &filter); + if (fd > 0) { + data->contents = hppfs_get_data(fd, filter, + data->proc_file, + file, &data->len); + if (!IS_ERR(data->contents)) + data->host_fd = fd; + } else + printk(KERN_ERR "hppfs_open : failed to open a socket " + "in '%s', errno = %d\n", host_file, -fd); + } + kfree(host_file); + + file->private_data = data; + return 0; + + out_free1: + kfree(host_file); + out_free2: + free_contents(data->contents); + kfree(data); + out: + return err; +} + +static int hppfs_dir_open(struct inode *inode, struct file *file) +{ + struct hppfs_private *data; + struct vfsmount *proc_mnt; + struct dentry *proc_dentry; + int err; + + err = -ENOMEM; + data = hppfs_data(); + if (data == NULL) + goto out; + + proc_dentry = HPPFS_I(inode)->proc_dentry; + proc_mnt = inode->i_sb->s_fs_info; + data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), + file_mode(file->f_mode)); + err = PTR_ERR(data->proc_file); + if (IS_ERR(data->proc_file)) + goto out_free; + + file->private_data = data; + return 0; + + out_free: + kfree(data); + out: + return err; +} + +static loff_t hppfs_llseek(struct file *file, loff_t off, int where) +{ + struct hppfs_private *data = file->private_data; + struct file *proc_file = data->proc_file; + loff_t (*llseek)(struct file *, loff_t, int); + loff_t ret; + + llseek = proc_file->f_path.dentry->d_inode->i_fop->llseek; + if (llseek != NULL) { + ret = (*llseek)(proc_file, off, where); + if (ret < 0) + return ret; + } + + return default_llseek(file, off, where); +} + +static const struct file_operations hppfs_file_fops = { + .owner = NULL, + .llseek = hppfs_llseek, + .read = hppfs_read, + .write = hppfs_write, + .open = hppfs_open, +}; + +struct hppfs_dirent { + void *vfs_dirent; + filldir_t filldir; + struct dentry *dentry; +}; + +static int hppfs_filldir(void *d, const char *name, int size, + loff_t offset, u64 inode, unsigned int type) +{ + struct hppfs_dirent *dirent = d; + + if (file_removed(dirent->dentry, name)) + return 0; + + return (*dirent->filldir)(dirent->vfs_dirent, name, size, offset, + inode, type); +} + +static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) +{ + struct hppfs_private *data = file->private_data; + struct file *proc_file = data->proc_file; + int (*readdir)(struct file *, void *, filldir_t); + struct hppfs_dirent dirent = ((struct hppfs_dirent) + { .vfs_dirent = ent, + .filldir = filldir, + .dentry = file->f_path.dentry + }); + int err; + + readdir = proc_file->f_path.dentry->d_inode->i_fop->readdir; + + proc_file->f_pos = file->f_pos; + err = (*readdir)(proc_file, &dirent, hppfs_filldir); + file->f_pos = proc_file->f_pos; + + return err; +} + +static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + return 0; +} + +static const struct file_operations hppfs_dir_fops = { + .owner = NULL, + .readdir = hppfs_readdir, + .open = hppfs_dir_open, + .fsync = hppfs_fsync, +}; + +static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf) +{ + sf->f_blocks = 0; + sf->f_bfree = 0; + sf->f_bavail = 0; + sf->f_files = 0; + sf->f_ffree = 0; + sf->f_type = HPPFS_SUPER_MAGIC; + return 0; +} + +static struct inode *hppfs_alloc_inode(struct super_block *sb) +{ + struct hppfs_inode_info *hi; + + hi = kmalloc(sizeof(*hi), GFP_KERNEL); + if (!hi) + return NULL; + + hi->proc_dentry = NULL; + inode_init_once(&hi->vfs_inode); + return &hi->vfs_inode; +} + +void hppfs_delete_inode(struct inode *ino) +{ + dput(HPPFS_I(ino)->proc_dentry); + mntput(ino->i_sb->s_fs_info); + + clear_inode(ino); +} + +static void hppfs_destroy_inode(struct inode *inode) +{ + kfree(HPPFS_I(inode)); +} + +static const struct super_operations hppfs_sbops = { + .alloc_inode = hppfs_alloc_inode, + .destroy_inode = hppfs_destroy_inode, + .delete_inode = hppfs_delete_inode, + .statfs = hppfs_statfs, +}; + +static int hppfs_readlink(struct dentry *dentry, char __user *buffer, + int buflen) +{ + struct dentry *proc_dentry; + + proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; + return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, + buflen); +} + +static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *proc_dentry; + + proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; + + return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); +} + +int hppfs_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + return generic_permission(inode, mask, NULL); +} + +static const struct inode_operations hppfs_dir_iops = { + .lookup = hppfs_lookup, + .permission = hppfs_permission, +}; + +static const struct inode_operations hppfs_link_iops = { + .readlink = hppfs_readlink, + .follow_link = hppfs_follow_link, + .permission = hppfs_permission, +}; + +static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) +{ + struct inode *proc_ino = dentry->d_inode; + struct inode *inode = new_inode(sb); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (S_ISDIR(dentry->d_inode->i_mode)) { + inode->i_op = &hppfs_dir_iops; + inode->i_fop = &hppfs_dir_fops; + } else if (S_ISLNK(dentry->d_inode->i_mode)) { + inode->i_op = &hppfs_link_iops; + inode->i_fop = &hppfs_file_fops; + } else { + inode->i_op = &hppfs_file_iops; + inode->i_fop = &hppfs_file_fops; + } + + HPPFS_I(inode)->proc_dentry = dget(dentry); + + inode->i_uid = proc_ino->i_uid; + inode->i_gid = proc_ino->i_gid; + inode->i_atime = proc_ino->i_atime; + inode->i_mtime = proc_ino->i_mtime; + inode->i_ctime = proc_ino->i_ctime; + inode->i_ino = proc_ino->i_ino; + inode->i_mode = proc_ino->i_mode; + inode->i_nlink = proc_ino->i_nlink; + inode->i_size = proc_ino->i_size; + inode->i_blocks = proc_ino->i_blocks; + + return inode; +} + +static int hppfs_fill_super(struct super_block *sb, void *d, int silent) +{ + struct inode *root_inode; + struct vfsmount *proc_mnt; + int err = -ENOENT; + + proc_mnt = do_kern_mount("proc", 0, "proc", NULL); + if (IS_ERR(proc_mnt)) + goto out; + + sb->s_blocksize = 1024; + sb->s_blocksize_bits = 10; + sb->s_magic = HPPFS_SUPER_MAGIC; + sb->s_op = &hppfs_sbops; + sb->s_fs_info = proc_mnt; + + err = -ENOMEM; + root_inode = get_inode(sb, proc_mnt->mnt_sb->s_root); + if (!root_inode) + goto out_mntput; + + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) + goto out_iput; + + return 0; + + out_iput: + iput(root_inode); + out_mntput: + mntput(proc_mnt); + out: + return(err); +} + +static int hppfs_read_super(struct file_system_type *type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) +{ + return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt); +} + +static struct file_system_type hppfs_type = { + .owner = THIS_MODULE, + .name = "hppfs", + .get_sb = hppfs_read_super, + .kill_sb = kill_anon_super, + .fs_flags = 0, +}; + +static int __init init_hppfs(void) +{ + return register_filesystem(&hppfs_type); +} + +static void __exit exit_hppfs(void) +{ + unregister_filesystem(&hppfs_type); +} + +module_init(init_hppfs) +module_exit(exit_hppfs) +MODULE_LICENSE("GPL"); diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c deleted file mode 100644 index 65077aa90f0..00000000000 --- a/fs/hppfs/hppfs_kern.c +++ /dev/null @@ -1,771 +0,0 @@ -/* - * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "os.h" - -static struct inode *get_inode(struct super_block *, struct dentry *); - -struct hppfs_data { - struct list_head list; - char contents[PAGE_SIZE - sizeof(struct list_head)]; -}; - -struct hppfs_private { - struct file *proc_file; - int host_fd; - loff_t len; - struct hppfs_data *contents; -}; - -struct hppfs_inode_info { - struct dentry *proc_dentry; - struct inode vfs_inode; -}; - -static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode) -{ - return container_of(inode, struct hppfs_inode_info, vfs_inode); -} - -#define HPPFS_SUPER_MAGIC 0xb00000ee - -static const struct super_operations hppfs_sbops; - -static int is_pid(struct dentry *dentry) -{ - struct super_block *sb; - int i; - - sb = dentry->d_sb; - if (dentry->d_parent != sb->s_root) - return 0; - - for (i = 0; i < dentry->d_name.len; i++) { - if (!isdigit(dentry->d_name.name[i])) - return 0; - } - return 1; -} - -static char *dentry_name(struct dentry *dentry, int extra) -{ - struct dentry *parent; - char *root, *name; - const char *seg_name; - int len, seg_len; - - len = 0; - parent = dentry; - while (parent->d_parent != parent) { - if (is_pid(parent)) - len += strlen("pid") + 1; - else len += parent->d_name.len + 1; - parent = parent->d_parent; - } - - root = "proc"; - len += strlen(root); - name = kmalloc(len + extra + 1, GFP_KERNEL); - if (name == NULL) - return NULL; - - name[len] = '\0'; - parent = dentry; - while (parent->d_parent != parent) { - if (is_pid(parent)) { - seg_name = "pid"; - seg_len = strlen("pid"); - } - else { - seg_name = parent->d_name.name; - seg_len = parent->d_name.len; - } - - len -= seg_len + 1; - name[len] = '/'; - strncpy(&name[len + 1], seg_name, seg_len); - parent = parent->d_parent; - } - strncpy(name, root, strlen(root)); - return name; -} - -static int file_removed(struct dentry *dentry, const char *file) -{ - char *host_file; - int extra, fd; - - extra = 0; - if (file != NULL) - extra += strlen(file) + 1; - - host_file = dentry_name(dentry, extra + strlen("/remove")); - if (host_file == NULL) { - printk(KERN_ERR "file_removed : allocation failed\n"); - return -ENOMEM; - } - - if (file != NULL) { - strcat(host_file, "/"); - strcat(host_file, file); - } - strcat(host_file, "/remove"); - - fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); - kfree(host_file); - if (fd > 0) { - os_close_file(fd); - return 1; - } - return 0; -} - -static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, - struct nameidata *nd) -{ - struct dentry *proc_dentry, *new, *parent; - struct inode *inode; - int err, deleted; - - deleted = file_removed(dentry, NULL); - if (deleted < 0) - return ERR_PTR(deleted); - else if (deleted) - return ERR_PTR(-ENOENT); - - err = -ENOMEM; - parent = HPPFS_I(ino)->proc_dentry; - mutex_lock(&parent->d_inode->i_mutex); - proc_dentry = d_lookup(parent, &dentry->d_name); - if (proc_dentry == NULL) { - proc_dentry = d_alloc(parent, &dentry->d_name); - if (proc_dentry == NULL) { - mutex_unlock(&parent->d_inode->i_mutex); - goto out; - } - new = (*parent->d_inode->i_op->lookup)(parent->d_inode, - proc_dentry, NULL); - if (new) { - dput(proc_dentry); - proc_dentry = new; - } - } - mutex_unlock(&parent->d_inode->i_mutex); - - if (IS_ERR(proc_dentry)) - return proc_dentry; - - err = -ENOMEM; - inode = get_inode(ino->i_sb, proc_dentry); - if (!inode) - goto out_dput; - - d_add(dentry, inode); - return NULL; - - out_dput: - dput(proc_dentry); - out: - return ERR_PTR(err); -} - -static const struct inode_operations hppfs_file_iops = { -}; - -static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count, - loff_t *ppos, int is_user) -{ - ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); - ssize_t n; - - read = file->f_path.dentry->d_inode->i_fop->read; - - if (!is_user) - set_fs(KERNEL_DS); - - n = (*read)(file, buf, count, &file->f_pos); - - if (!is_user) - set_fs(USER_DS); - - if (ppos) - *ppos = file->f_pos; - return n; -} - -static ssize_t hppfs_read_file(int fd, char __user *buf, ssize_t count) -{ - ssize_t n; - int cur, err; - char *new_buf; - - n = -ENOMEM; - new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (new_buf == NULL) { - printk(KERN_ERR "hppfs_read_file : kmalloc failed\n"); - goto out; - } - n = 0; - while (count > 0) { - cur = min_t(ssize_t, count, PAGE_SIZE); - err = os_read_file(fd, new_buf, cur); - if (err < 0) { - printk(KERN_ERR "hppfs_read : read failed, " - "errno = %d\n", err); - n = err; - goto out_free; - } else if (err == 0) - break; - - if (copy_to_user(buf, new_buf, err)) { - n = -EFAULT; - goto out_free; - } - n += err; - count -= err; - } - out_free: - kfree(new_buf); - out: - return n; -} - -static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) -{ - struct hppfs_private *hppfs = file->private_data; - struct hppfs_data *data; - loff_t off; - int err; - - if (hppfs->contents != NULL) { - int rem; - - if (*ppos >= hppfs->len) - return 0; - - data = hppfs->contents; - off = *ppos; - while (off >= sizeof(data->contents)) { - data = list_entry(data->list.next, struct hppfs_data, - list); - off -= sizeof(data->contents); - } - - if (off + count > hppfs->len) - count = hppfs->len - off; - rem = copy_to_user(buf, &data->contents[off], count); - *ppos += count - rem; - if (rem > 0) - return -EFAULT; - } else if (hppfs->host_fd != -1) { - err = os_seek_file(hppfs->host_fd, *ppos); - if (err) { - printk(KERN_ERR "hppfs_read : seek failed, " - "errno = %d\n", err); - return err; - } - count = hppfs_read_file(hppfs->host_fd, buf, count); - if (count > 0) - *ppos += count; - } - else count = read_proc(hppfs->proc_file, buf, count, ppos, 1); - - return count; -} - -static ssize_t hppfs_write(struct file *file, const char __user *buf, - size_t len, loff_t *ppos) -{ - struct hppfs_private *data = file->private_data; - struct file *proc_file = data->proc_file; - ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); - - write = proc_file->f_path.dentry->d_inode->i_fop->write; - return (*write)(proc_file, buf, len, ppos); -} - -static int open_host_sock(char *host_file, int *filter_out) -{ - char *end; - int fd; - - end = &host_file[strlen(host_file)]; - strcpy(end, "/rw"); - *filter_out = 1; - fd = os_connect_socket(host_file); - if (fd > 0) - return fd; - - strcpy(end, "/r"); - *filter_out = 0; - fd = os_connect_socket(host_file); - return fd; -} - -static void free_contents(struct hppfs_data *head) -{ - struct hppfs_data *data; - struct list_head *ele, *next; - - if (head == NULL) - return; - - list_for_each_safe(ele, next, &head->list) { - data = list_entry(ele, struct hppfs_data, list); - kfree(data); - } - kfree(head); -} - -static struct hppfs_data *hppfs_get_data(int fd, int filter, - struct file *proc_file, - struct file *hppfs_file, - loff_t *size_out) -{ - struct hppfs_data *data, *new, *head; - int n, err; - - err = -ENOMEM; - data = kmalloc(sizeof(*data), GFP_KERNEL); - if (data == NULL) { - printk(KERN_ERR "hppfs_get_data : head allocation failed\n"); - goto failed; - } - - INIT_LIST_HEAD(&data->list); - - head = data; - *size_out = 0; - - if (filter) { - while ((n = read_proc(proc_file, data->contents, - sizeof(data->contents), NULL, 0)) > 0) - os_write_file(fd, data->contents, n); - err = os_shutdown_socket(fd, 0, 1); - if (err) { - printk(KERN_ERR "hppfs_get_data : failed to shut down " - "socket\n"); - goto failed_free; - } - } - while (1) { - n = os_read_file(fd, data->contents, sizeof(data->contents)); - if (n < 0) { - err = n; - printk(KERN_ERR "hppfs_get_data : read failed, " - "errno = %d\n", err); - goto failed_free; - } else if (n == 0) - break; - - *size_out += n; - - if (n < sizeof(data->contents)) - break; - - new = kmalloc(sizeof(*data), GFP_KERNEL); - if (new == 0) { - printk(KERN_ERR "hppfs_get_data : data allocation " - "failed\n"); - err = -ENOMEM; - goto failed_free; - } - - INIT_LIST_HEAD(&new->list); - list_add(&new->list, &data->list); - data = new; - } - return head; - - failed_free: - free_contents(head); - failed: - return ERR_PTR(err); -} - -static struct hppfs_private *hppfs_data(void) -{ - struct hppfs_private *data; - - data = kmalloc(sizeof(*data), GFP_KERNEL); - if (data == NULL) - return data; - - *data = ((struct hppfs_private ) { .host_fd = -1, - .len = -1, - .contents = NULL } ); - return data; -} - -static int file_mode(int fmode) -{ - if (fmode == (FMODE_READ | FMODE_WRITE)) - return O_RDWR; - if (fmode == FMODE_READ) - return O_RDONLY; - if (fmode == FMODE_WRITE) - return O_WRONLY; - return 0; -} - -static int hppfs_open(struct inode *inode, struct file *file) -{ - struct hppfs_private *data; - struct vfsmount *proc_mnt; - struct dentry *proc_dentry; - char *host_file; - int err, fd, type, filter; - - err = -ENOMEM; - data = hppfs_data(); - if (data == NULL) - goto out; - - host_file = dentry_name(file->f_path.dentry, strlen("/rw")); - if (host_file == NULL) - goto out_free2; - - proc_dentry = HPPFS_I(inode)->proc_dentry; - proc_mnt = inode->i_sb->s_fs_info; - - /* XXX This isn't closed anywhere */ - data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), - file_mode(file->f_mode)); - err = PTR_ERR(data->proc_file); - if (IS_ERR(data->proc_file)) - goto out_free1; - - type = os_file_type(host_file); - if (type == OS_TYPE_FILE) { - fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); - if (fd >= 0) - data->host_fd = fd; - else - printk(KERN_ERR "hppfs_open : failed to open '%s', " - "errno = %d\n", host_file, -fd); - - data->contents = NULL; - } else if (type == OS_TYPE_DIR) { - fd = open_host_sock(host_file, &filter); - if (fd > 0) { - data->contents = hppfs_get_data(fd, filter, - data->proc_file, - file, &data->len); - if (!IS_ERR(data->contents)) - data->host_fd = fd; - } else - printk(KERN_ERR "hppfs_open : failed to open a socket " - "in '%s', errno = %d\n", host_file, -fd); - } - kfree(host_file); - - file->private_data = data; - return 0; - - out_free1: - kfree(host_file); - out_free2: - free_contents(data->contents); - kfree(data); - out: - return err; -} - -static int hppfs_dir_open(struct inode *inode, struct file *file) -{ - struct hppfs_private *data; - struct vfsmount *proc_mnt; - struct dentry *proc_dentry; - int err; - - err = -ENOMEM; - data = hppfs_data(); - if (data == NULL) - goto out; - - proc_dentry = HPPFS_I(inode)->proc_dentry; - proc_mnt = inode->i_sb->s_fs_info; - data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), - file_mode(file->f_mode)); - err = PTR_ERR(data->proc_file); - if (IS_ERR(data->proc_file)) - goto out_free; - - file->private_data = data; - return 0; - - out_free: - kfree(data); - out: - return err; -} - -static loff_t hppfs_llseek(struct file *file, loff_t off, int where) -{ - struct hppfs_private *data = file->private_data; - struct file *proc_file = data->proc_file; - loff_t (*llseek)(struct file *, loff_t, int); - loff_t ret; - - llseek = proc_file->f_path.dentry->d_inode->i_fop->llseek; - if (llseek != NULL) { - ret = (*llseek)(proc_file, off, where); - if (ret < 0) - return ret; - } - - return default_llseek(file, off, where); -} - -static const struct file_operations hppfs_file_fops = { - .owner = NULL, - .llseek = hppfs_llseek, - .read = hppfs_read, - .write = hppfs_write, - .open = hppfs_open, -}; - -struct hppfs_dirent { - void *vfs_dirent; - filldir_t filldir; - struct dentry *dentry; -}; - -static int hppfs_filldir(void *d, const char *name, int size, - loff_t offset, u64 inode, unsigned int type) -{ - struct hppfs_dirent *dirent = d; - - if (file_removed(dirent->dentry, name)) - return 0; - - return (*dirent->filldir)(dirent->vfs_dirent, name, size, offset, - inode, type); -} - -static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) -{ - struct hppfs_private *data = file->private_data; - struct file *proc_file = data->proc_file; - int (*readdir)(struct file *, void *, filldir_t); - struct hppfs_dirent dirent = ((struct hppfs_dirent) - { .vfs_dirent = ent, - .filldir = filldir, - .dentry = file->f_path.dentry - }); - int err; - - readdir = proc_file->f_path.dentry->d_inode->i_fop->readdir; - - proc_file->f_pos = file->f_pos; - err = (*readdir)(proc_file, &dirent, hppfs_filldir); - file->f_pos = proc_file->f_pos; - - return err; -} - -static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) -{ - return 0; -} - -static const struct file_operations hppfs_dir_fops = { - .owner = NULL, - .readdir = hppfs_readdir, - .open = hppfs_dir_open, - .fsync = hppfs_fsync, -}; - -static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf) -{ - sf->f_blocks = 0; - sf->f_bfree = 0; - sf->f_bavail = 0; - sf->f_files = 0; - sf->f_ffree = 0; - sf->f_type = HPPFS_SUPER_MAGIC; - return 0; -} - -static struct inode *hppfs_alloc_inode(struct super_block *sb) -{ - struct hppfs_inode_info *hi; - - hi = kmalloc(sizeof(*hi), GFP_KERNEL); - if (!hi) - return NULL; - - hi->proc_dentry = NULL; - inode_init_once(&hi->vfs_inode); - return &hi->vfs_inode; -} - -void hppfs_delete_inode(struct inode *ino) -{ - dput(HPPFS_I(ino)->proc_dentry); - mntput(ino->i_sb->s_fs_info); - - clear_inode(ino); -} - -static void hppfs_destroy_inode(struct inode *inode) -{ - kfree(HPPFS_I(inode)); -} - -static const struct super_operations hppfs_sbops = { - .alloc_inode = hppfs_alloc_inode, - .destroy_inode = hppfs_destroy_inode, - .delete_inode = hppfs_delete_inode, - .statfs = hppfs_statfs, -}; - -static int hppfs_readlink(struct dentry *dentry, char __user *buffer, - int buflen) -{ - struct dentry *proc_dentry; - - proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; - return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, - buflen); -} - -static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct dentry *proc_dentry; - - proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; - - return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); -} - -int hppfs_permission(struct inode *inode, int mask, struct nameidata *nd) -{ - return generic_permission(inode, mask, NULL); -} - -static const struct inode_operations hppfs_dir_iops = { - .lookup = hppfs_lookup, - .permission = hppfs_permission, -}; - -static const struct inode_operations hppfs_link_iops = { - .readlink = hppfs_readlink, - .follow_link = hppfs_follow_link, - .permission = hppfs_permission, -}; - -static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) -{ - struct inode *proc_ino = dentry->d_inode; - struct inode *inode = new_inode(sb); - - if (!inode) - return ERR_PTR(-ENOMEM); - - if (S_ISDIR(dentry->d_inode->i_mode)) { - inode->i_op = &hppfs_dir_iops; - inode->i_fop = &hppfs_dir_fops; - } else if (S_ISLNK(dentry->d_inode->i_mode)) { - inode->i_op = &hppfs_link_iops; - inode->i_fop = &hppfs_file_fops; - } else { - inode->i_op = &hppfs_file_iops; - inode->i_fop = &hppfs_file_fops; - } - - HPPFS_I(inode)->proc_dentry = dget(dentry); - - inode->i_uid = proc_ino->i_uid; - inode->i_gid = proc_ino->i_gid; - inode->i_atime = proc_ino->i_atime; - inode->i_mtime = proc_ino->i_mtime; - inode->i_ctime = proc_ino->i_ctime; - inode->i_ino = proc_ino->i_ino; - inode->i_mode = proc_ino->i_mode; - inode->i_nlink = proc_ino->i_nlink; - inode->i_size = proc_ino->i_size; - inode->i_blocks = proc_ino->i_blocks; - - return inode; -} - -static int hppfs_fill_super(struct super_block *sb, void *d, int silent) -{ - struct inode *root_inode; - struct vfsmount *proc_mnt; - int err = -ENOENT; - - proc_mnt = do_kern_mount("proc", 0, "proc", NULL); - if (IS_ERR(proc_mnt)) - goto out; - - sb->s_blocksize = 1024; - sb->s_blocksize_bits = 10; - sb->s_magic = HPPFS_SUPER_MAGIC; - sb->s_op = &hppfs_sbops; - sb->s_fs_info = proc_mnt; - - err = -ENOMEM; - root_inode = get_inode(sb, proc_mnt->mnt_sb->s_root); - if (!root_inode) - goto out_mntput; - - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) - goto out_iput; - - return 0; - - out_iput: - iput(root_inode); - out_mntput: - mntput(proc_mnt); - out: - return(err); -} - -static int hppfs_read_super(struct file_system_type *type, - int flags, const char *dev_name, - void *data, struct vfsmount *mnt) -{ - return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt); -} - -static struct file_system_type hppfs_type = { - .owner = THIS_MODULE, - .name = "hppfs", - .get_sb = hppfs_read_super, - .kill_sb = kill_anon_super, - .fs_flags = 0, -}; - -static int __init init_hppfs(void) -{ - return register_filesystem(&hppfs_type); -} - -static void __exit exit_hppfs(void) -{ - unregister_filesystem(&hppfs_type); -} - -module_init(init_hppfs) -module_exit(exit_hppfs) -MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 8dc4e37362a5dc910d704d52ac6542bfd49ddc2f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 12 May 2008 14:02:04 -0700 Subject: ecryptfs: clean up (un)lock_parent dget(dentry->d_parent) --> dget_parent(dentry) unlock_parent() is racy and unnecessary. Replace single caller with unlock_dir(). There are several other suspect uses of ->d_parent in ecryptfs... Signed-off-by: Miklos Szeredi Cc: Michael Halcrow Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/inode.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 0a1397335a8..c92cc1c00aa 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -37,17 +37,11 @@ static struct dentry *lock_parent(struct dentry *dentry) { struct dentry *dir; - dir = dget(dentry->d_parent); + dir = dget_parent(dentry); mutex_lock_nested(&(dir->d_inode->i_mutex), I_MUTEX_PARENT); return dir; } -static void unlock_parent(struct dentry *dentry) -{ - mutex_unlock(&(dentry->d_parent->d_inode->i_mutex)); - dput(dentry->d_parent); -} - static void unlock_dir(struct dentry *dir) { mutex_unlock(&dir->d_inode->i_mutex); @@ -426,8 +420,9 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) int rc = 0; struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); + struct dentry *lower_dir_dentry; - lock_parent(lower_dentry); + lower_dir_dentry = lock_parent(lower_dentry); rc = vfs_unlink(lower_dir_inode, lower_dentry); if (rc) { printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); @@ -439,7 +434,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_ctime = dir->i_ctime; d_drop(dentry); out_unlock: - unlock_parent(lower_dentry); + unlock_dir(lower_dir_dentry); return rc; } -- cgit v1.2.3 From bb45d64224e5cafe8c8e0d18a20da998e5a7dc93 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 May 2008 14:02:06 -0700 Subject: ufs: remove unneeded ufs_put_inode prototype Signed-off-by: Christoph Hellwig Acked-by: Evgeniy Dushistov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/ufs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 244a1aaa940..11c035168ea 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -107,7 +107,6 @@ extern struct inode * ufs_new_inode (struct inode *, int); /* inode.c */ extern struct inode *ufs_iget(struct super_block *, unsigned long); -extern void ufs_put_inode (struct inode *); extern int ufs_write_inode (struct inode *, int); extern int ufs_sync_inode (struct inode *); extern void ufs_delete_inode (struct inode *); -- cgit v1.2.3 From 9377abd026bf9bde7db90dac09170034bf6d1cbf Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 12 May 2008 14:02:08 -0700 Subject: quota: don't call sync_fs() from vfs_quota_off() when there's no quota turn off Sometimes, vfs_quota_off() is called on a partially set up super block (for example when fill_super() fails for some reason). In such cases we cannot call ->sync_fs() because it can Oops because of not properly filled in super block. So in case we find there's not quota to turn off, we just skip everything and return which fixes the above problem. [akpm@linux-foundation.org: fxi tpyo] Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dquot.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/dquot.c b/fs/dquot.c index dfba1623ccc..5ac77da1995 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1491,6 +1491,16 @@ int vfs_quota_off(struct super_block *sb, int type, int remount) /* We need to serialize quota_off() for device */ mutex_lock(&dqopt->dqonoff_mutex); + + /* + * Skip everything if there's nothing to do. We have to do this because + * sometimes we are called when fill_super() failed and calling + * sync_fs() in such cases does no good. + */ + if (!sb_any_quota_enabled(sb) && !sb_any_quota_suspended(sb)) { + mutex_unlock(&dqopt->dqonoff_mutex); + return 0; + } for (cnt = 0; cnt < MAXQUOTAS; cnt++) { toputinode[cnt] = NULL; if (type != -1 && cnt != type) -- cgit v1.2.3 From 289f8e27ed435dcbefad132def06f4e84351e94f Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 12 May 2008 14:02:13 -0700 Subject: capabilities: add bounding set to /proc/self/status There is currently no way to query the bounding set of another task. As there appears to be no security reason not to, and as Michael Kerrisk points out the following valid reasons to do so exist: * consistency (I can see all of the other per-thread/process sets in /proc/.../status) * debugging -- I could imagine that it would make the job of debugging an application that uses capabilities a little simpler. this patch adds the bounding set to /proc/self/status right after the effective set. Signed-off-by: Serge E. Hallyn Acked-by: Michael Kerrisk Acked-by: Andrew G. Morgan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index dca997a93bf..9e3b8c33c24 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -298,6 +298,7 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) render_cap_t(m, "CapInh:\t", &p->cap_inheritable); render_cap_t(m, "CapPrm:\t", &p->cap_permitted); render_cap_t(m, "CapEff:\t", &p->cap_effective); + render_cap_t(m, "CapBnd:\t", &p->cap_bset); } static inline void task_context_switch_counts(struct seq_file *m, -- cgit v1.2.3 From 706322496b3a58af3cf258db2b553d6933656eef Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Mon, 12 May 2008 14:02:21 -0700 Subject: Fix hfsplus oops on image without extents Fix an oops with a corrupted hfs+ image. See http://bugzilla.kernel.org/show_bug.cgi?id=10548 for details. Problem is that we call hfs_btree_open() from hfsplus_fill_super() to set HFSPLUS_SB(sb).[ext_tree|cat_tree] Both trees are still NULL at this moment. If hfs_btree_open() fails for any reason it calls iput() on the page, which gets to hfsplus_releasepage() which tries to access HFSPLUS_SB(sb).* which is still NULL and oopses while dereferencing it. [akpm@linux-foundation.org: build fix] Signed-off-by: Eric Sesterhenn Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index d53b2af91c2..67e1c8b467c 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -65,6 +65,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) BUG(); return 0; } + if (!tree) + return 0; if (tree->node_size >= PAGE_CACHE_SIZE) { nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); spin_lock(&tree->hash_lock); -- cgit v1.2.3 From 4cd1a8fc3d3cd740416b14ece2693dbb5d065eaf Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 12 May 2008 14:02:31 -0700 Subject: memcg: fix possible panic when CONFIG_MM_OWNER=y When mm destruction happens, we should pass mm_update_next_owner() the old mm. But unfortunately new mm is passed in exec_mmap(). Thus, kernel panic is possible when a multi-threaded process uses exec(). Also, the owner member comment description is wrong. mm->owner does not necessarily point to the thread group leader. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KOSAKI Motohiro Acked-by: Balbir Singh Cc: "Paul Menage" Cc: "KAMEZAWA Hiroyuki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index aeaa9791d8b..1f8a24aa1f8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -736,7 +736,7 @@ static int exec_mmap(struct mm_struct *mm) tsk->active_mm = mm; activate_mm(active_mm, mm); task_unlock(tsk); - mm_update_next_owner(mm); + mm_update_next_owner(old_mm); arch_pick_mmap_layout(mm); if (old_mm) { up_read(&old_mm->mmap_sem); -- cgit v1.2.3 From 78bb6cb9a890d3d50ca3b02fce9223d3e734ab9b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 12 May 2008 14:02:32 -0700 Subject: fuse: add flag to turn on big writes Prior to 2.6.26 fuse only supported single page write requests. In theory all fuse filesystem should be able support bigger than 4k writes, as there's nothing in the API to prevent it. Unfortunately there's a known case in NTFS-3G where big writes cause filesystem corruption. There could also be other filesystems, where the lack of testing with big write requests would result in bugs. To prevent such problems on a kernel upgrade, disable big writes by default, but let filesystems set a flag to turn it on. Signed-off-by: Miklos Szeredi Cc: Szabolcs Szakacsits Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/file.c | 2 ++ fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 5 ++++- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f28cf8b46f8..8092f0d9fd1 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -804,6 +804,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, if (offset == PAGE_CACHE_SIZE) offset = 0; + if (!fc->big_writes) + break; } while (iov_iter_count(ii) && count < fc->max_write && req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index dadffa21a20..bae948657c4 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -404,6 +404,9 @@ struct fuse_conn { /** Is bmap not implemented by fs? */ unsigned no_bmap : 1; + /** Do multi-page cached writes */ + unsigned big_writes : 1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 79b61587383..fb77e096213 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -576,6 +576,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->no_lock = 1; if (arg->flags & FUSE_ATOMIC_O_TRUNC) fc->atomic_o_trunc = 1; + if (arg->flags & FUSE_BIG_WRITES) + fc->big_writes = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -599,7 +601,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->major = FUSE_KERNEL_VERSION; arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; - arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC; + arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | + FUSE_BIG_WRITES; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); -- cgit v1.2.3 From f36f21ecca9ee688301174e5f2e0827827a7a7ff Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 12 May 2008 14:02:33 -0700 Subject: Fix misuses of bdevname() bdevname() fills the buffer that it is given as a parameter, so calling strcpy() or snprintf() on the returned value is redundant (and probably not guaranteed to work - I don't think strcpy and snprintf support overlapping buffers.) Signed-off-by: Jean Delvare Cc: Stephen Tweedie Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/mballoc.c | 6 ++---- fs/jbd2/journal.c | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index fbec2ef9379..b128bdc0f55 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2639,8 +2639,7 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb) struct proc_dir_entry *proc; char devname[64]; - snprintf(devname, sizeof(devname) - 1, "%s", - bdevname(sb->s_bdev, devname)); + bdevname(sb->s_bdev, devname); sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); @@ -2674,8 +2673,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) if (sbi->s_mb_proc == NULL) return -EINVAL; - snprintf(devname, sizeof(devname) - 1, "%s", - bdevname(sb->s_bdev, devname)); + bdevname(sb->s_bdev, devname); remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 53632e3e845..2e24567c4a7 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -901,7 +901,7 @@ static void jbd2_stats_proc_init(journal_t *journal) { char name[BDEVNAME_SIZE]; - snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); + bdevname(journal->j_dev, name); journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); if (journal->j_proc_entry) { proc_create_data("history", S_IRUGO, journal->j_proc_entry, @@ -915,7 +915,7 @@ static void jbd2_stats_proc_exit(journal_t *journal) { char name[BDEVNAME_SIZE]; - snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); + bdevname(journal->j_dev, name); remove_proc_entry("info", journal->j_proc_entry); remove_proc_entry("history", journal->j_proc_entry); remove_proc_entry(name, proc_jbd2_stats); -- cgit v1.2.3 From 43f14d856f013a4cc63da2c765617c665274338c Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 12 May 2008 14:02:40 -0700 Subject: eCryptFS: fix imbalanced mutex locking Fix imbalanced calls for mutex lock/unlock on ecryptfs_daemon_hash_mux Revealed by Ingo Molnar: http://lkml.org/lkml/2008/5/7/260 Signed-off-by: Cyrill Gorcunov Cc: Michael Halcrow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/miscdev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 788995efd1d..6560da1a58c 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -257,12 +257,14 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, mutex_lock(&daemon->mux); if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { rc = 0; + mutex_unlock(&ecryptfs_daemon_hash_mux); printk(KERN_WARNING "%s: Attempt to read from zombified " "daemon\n", __func__); goto out_unlock_daemon; } if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) { rc = 0; + mutex_unlock(&ecryptfs_daemon_hash_mux); goto out_unlock_daemon; } /* This daemon will not go away so long as this flag is set */ -- cgit v1.2.3 From b32a09db4fb9a87246ba4e7726a979ac4709ad97 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 26 Feb 2008 09:57:11 -0600 Subject: add match_strlcpy() us it to make v9fs make uname and remotename parsing more robust match_strcpy() is a somewhat creepy function: the caller needs to make sure that the destination buffer is big enough, and when he screws up or forgets, match_strcpy() happily overruns the buffer. There's exactly one customer: v9fs_parse_options(). I believe it currently can't overflow its buffer, but that's not exactly obvious. The source string is a substing of the mount options. The kernel silently truncates those to PAGE_SIZE bytes, including the terminating zero. See compat_sys_mount() and do_mount(). The destination buffer is obtained from __getname(), which allocates from name_cachep, which is initialized by vfs_caches_init() for size PATH_MAX. We're safe as long as PATH_MAX <= PAGE_SIZE. PATH_MAX is 4096. As far as I know, the smallest PAGE_SIZE is also 4096. Here's a patch that makes the code a bit more obviously correct. It doesn't depend on PATH_MAX <= PAGE_SIZE. Signed-off-by: Markus Armbruster Cc: Latchesar Ionkov Cc: Jim Meyering Cc: "Randy.Dunlap" Signed-off-by: Andrew Morton Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 9b0f0222e8b..e307fbd34fa 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -125,10 +125,10 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) v9ses->afid = option; break; case Opt_uname: - match_strcpy(v9ses->uname, &args[0]); + match_strlcpy(v9ses->uname, &args[0], PATH_MAX); break; case Opt_remotename: - match_strcpy(v9ses->aname, &args[0]); + match_strlcpy(v9ses->aname, &args[0], PATH_MAX); break; case Opt_nodevmap: v9ses->nodev = 1; -- cgit v1.2.3 From ee443996a35c1e04f210cafd43d5a98d41e46085 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Wed, 5 Mar 2008 07:08:09 -0600 Subject: 9p: Documentation updates The kernel-doc comments of much of the 9p system have been in disarray since reorganization. This patch fixes those problems, adds additional documentation and a template book which collects the 9p information. Signed-off-by: Eric Van Hensbergen --- fs/9p/fid.h | 15 ++++++++++ fs/9p/v9fs.c | 8 ++++-- fs/9p/v9fs.h | 85 +++++++++++++++++++++++++++++++++++++++---------------- fs/9p/vfs_addr.c | 2 +- fs/9p/vfs_dir.c | 2 +- fs/9p/vfs_file.c | 11 +++---- fs/9p/vfs_inode.c | 50 +++++++++++++++++++++++--------- fs/9p/vfs_super.c | 1 + 8 files changed, 126 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 26e07df783b..c3bbd6af996 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -22,6 +22,21 @@ #include +/** + * struct v9fs_dentry - 9p private data stored in dentry d_fsdata + * @lock: protects the fidlist + * @fidlist: list of FIDs currently associated with this dentry + * + * This structure defines the 9p private data associated with + * a particular dentry. In particular, this private data is used + * to lookup which 9P FID handle should be used for a particular VFS + * operation. FID handles are associated with dentries instead of + * inodes in order to more closely map functionality to the Plan 9 + * expected behavior for FID reclaimation and tracking. + * + * See Also: Mapping FIDs to Linux VFS model in + * Design and Implementation of the Linux 9P File System documentation + */ struct v9fs_dentry { spinlock_t lock; /* protect fidlist */ struct list_head fidlist; diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index e307fbd34fa..79d310c0018 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -71,7 +71,6 @@ static match_table_t tokens = { /** * v9fs_parse_options - parse mount options into session structure - * @options: options string passed from mount * @v9ses: existing v9fs session information * */ @@ -256,9 +255,12 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) } /** - * v9fs_session_cancel - mark transport as disconnected - * and cancel all pending requests. + * v9fs_session_cancel - terminate a session + * @v9ses: session to terminate + * + * mark transport as disconnected and cancel all pending requests. */ + void v9fs_session_cancel(struct v9fs_session_info *v9ses) { P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses); p9_client_disconnect(v9ses->clnt); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 7d3a1018db5..a7d56719299 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -21,18 +21,69 @@ * */ -/* - * Session structure provides information for an opened session - * - */ +/** + * enum p9_session_flags - option flags for each 9P session + * @V9FS_EXTENDED: whether or not to use 9P2000.u extensions + * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy + * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) + * @V9FS_ACCESS_ANY: use a single attach for all users + * @V9FS_ACCESS_MASK: bit mask of different ACCESS options + * + * Session flags reflect options selected by users at mount time + */ +enum p9_session_flags { + V9FS_EXTENDED = 0x01, + V9FS_ACCESS_SINGLE = 0x02, + V9FS_ACCESS_USER = 0x04, + V9FS_ACCESS_ANY = 0x06, + V9FS_ACCESS_MASK = 0x06, +}; + +/* possible values of ->cache */ +/** + * enum p9_cache_modes - user specified cache preferences + * @CACHE_NONE: do not cache data, dentries, or directory contents (default) + * @CACHE_LOOSE: cache data, dentries, and directory contents w/no consistency + * + * eventually support loose, tight, time, session, default always none + */ + +enum p9_cache_modes { + CACHE_NONE, + CACHE_LOOSE, +}; + +/** + * struct v9fs_session_info - per-instance session information + * @flags: session options of type &p9_session_flags + * @nodev: set to 1 to disable device mapping + * @debug: debug level + * @afid: authentication handle + * @cache: cache mode of type &p9_cache_modes + * @options: copy of options string given by user + * @uname: string user name to mount hierarchy as + * @aname: mount specifier for remote hierarchy + * @maxdata: maximum data to be sent/recvd per protocol message + * @dfltuid: default numeric userid to mount hierarchy as + * @dfltgid: default numeric groupid to mount hierarchy as + * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy + * @clnt: reference to 9P network client instantiated for this session + * @debugfs_dir: reference to debugfs_dir which can be used for add'l debug + * + * This structure holds state for each session instance established during + * a sys_mount() . + * + * Bugs: there seems to be a lot of state which could be condensed and/or + * removed. + */ struct v9fs_session_info { /* options */ - unsigned char flags; /* session flags */ - unsigned char nodev; /* set to 1 if no disable device mapping */ - unsigned short debug; /* debug level */ - unsigned int afid; /* authentication fid */ - unsigned int cache; /* cache mode */ + unsigned char flags; + unsigned char nodev; + unsigned short debug; + unsigned int afid; + unsigned int cache; char *options; /* copy of mount options */ char *uname; /* user name to mount as */ @@ -45,22 +96,6 @@ struct v9fs_session_info { struct dentry *debugfs_dir; }; -/* session flags */ -enum { - V9FS_EXTENDED = 0x01, /* 9P2000.u */ - V9FS_ACCESS_MASK = 0x06, /* access mask */ - V9FS_ACCESS_SINGLE = 0x02, /* only one user can access the files */ - V9FS_ACCESS_USER = 0x04, /* attache per user */ - V9FS_ACCESS_ANY = 0x06, /* use the same attach for all users */ -}; - -/* possible values of ->cache */ -/* eventually support loose, tight, time, session, default always none */ -enum { - CACHE_NONE, /* default */ - CACHE_LOOSE, /* no consistency */ -}; - extern struct dentry *v9fs_debugfs_root; struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 6248f0e727a..97d3aed5798 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -43,7 +43,7 @@ /** * v9fs_vfs_readpage - read an entire page in from 9P * - * @file: file being read + * @filp: file being read * @page: structure to page * */ diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 0924d4477da..88e3787c6ea 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -60,7 +60,7 @@ static inline int dt_type(struct p9_stat *mistat) /** * v9fs_dir_readdir - read a directory - * @filep: opened file structure + * @filp: opened file structure * @dirent: directory structure ??? * @filldir: function to populate directory structure ??? * diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index a616fff8906..0d55affe37d 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -90,10 +90,11 @@ int v9fs_file_open(struct inode *inode, struct file *file) /** * v9fs_file_lock - lock a file (or directory) - * @inode: inode to be opened - * @file: file being opened + * @filp: file to be locked + * @cmd: lock command + * @fl: file lock structure * - * XXX - this looks like a local only lock, we should extend into 9P + * Bugs: this looks like a local only lock, we should extend into 9P * by using open exclusive */ @@ -118,7 +119,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) /** * v9fs_file_read - read from a file - * @filep: file pointer to read + * @filp: file pointer to read * @data: data buffer to read data into * @count: size of buffer * @offset: offset at which to read data @@ -142,7 +143,7 @@ v9fs_file_read(struct file *filp, char __user * data, size_t count, /** * v9fs_file_write - write to a file - * @filep: file pointer to write + * @filp: file pointer to write * @data: data buffer to write data from * @count: size of buffer * @offset: offset at which to write data diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 6a28842052e..40fa807bd92 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -129,6 +129,12 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) return res; } +/** + * v9fs_uflags2omode- convert posix open flags to plan 9 mode bits + * @uflags: flags to convert + * + */ + int v9fs_uflags2omode(int uflags) { int ret; @@ -312,6 +318,14 @@ error: } */ +/** + * v9fs_inode_from_fid - populate an inode by issuing a attribute request + * @v9ses: session information + * @fid: fid to issue attribute request for + * @sb: superblock on which to create inode + * + */ + static struct inode * v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) @@ -384,9 +398,12 @@ v9fs_open_created(struct inode *inode, struct file *file) /** * v9fs_create - Create a file + * @v9ses: session information + * @dir: directory that dentry is being created in * @dentry: dentry that is being created * @perm: create permissions * @mode: open mode + * @extension: 9p2000.u extension string to support devices, etc. * */ static struct p9_fid * @@ -461,7 +478,7 @@ error: /** * v9fs_vfs_create - VFS hook to create files - * @inode: directory inode that is being created + * @dir: directory inode that is being created * @dentry: dentry that is being deleted * @mode: create permissions * @nd: path information @@ -519,7 +536,7 @@ error: /** * v9fs_vfs_mkdir - VFS mkdir hook to create a directory - * @inode: inode that is being unlinked + * @dir: inode that is being unlinked * @dentry: dentry that is being unlinked * @mode: mode for new directory * @@ -703,9 +720,9 @@ done: /** * v9fs_vfs_getattr - retrieve file metadata - * @mnt - mount information - * @dentry - file to get attributes on - * @stat - metadata structure to populate + * @mnt: mount information + * @dentry: file to get attributes on + * @stat: metadata structure to populate * */ @@ -928,7 +945,7 @@ done: /** * v9fs_vfs_readlink - read a symlink's location * @dentry: dentry for symlink - * @buf: buffer to load symlink location into + * @buffer: buffer to load symlink location into * @buflen: length of buffer * */ @@ -996,10 +1013,12 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd) * v9fs_vfs_put_link - release a symlink path * @dentry: dentry for symlink * @nd: nameidata + * @p: unused * */ -static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) +static void +v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) { char *s = nd_get_link(nd); @@ -1008,6 +1027,15 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void __putname(s); } +/** + * v9fs_vfs_mkspecial - create a special file + * @dir: inode to create special file in + * @dentry: dentry to create + * @mode: mode to create special file + * @extension: 9p2000.u format extension string representing special file + * + */ + static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, int mode, const char *extension) { @@ -1037,7 +1065,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, * @dentry: dentry for symlink * @symname: symlink data * - * See 9P2000.u RFC for more information + * See Also: 9P2000.u RFC for more information * */ @@ -1058,10 +1086,6 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) * */ -/* XXX - lots of code dup'd from symlink and creates, - * figure out a better reuse strategy - */ - static int v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) @@ -1098,7 +1122,7 @@ clunk_fid: * @dir: inode destination for new link * @dentry: dentry for file * @mode: mode for creation - * @dev_t: device associated with special file + * @rdev: device associated with special file * */ diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index a452ac67fc9..ba10f172626 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -75,6 +75,7 @@ static int v9fs_set_super(struct super_block *s, void *data) * v9fs_fill_super - populate superblock with info * @sb: superblock * @v9ses: session information + * @flags: flags propagated from v9fs_get_sb() * */ -- cgit v1.2.3 From ab31267dfeddf80b2e483f077c8b03905993722b Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Thu, 6 Mar 2008 17:10:28 -0600 Subject: fs/9p/v9fs.c (v9fs_parse_options): Handle kstrdup and match_strdup failure. Now that this function can fail, return an int, diagnose other option-parsing failures, and adjust the sole caller: (v9fs_session_init): Handle kstrdup failure. Propagate any new v9fs_parse_options failure "up". Signed-off-by: Jim Meyering Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Andrew Morton Acked-by: Eric Van Hensbergen --- fs/9p/v9fs.c | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 79d310c0018..5c1ccaf0416 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -73,16 +73,17 @@ static match_table_t tokens = { * v9fs_parse_options - parse mount options into session structure * @v9ses: existing v9fs session information * + * Return 0 upon success, -ERRNO upon failure. */ -static void v9fs_parse_options(struct v9fs_session_info *v9ses) +static int v9fs_parse_options(struct v9fs_session_info *v9ses) { char *options; substring_t args[MAX_OPT_ARGS]; char *p; int option = 0; char *s, *e; - int ret; + int ret = 0; /* setup defaults */ v9ses->afid = ~0; @@ -90,19 +91,26 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) v9ses->cache = 0; if (!v9ses->options) - return; + return 0; options = kstrdup(v9ses->options, GFP_KERNEL); + if (!options) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + return -ENOMEM; + } + while ((p = strsep(&options, ",")) != NULL) { int token; if (!*p) continue; token = match_token(p, tokens, args); if (token < Opt_uname) { - ret = match_int(&args[0], &option); - if (ret < 0) { + int r = match_int(&args[0], &option); + if (r < 0) { P9_DPRINTK(P9_DEBUG_ERROR, "integer field, but no integer?\n"); + ret = r; continue; } } @@ -138,6 +146,13 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) case Opt_access: s = match_strdup(&args[0]); + if (!s) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy" + " of option argument\n"); + ret = -ENOMEM; + break; + } v9ses->flags &= ~V9FS_ACCESS_MASK; if (strcmp(s, "user") == 0) v9ses->flags |= V9FS_ACCESS_USER; @@ -157,6 +172,7 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) } } kfree(options); + return ret; } /** @@ -172,6 +188,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, { int retval = -EINVAL; struct p9_fid *fid; + int rc; v9ses->uname = __getname(); if (!v9ses->uname) @@ -190,7 +207,18 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, v9ses->dfltuid = V9FS_DEFUID; v9ses->dfltgid = V9FS_DEFGID; v9ses->options = kstrdup(data, GFP_KERNEL); - v9fs_parse_options(v9ses); + if (!v9ses->options) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + retval = -ENOMEM; + goto error; + } + + rc = v9fs_parse_options(v9ses); + if (rc < 0) { + retval = rc; + goto error; + } v9ses->clnt = p9_client_create(dev_name, v9ses->options); -- cgit v1.2.3 From 887b3ece65be7b643dfdae0d433c91a26a3f437d Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Thu, 8 May 2008 20:26:37 -0500 Subject: 9p: fix error path during early mount There was some cleanup issues during early mount which would trigger a kernel bug for certain types of failure. This patch reorganizes the cleanup to get rid of the bad behavior. This also merges the 9pnet and 9pnet_fd modules for the purpose of configuration and initialization. Keeping the fd transport separate from the core 9pnet code seemed like a good idea at the time, but in practice has caused more harm and confusion than good. Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs.c | 13 +++++++------ fs/9p/vfs_super.c | 34 ++++++++++++++++------------------ 2 files changed, 23 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 5c1ccaf0416..047c791427a 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -206,12 +206,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, v9ses->uid = ~0; v9ses->dfltuid = V9FS_DEFUID; v9ses->dfltgid = V9FS_DEFGID; - v9ses->options = kstrdup(data, GFP_KERNEL); - if (!v9ses->options) { - P9_DPRINTK(P9_DEBUG_ERROR, + if (data) { + v9ses->options = kstrdup(data, GFP_KERNEL); + if (!v9ses->options) { + P9_DPRINTK(P9_DEBUG_ERROR, "failed to allocate copy of option string\n"); - retval = -ENOMEM; - goto error; + retval = -ENOMEM; + goto error; + } } rc = v9fs_parse_options(v9ses); @@ -260,7 +262,6 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, return fid; error: - v9fs_session_close(v9ses); return ERR_PTR(retval); } diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index ba10f172626..bf59c396049 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -128,29 +128,26 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, fid = v9fs_session_init(v9ses, dev_name, data); if (IS_ERR(fid)) { retval = PTR_ERR(fid); - fid = NULL; - kfree(v9ses); - v9ses = NULL; - goto error; + goto close_session; } st = p9_client_stat(fid); if (IS_ERR(st)) { retval = PTR_ERR(st); - goto error; + goto clunk_fid; } sb = sget(fs_type, NULL, v9fs_set_super, v9ses); if (IS_ERR(sb)) { retval = PTR_ERR(sb); - goto error; + goto free_stat; } v9fs_fill_super(sb, v9ses, flags); inode = v9fs_get_inode(sb, S_IFDIR | mode); if (IS_ERR(inode)) { retval = PTR_ERR(inode); - goto error; + goto release_sb; } inode->i_uid = uid; @@ -159,7 +156,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, root = d_alloc_root(inode); if (!root) { retval = -ENOMEM; - goto error; + goto release_sb; } sb->s_root = root; @@ -170,21 +167,22 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, return simple_set_mnt(mnt, sb); -error: - kfree(st); - if (fid) - p9_client_clunk(fid); - - if (v9ses) { - v9fs_session_close(v9ses); - kfree(v9ses); - } - +release_sb: if (sb) { up_write(&sb->s_umount); deactivate_super(sb); } +free_stat: + kfree(st); + +clunk_fid: + p9_client_clunk(fid); + +close_session: + v9fs_session_close(v9ses); + kfree(v9ses); + return retval; } -- cgit v1.2.3 From 772279c5f1dceb58d451dca94b557fd89b1ce890 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Wed, 14 May 2008 16:05:41 -0700 Subject: jbd: need to hold j_state_lock to updates to transaction t_state to T_COMMIT Updating the current transaction's t_state is protected by j_state_lock. We need to do the same when updating the t_state to T_COMMIT. Signed-off-by: Mingming Cao Acked-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/commit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index cd931ef1f00..5a8ca61498c 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -470,7 +470,9 @@ void journal_commit_transaction(journal_t *journal) * transaction! Now comes the tricky part: we need to write out * metadata. Loop over the transaction's entire buffer list: */ + spin_lock(&journal->j_state_lock); commit_transaction->t_state = T_COMMIT; + spin_unlock(&journal->j_state_lock); J_ASSERT(commit_transaction->t_nr_buffers <= commit_transaction->t_outstanding_credits); -- cgit v1.2.3 From 7e01c8e5420b6c7f9d85d34c15d8c7a15c9fc720 Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Wed, 14 May 2008 16:05:47 -0700 Subject: ext3/4: fix uninitialized bs in ext3/4_xattr_set_handle() This fix the uninitialized bs when we try to replace a xattr entry in ibody with the new value which require more than free space. This situation only happens we format ext3/4 with inode size more than 128 and we have put xattr entries both in ibody and block. The consequences about this bug is we will lost the xattr block which pointed by i_file_acl with all xattr entires in it. We will alloc a new xattr block and put that large value entry in it. The old xattr block will become orphan block. Signed-off-by: Tiger Yang Cc: Cc: Andreas Gruenbacher Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/xattr.c | 5 +++++ fs/ext4/xattr.c | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index d4a4f0e9ff6..175414ac221 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -1000,6 +1000,11 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, i.value = NULL; error = ext3_xattr_block_set(handle, inode, &i, &bs); } else if (error == -ENOSPC) { + if (EXT3_I(inode)->i_file_acl && !bs.s.base) { + error = ext3_xattr_block_find(inode, &i, &bs); + if (error) + goto cleanup; + } error = ext3_xattr_block_set(handle, inode, &i, &bs); if (error) goto cleanup; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 3fbc2c6c3d0..ff08633f398 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1009,6 +1009,11 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, i.value = NULL; error = ext4_xattr_block_set(handle, inode, &i, &bs); } else if (error == -ENOSPC) { + if (EXT4_I(inode)->i_file_acl && !bs.s.base) { + error = ext4_xattr_block_find(inode, &i, &bs); + if (error) + goto cleanup; + } error = ext4_xattr_block_set(handle, inode, &i, &bs); if (error) goto cleanup; -- cgit v1.2.3 From dfc5d03f12e706c19ee37734184ea96582ef931d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 May 2008 19:11:51 -0400 Subject: ext4: correct mount option parsing to detect when quota options can be changed We should not allow user to change quota mount options when quota is just suspended. It would make mount options and internal quota state inconsistent. Also we should not allow user to change quota format when quota is turned on. On the other hand we can just silently ignore when some option is set to the value it already has (mount does this on remount). Cc: Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 52dd0679a4e..686ebcc2e6c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -979,7 +979,7 @@ static int parse_options (char *options, struct super_block *sb, int data_opt = 0; int option; #ifdef CONFIG_QUOTA - int qtype; + int qtype, qfmt; char *qname; #endif @@ -1162,7 +1162,9 @@ static int parse_options (char *options, struct super_block *sb, case Opt_grpjquota: qtype = GRPQUOTA; set_qf_name: - if (sb_any_quota_enabled(sb)) { + if ((sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) && + !sbi->s_qf_names[qtype]) { printk(KERN_ERR "EXT4-fs: Cannot change journalled " "quota options when quota turned on.\n"); @@ -1200,7 +1202,9 @@ set_qf_name: case Opt_offgrpjquota: qtype = GRPQUOTA; clear_qf_name: - if (sb_any_quota_enabled(sb)) { + if ((sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) && + sbi->s_qf_names[qtype]) { printk(KERN_ERR "EXT4-fs: Cannot change " "journalled quota options when " "quota turned on.\n"); @@ -1213,10 +1217,20 @@ clear_qf_name: sbi->s_qf_names[qtype] = NULL; break; case Opt_jqfmt_vfsold: - sbi->s_jquota_fmt = QFMT_VFS_OLD; - break; + qfmt = QFMT_VFS_OLD; + goto set_qf_format; case Opt_jqfmt_vfsv0: - sbi->s_jquota_fmt = QFMT_VFS_V0; + qfmt = QFMT_VFS_V0; +set_qf_format: + if ((sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) && + sbi->s_jquota_fmt != qfmt) { + printk(KERN_ERR "EXT4-fs: Cannot change " + "journaled quota options when " + "quota turned on.\n"); + return 0; + } + sbi->s_jquota_fmt = qfmt; break; case Opt_quota: case Opt_usrquota: -- cgit v1.2.3 From cd59e7b9781a35716b8a3e8c4aa2d48081d7daf7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 May 2008 19:11:51 -0400 Subject: ext4: Fix mount messages when quota disabled When quota is disabled, we should not print 'journaled quota not supported' when user tried to mount non-journaled quota. Also fix typo in the message. Signed-off-by: Jan Kara Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 686ebcc2e6c..94a527261ca 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1255,6 +1255,9 @@ set_qf_format: case Opt_quota: case Opt_usrquota: case Opt_grpquota: + printk(KERN_ERR + "EXT4-fs: quota options not supported.\n"); + break; case Opt_usrjquota: case Opt_grpjquota: case Opt_offusrjquota: @@ -1262,7 +1265,7 @@ set_qf_format: case Opt_jqfmt_vfsold: case Opt_jqfmt_vfsv0: printk(KERN_ERR - "EXT4-fs: journalled quota options not " + "EXT4-fs: journaled quota options not " "supported.\n"); break; case Opt_noquota: -- cgit v1.2.3 From 0623543b3335c8e439cacf21af99bbf45da42c5a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 May 2008 19:11:51 -0400 Subject: ext4: fix synchronization of quota files in journal=data mode In journal=data mode, it is not enough to do write_inode_now as done in vfs_quota_on() to write all data to their final location (which is needed for quota_read to work correctly). Calling journal_flush() does its job. Cc: Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 94a527261ca..cddf7f0e0fd 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3170,23 +3170,42 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, if (!test_opt(sb, QUOTA)) return -EINVAL; - /* Not journalling quota? */ - if ((!EXT4_SB(sb)->s_qf_names[USRQUOTA] && - !EXT4_SB(sb)->s_qf_names[GRPQUOTA]) || remount) + /* When remounting, no checks are needed and in fact, path is NULL */ + if (remount) return vfs_quota_on(sb, type, format_id, path, remount); + err = path_lookup(path, LOOKUP_FOLLOW, &nd); if (err) return err; + /* Quotafile not on the same filesystem? */ if (nd.path.mnt->mnt_sb != sb) { path_put(&nd.path); return -EXDEV; } - /* Quotafile not of fs root? */ - if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) - printk(KERN_WARNING - "EXT4-fs: Quota file not on filesystem root. " - "Journalled quota will not work.\n"); + /* Journaling quota? */ + if (EXT4_SB(sb)->s_qf_names[type]) { + /* Quotafile not of fs root? */ + if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) + printk(KERN_WARNING + "EXT4-fs: Quota file not on filesystem root. " + "Journaled quota will not work.\n"); + } + + /* + * When we journal data on quota file, we have to flush journal to see + * all updates to the file when we bypass pagecache... + */ + if (ext4_should_journal_data(nd.path.dentry->d_inode)) { + /* + * We don't need to lock updates but journal_flush() could + * otherwise be livelocked... + */ + jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); + jbd2_journal_flush(EXT4_SB(sb)->s_journal); + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + } + path_put(&nd.path); return vfs_quota_on(sb, type, format_id, path, remount); } -- cgit v1.2.3 From 2c8be6b222f76c332d9faeb00c047996d340632c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 May 2008 21:27:55 -0400 Subject: ext4: fix typos in messages and comments (journalled -> journaled) Cc: Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cddf7f0e0fd..09d9359c805 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1166,7 +1166,7 @@ set_qf_name: sb_any_quota_suspended(sb)) && !sbi->s_qf_names[qtype]) { printk(KERN_ERR - "EXT4-fs: Cannot change journalled " + "EXT4-fs: Cannot change journaled " "quota options when quota turned on.\n"); return 0; } @@ -1206,7 +1206,7 @@ clear_qf_name: sb_any_quota_suspended(sb)) && sbi->s_qf_names[qtype]) { printk(KERN_ERR "EXT4-fs: Cannot change " - "journalled quota options when " + "journaled quota options when " "quota turned on.\n"); return 0; } @@ -1350,14 +1350,14 @@ set_qf_format: } if (!sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT4-fs: journalled quota format " + printk(KERN_ERR "EXT4-fs: journaled quota format " "not specified.\n"); return 0; } } else { if (sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT4-fs: journalled quota format " - "specified with no journalling " + printk(KERN_ERR "EXT4-fs: journaled quota format " + "specified with no journaling " "enabled.\n"); return 0; } @@ -1598,7 +1598,7 @@ static void ext4_orphan_cleanup (struct super_block * sb, int ret = ext4_quota_on_mount(sb, i); if (ret < 0) printk(KERN_ERR - "EXT4-fs: Cannot turn on journalled " + "EXT4-fs: Cannot turn on journaled " "quota: error %d\n", ret); } } @@ -3123,7 +3123,7 @@ static int ext4_release_dquot(struct dquot *dquot) static int ext4_mark_dquot_dirty(struct dquot *dquot) { - /* Are we journalling quotas? */ + /* Are we journaling quotas? */ if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); -- cgit v1.2.3 From 1930479c4b6bbcb6f164a5b3498e0d98329967f4 Mon Sep 17 00:00:00 2001 From: Valerie Clement Date: Tue, 13 May 2008 19:31:14 -0400 Subject: ext4: mballoc fix mb_normalize_request algorithm for 1KB block size filesystems In case of inode preallocation, the number of blocks to allocate depends on the file size and it is calculated in ext4_mb_normalize_request(). Each group in the filesystem is then checked to find one that can be used for allocation; this is done in ext4_mb_good_group(). When a file bigger than 4MB is created, the requested number of blocks to preallocate, calculated by ext4_mb_normalize_request is 4096. However for a filesystem with 1KB block size, the maximum size of the block buddies used by the multiblock allocator is 2048, so none of groups in the filesystem satisfies the search criteria in ext4_mb_good_group(). Scanning all the filesystem groups impacts performance. This was demonstrated by using a freshly created, 70GB, 1k block filesystem, with caches dropped write before the test via /proc/sys/vm/drop_caches, and with the filesystem mounted with nodelalloc and nodealloc,nomballoc. The time to write an 8 megabyte file using "dd if=/dev/zero of=/mnt/test/fo bs=8k count=1k conv=fsync" took 35.5091 seconds (236kB/s) with nodellaloc, and 0.233754 seconds (35.9 MB/s) with the nodelloc,nomballoc options. With a 1TB partition, it took several minutes to write 8MB! This patch modifies the algorithm in ext4_mb_normalize_group_request to calculate the number of blocks to allocate by taking into account the maximum size of free blocks chunks handled by the multiblock allocator. It has also been tested for filesystems with 2KB and 4KB block sizes to ensure that those cases don't regress. Reviewed-by: Aneesh Kumar K.V Signed-off-by: Valerie Clement Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b128bdc0f55..1d7fde99452 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2880,12 +2880,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, if (size < i_size_read(ac->ac_inode)) size = i_size_read(ac->ac_inode); - /* max available blocks in a free group */ - max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb) - 1 - 1 - - EXT4_SB(ac->ac_sb)->s_itb_per_group; + /* max size of free chunks */ + max = 2 << bsbits; -#define NRL_CHECK_SIZE(req, size, max,bits) \ - (req <= (size) || max <= ((size) >> bits)) +#define NRL_CHECK_SIZE(req, size, max, chunk_size) \ + (req <= (size) || max <= (chunk_size)) /* first, try to predict filesize */ /* XXX: should this table be tunable? */ @@ -2904,16 +2903,16 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, size = 512 * 1024; } else if (size <= 1024 * 1024) { size = 1024 * 1024; - } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, bsbits)) { + } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) { start_off = ((loff_t)ac->ac_o_ex.fe_logical >> - (20 - bsbits)) << 20; - size = 1024 * 1024; - } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, bsbits)) { + (21 - bsbits)) << 21; + size = 2 * 1024 * 1024; + } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) { start_off = ((loff_t)ac->ac_o_ex.fe_logical >> (22 - bsbits)) << 22; size = 4 * 1024 * 1024; } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len, - (8<<20)>>bsbits, max, bsbits)) { + (8<<20)>>bsbits, max, 8 * 1024)) { start_off = ((loff_t)ac->ac_o_ex.fe_logical >> (23 - bsbits)) << 23; size = 8 * 1024 * 1024; -- cgit v1.2.3 From 519deca0496a4df07d15acf3181ca5d573bffdec Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 15 May 2008 14:43:20 -0400 Subject: ext4: Retry block allocation if new blocks are allocated from system zone. If the block allocator gets blocks out of system zone ext4 calls ext4_error. But if the file system is mounted with errors=continue retry block allocation. We need to mark the system zone blocks as in use to make sure retry don't pick them again System zone is the block range mapping block bitmap, inode bitmap and inode table. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 17 +++++++++++------ fs/ext4/mballoc.c | 47 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 47 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index da994374ec3..30494c5da84 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -287,11 +287,11 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group) (int)block_group, (unsigned long long)bitmap_blk); return NULL; } - if (!ext4_valid_block_bitmap(sb, desc, block_group, bh)) { - put_bh(bh); - return NULL; - } - + ext4_valid_block_bitmap(sb, desc, block_group, bh); + /* + * file system mounted not to panic on error, + * continue with corrupt bitmap + */ return bh; } /* @@ -1770,7 +1770,12 @@ allocated: "Allocating block in system zone - " "blocks from %llu, length %lu", ret_block, num); - goto out; + /* + * claim_block marked the blocks we allocated + * as in use. So we may want to selectively + * mark some of the blocks as free + */ + goto retry_alloc; } performed_allocation = 1; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1d7fde99452..873ad9b3418 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2736,7 +2736,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, struct ext4_sb_info *sbi; struct super_block *sb; ext4_fsblk_t block; - int err; + int err, len; BUG_ON(ac->ac_status != AC_STATUS_FOUND); BUG_ON(ac->ac_b_ex.fe_len <= 0); @@ -2770,14 +2770,27 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, + ac->ac_b_ex.fe_start + le32_to_cpu(es->s_first_data_block); - if (block == ext4_block_bitmap(sb, gdp) || - block == ext4_inode_bitmap(sb, gdp) || - in_range(block, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group)) { - + len = ac->ac_b_ex.fe_len; + if (in_range(ext4_block_bitmap(sb, gdp), block, len) || + in_range(ext4_inode_bitmap(sb, gdp), block, len) || + in_range(block, ext4_inode_table(sb, gdp), + EXT4_SB(sb)->s_itb_per_group) || + in_range(block + len - 1, ext4_inode_table(sb, gdp), + EXT4_SB(sb)->s_itb_per_group)) { ext4_error(sb, __func__, "Allocating block in system zone - block = %llu", block); + /* File system mounted not to panic on error + * Fix the bitmap and repeat the block allocation + * We leak some of the blocks here. + */ + mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), + bitmap_bh->b_data, ac->ac_b_ex.fe_start, + ac->ac_b_ex.fe_len); + err = ext4_journal_dirty_metadata(handle, bitmap_bh); + if (!err) + err = -EAGAIN; + goto out_err; } #ifdef AGGRESSIVE_CHECK { @@ -4032,7 +4045,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, ac->ac_op = EXT4_MB_HISTORY_ALLOC; ext4_mb_normalize_request(ac, ar); - repeat: /* allocate space in core */ ext4_mb_regular_allocator(ac); @@ -4046,10 +4058,21 @@ repeat: } if (likely(ac->ac_status == AC_STATUS_FOUND)) { - ext4_mb_mark_diskspace_used(ac, handle); - *errp = 0; - block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); - ar->len = ac->ac_b_ex.fe_len; + *errp = ext4_mb_mark_diskspace_used(ac, handle); + if (*errp == -EAGAIN) { + ac->ac_b_ex.fe_group = 0; + ac->ac_b_ex.fe_start = 0; + ac->ac_b_ex.fe_len = 0; + ac->ac_status = AC_STATUS_CONTINUE; + goto repeat; + } else if (*errp) { + ac->ac_b_ex.fe_len = 0; + ar->len = 0; + ext4_mb_show_ac(ac); + } else { + block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); + ar->len = ac->ac_b_ex.fe_len; + } } else { freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len); if (freed) @@ -4236,6 +4259,8 @@ do_more: ext4_error(sb, __func__, "Freeing blocks in system zone - " "Block = %lu, count = %lu", block, count); + /* err = 0. ext4_std_error should be a no op */ + goto error_return; } BUFFER_TRACE(bitmap_bh, "getting write access"); -- cgit v1.2.3 From 02c471cb17203c748e9bc87003052c1f46e5df69 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Thu, 15 May 2008 14:46:17 -0400 Subject: jbd2: update transaction t_state to T_COMMIT fix Updating the current transaction's t_state is protected by j_state_lock. We need to do the same when updating the t_state to T_COMMIT. Acked-by: Jan Kara Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" Signed-off-by: Andrew Morton --- fs/jbd2/commit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index e0139786f71..4d99685fdce 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -560,7 +560,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) * transaction! Now comes the tricky part: we need to write out * metadata. Loop over the transaction's entire buffer list: */ + spin_lock(&journal->j_state_lock); commit_transaction->t_state = T_COMMIT; + spin_unlock(&journal->j_state_lock); stats.u.run.rs_logging = jiffies; stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, -- cgit v1.2.3 From 9a6ab769bdacc65e7d4e931034e12e02c357c4d3 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 16 May 2008 11:20:25 -0700 Subject: byteorder: don't directly include linux/byteorder/generic.h Use asm/byteorder.h instead. Signed-off-by: Harvey Harrison Signed-off-by: Linus Torvalds --- fs/befs/endian.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/befs/endian.h b/fs/befs/endian.h index e254a20869f..6cb84d896d0 100644 --- a/fs/befs/endian.h +++ b/fs/befs/endian.h @@ -9,7 +9,7 @@ #ifndef LINUX_BEFS_ENDIAN #define LINUX_BEFS_ENDIAN -#include +#include static inline u64 fs64_to_cpu(const struct super_block *sb, fs64 n) -- cgit v1.2.3