From 411b67b4b6a4dd1e0292a6a58dd753978179d173 Mon Sep 17 00:00:00 2001 From: Kostik Belousov Date: Wed, 28 Sep 2005 18:21:28 +0300 Subject: [PATCH] readv/writev syscalls are not checked by lsm it seems that readv(2)/writev(2) syscalls do not call file_permission callback. Looks like this is overlook. I have filled the issue into redhat bugzilla as https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=169433 and got the recommendation to post this on lsm mailing list. The following trivial patch solves the problem. Signed-off-by: Kostik Belousov Signed-off-by: Chris Wright --- fs/read_write.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/read_write.c b/fs/read_write.c index b60324aaa2b..a091ee4f430 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -497,6 +497,9 @@ static ssize_t do_readv_writev(int type, struct file *file, } ret = rw_verify_area(type, file, pos, tot_len); + if (ret) + goto out; + ret = security_file_permission(file, type == READ ? MAY_READ : MAY_WRITE); if (ret) goto out; -- cgit v1.2.3 From 192eaa28ba7b44485e521df7ba7a2ccbc4cc4d13 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 30 Sep 2005 03:15:08 +0100 Subject: [PATCH] missing ERR_PTR in 9fs Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/9p/vfs_super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 0957f4da91d..82c5b008407 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -129,7 +129,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { dprintk(DEBUG_ERROR, "problem initiating session\n"); - return newfid; + return ERR_PTR(newfid); } sb = sget(fs_type, NULL, v9fs_set_super, v9ses); -- cgit v1.2.3 From 998765e5588b197737d457e16f72832d8036190f Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 30 Sep 2005 11:58:54 -0700 Subject: [PATCH] aio: lock around kiocbTryKick() Only one of the run or kick path is supposed to put an iocb on the run list. If both of them do it than one of them can end up referencing a freed iocb. The kick patch could set the Kicked bit before acquiring the ctx_lock and putting the iocb on the run list. The run path, while holding the ctx_lock, could see this partial kick and mistake it for a kick that was deferred while it was doing work with the run_list NULLed out. It would then race with the kick thread to add the iocb to the run list. This patch moves the kick setting under the ctx_lock so that only one of the kick or run path queues the iocb on the run list, as intended. Signed-off-by: Zach Brown Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 0e11e31dbb7..b8f296999c0 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -899,16 +899,24 @@ static void aio_kick_handler(void *data) * and if required activate the aio work queue to process * it */ -static void queue_kicked_iocb(struct kiocb *iocb) +static void try_queue_kicked_iocb(struct kiocb *iocb) { struct kioctx *ctx = iocb->ki_ctx; unsigned long flags; int run = 0; - WARN_ON((!list_empty(&iocb->ki_wait.task_list))); + /* We're supposed to be the only path putting the iocb back on the run + * list. If we find that the iocb is *back* on a wait queue already + * than retry has happened before we could queue the iocb. This also + * means that the retry could have completed and freed our iocb, no + * good. */ + BUG_ON((!list_empty(&iocb->ki_wait.task_list))); spin_lock_irqsave(&ctx->ctx_lock, flags); - run = __queue_kicked_iocb(iocb); + /* set this inside the lock so that we can't race with aio_run_iocb() + * testing it and putting the iocb on the run list under the lock */ + if (!kiocbTryKick(iocb)) + run = __queue_kicked_iocb(iocb); spin_unlock_irqrestore(&ctx->ctx_lock, flags); if (run) aio_queue_work(ctx); @@ -931,10 +939,7 @@ void fastcall kick_iocb(struct kiocb *iocb) return; } - /* If its already kicked we shouldn't queue it again */ - if (!kiocbTryKick(iocb)) { - queue_kicked_iocb(iocb); - } + try_queue_kicked_iocb(iocb); } EXPORT_SYMBOL(kick_iocb); -- cgit v1.2.3 From 897f15fb587fd2772b9e7ff6ec0265057f3c3975 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 30 Sep 2005 11:58:55 -0700 Subject: [PATCH] aio: remove unlocked task_list test and resulting race Only one of the run or kick path is supposed to put an iocb on the run list. If both of them do it than one of them can end up referencing a freed iocb. The kick path could delete the task_list item from the wait queue before getting the ctx_lock and putting the iocb on the run list. The run path was testing the task_list item outside the lock so that it could catch ki_retry methods that return -EIOCBRETRY *without* putting the iocb on a wait queue and promising to call kick_iocb. This unlocked check could then race with the kick path to cause both to try and put the iocb on the run list. The patch stops the run path from testing task_list by requring that any ki_retry that returns -EIOCBRETRY *must* guarantee that kick_iocb() will be called in the future. aio_p{read,write}, the only in-tree -EIOCBRETRY users, are updated. Signed-off-by: Zach Brown Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 79 +++++++++++++++++++++++++++------------------------------------- 1 file changed, 33 insertions(+), 46 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index b8f296999c0..9edc0e4a121 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -741,19 +741,9 @@ static ssize_t aio_run_iocb(struct kiocb *iocb) ret = retry(iocb); current->io_wait = NULL; - if (-EIOCBRETRY != ret) { - if (-EIOCBQUEUED != ret) { - BUG_ON(!list_empty(&iocb->ki_wait.task_list)); - aio_complete(iocb, ret, 0); - /* must not access the iocb after this */ - } - } else { - /* - * Issue an additional retry to avoid waiting forever if - * no waits were queued (e.g. in case of a short read). - */ - if (list_empty(&iocb->ki_wait.task_list)) - kiocbSetKicked(iocb); + if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { + BUG_ON(!list_empty(&iocb->ki_wait.task_list)); + aio_complete(iocb, ret, 0); } out: spin_lock_irq(&ctx->ctx_lock); @@ -1327,8 +1317,11 @@ asmlinkage long sys_io_destroy(aio_context_t ctx) } /* - * Default retry method for aio_read (also used for first time submit) - * Responsible for updating iocb state as retries progress + * aio_p{read,write} are the default ki_retry methods for + * IO_CMD_P{READ,WRITE}. They maintains kiocb retry state around potentially + * multiple calls to f_op->aio_read(). They loop around partial progress + * instead of returning -EIOCBRETRY because they don't have the means to call + * kick_iocb(). */ static ssize_t aio_pread(struct kiocb *iocb) { @@ -1337,25 +1330,25 @@ static ssize_t aio_pread(struct kiocb *iocb) struct inode *inode = mapping->host; ssize_t ret = 0; - ret = file->f_op->aio_read(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); + do { + ret = file->f_op->aio_read(iocb, iocb->ki_buf, + iocb->ki_left, iocb->ki_pos); + /* + * Can't just depend on iocb->ki_left to determine + * whether we are done. This may have been a short read. + */ + if (ret > 0) { + iocb->ki_buf += ret; + iocb->ki_left -= ret; + } - /* - * Can't just depend on iocb->ki_left to determine - * whether we are done. This may have been a short read. - */ - if (ret > 0) { - iocb->ki_buf += ret; - iocb->ki_left -= ret; /* - * For pipes and sockets we return once we have - * some data; for regular files we retry till we - * complete the entire read or find that we can't - * read any more data (e.g short reads). + * For pipes and sockets we return once we have some data; for + * regular files we retry till we complete the entire read or + * find that we can't read any more data (e.g short reads). */ - if (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)) - ret = -EIOCBRETRY; - } + } while (ret > 0 && + !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)); /* This means we must have transferred all that we could */ /* No need to retry anymore */ @@ -1365,27 +1358,21 @@ static ssize_t aio_pread(struct kiocb *iocb) return ret; } -/* - * Default retry method for aio_write (also used for first time submit) - * Responsible for updating iocb state as retries progress - */ +/* see aio_pread() */ static ssize_t aio_pwrite(struct kiocb *iocb) { struct file *file = iocb->ki_filp; ssize_t ret = 0; - ret = file->f_op->aio_write(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); - - if (ret > 0) { - iocb->ki_buf += ret; - iocb->ki_left -= ret; - - ret = -EIOCBRETRY; - } + do { + ret = file->f_op->aio_write(iocb, iocb->ki_buf, + iocb->ki_left, iocb->ki_pos); + if (ret > 0) { + iocb->ki_buf += ret; + iocb->ki_left -= ret; + } + } while (ret > 0); - /* This means we must have transferred all that we could */ - /* No need to retry anymore */ if ((ret == 0) || (iocb->ki_left == 0)) ret = iocb->ki_nbytes - iocb->ki_left; -- cgit v1.2.3 From 353fb07e2043d2df12dddf4e2c39552d0ab9b026 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 30 Sep 2005 11:58:56 -0700 Subject: [PATCH] aio: avoid extra aio_{read,write} call when ki_left == 0 Recently aio_p{read,write} changed to perform retries internally rather than returning -EIOCBRETRY. This inadvertantly resulted in always calling aio_{read,write} with ki_left at 0 which would in turn immediately return 0. Harmless, but we can avoid this call by checking in the caller. Signed-off-by: Zach Brown Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 9edc0e4a121..d6b1551342b 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1347,7 +1347,7 @@ static ssize_t aio_pread(struct kiocb *iocb) * regular files we retry till we complete the entire read or * find that we can't read any more data (e.g short reads). */ - } while (ret > 0 && + } while (ret > 0 && iocb->ki_left > 0 && !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)); /* This means we must have transferred all that we could */ @@ -1371,7 +1371,7 @@ static ssize_t aio_pwrite(struct kiocb *iocb) iocb->ki_buf += ret; iocb->ki_left -= ret; } - } while (ret > 0); + } while (ret > 0 && iocb->ki_left > 0); if ((ret == 0) || (iocb->ki_left == 0)) ret = iocb->ki_nbytes - iocb->ki_left; -- cgit v1.2.3 From daa35edc0a967d1f77c2e2c1346f57d04371487a Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Fri, 30 Sep 2005 11:59:01 -0700 Subject: [PATCH] uml: remove empty hostfs_truncate method Calling truncate() on hostfs spits a kernel warning "Something isn't implemented here", but it still works fine. Indeed, hostfs i_op->truncate doesn't do anything. But hostfs_setattr() -> set_attr() correctly detects ATTR_SIZE and calls truncate() on the host. So we should be safe (using ftruncate() may be better, in case the file is unlinked on the host, but we aren't sure to have the file open for writing, and reopening it would cause the same races; plus nobody should expect UML to be so careful). So, the warning is wrong, because the current implementation is working. Al, am I correct, and can the warning be therefore dropped? CC: Al Viro Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hostfs/hostfs_kern.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs') diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 59c5062cd63..dd711310626 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -793,11 +793,6 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, return(err); } -void hostfs_truncate(struct inode *ino) -{ - not_implemented(); -} - int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) { char *name; @@ -894,7 +889,6 @@ static struct inode_operations hostfs_iops = { .rmdir = hostfs_rmdir, .mknod = hostfs_mknod, .rename = hostfs_rename, - .truncate = hostfs_truncate, .permission = hostfs_permission, .setattr = hostfs_setattr, .getattr = hostfs_getattr, @@ -910,7 +904,6 @@ static struct inode_operations hostfs_dir_iops = { .rmdir = hostfs_rmdir, .mknod = hostfs_mknod, .rename = hostfs_rename, - .truncate = hostfs_truncate, .permission = hostfs_permission, .setattr = hostfs_setattr, .getattr = hostfs_getattr, -- cgit v1.2.3 From dd190d066b7ded8c44b2b67dd0a14bed01525d3c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 30 Sep 2005 11:59:02 -0700 Subject: [PATCH] fuse: check O_DIRECT Check O_DIRECT and return -EINVAL error in open. dentry_open() also checks this but only after the open method is called. This patch optimizes away the unnecessary upcalls in this case. It could be a correctness issue too: if filesystem has open() with side effect, then it should fail before doing the open, not after. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/file.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 6454022b053..657ab11c173 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -23,6 +23,10 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir) struct fuse_file *ff; int err; + /* VFS checks this, but only _after_ ->open() */ + if (file->f_flags & O_DIRECT) + return -EINVAL; + err = generic_file_open(inode, file); if (err) return err; -- cgit v1.2.3 From 18efefa9355119b4f6d9b73b074ebbf9882c37c3 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Tue, 4 Oct 2005 13:06:00 +0100 Subject: NTFS: Fix a stupid bug in __ntfs_bitmap_set_bits_in_run() which caused the count to become negative and hence we had a wild memset() scribbling all over the system's ram. Signed-off-by: Anton Altaparmakov --- fs/ntfs/ChangeLog | 3 +++ fs/ntfs/bitmap.c | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 83f3322765c..de58579a1d0 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -102,6 +102,9 @@ ToDo/Notes: inode instead of a vfs inode as parameter. - Fix the definition of the CHKD ntfs record magic. It had an off by two error causing it to be CHKB instead of CHKD. + - Fix a stupid bug in __ntfs_bitmap_set_bits_in_run() which caused the + count to become negative and hence we had a wild memset() scribbling + all over the system's ram. 2.1.23 - Implement extension of resident files and make writing safe as well as many bug fixes, cleanups, and enhancements... diff --git a/fs/ntfs/bitmap.c b/fs/ntfs/bitmap.c index 12cf2e30c7d..7a190cdc60e 100644 --- a/fs/ntfs/bitmap.c +++ b/fs/ntfs/bitmap.c @@ -1,7 +1,7 @@ /* * bitmap.c - NTFS kernel bitmap handling. Part of the Linux-NTFS project. * - * Copyright (c) 2004 Anton Altaparmakov + * Copyright (c) 2004-2005 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -90,7 +90,8 @@ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit, /* If the first byte is partial, modify the appropriate bits in it. */ if (bit) { u8 *byte = kaddr + pos; - while ((bit & 7) && cnt--) { + while ((bit & 7) && cnt) { + cnt--; if (value) *byte |= 1 << bit++; else -- cgit v1.2.3 From c394e458b69632902d65f9e2f39df79314f72908 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Tue, 4 Oct 2005 13:08:53 +0100 Subject: NTFS: Fix a 64-bitness bug where a left-shift could overflow a 32-bit variable which we now cast to 64-bit first (fs/ntfs/mft.c::map_mft_record_page(). Signed-off-by: Anton Altaparmakov --- fs/ntfs/layout.h | 2 +- fs/ntfs/mft.c | 3 ++- fs/ntfs/unistr.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h index 01f2dfa39ce..5c248d404f0 100644 --- a/fs/ntfs/layout.h +++ b/fs/ntfs/layout.h @@ -309,7 +309,7 @@ typedef le16 MFT_RECORD_FLAGS; * Note: The _LE versions will return a CPU endian formatted value! */ #define MFT_REF_MASK_CPU 0x0000ffffffffffffULL -#define MFT_REF_MASK_LE const_cpu_to_le64(0x0000ffffffffffffULL) +#define MFT_REF_MASK_LE const_cpu_to_le64(MFT_REF_MASK_CPU) typedef u64 MFT_REF; typedef le64 leMFT_REF; diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 247586d1d5d..b011369b595 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -58,7 +58,8 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) * overflowing the unsigned long, but I don't think we would ever get * here if the volume was that big... */ - index = ni->mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; + index = (u64)ni->mft_no << vol->mft_record_size_bits >> + PAGE_CACHE_SHIFT; ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; i_size = i_size_read(mft_vi); diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c index a389a5a16c8..0ea887fc859 100644 --- a/fs/ntfs/unistr.c +++ b/fs/ntfs/unistr.c @@ -1,7 +1,7 @@ /* * unistr.c - NTFS Unicode string handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published -- cgit v1.2.3 From ce0fe7e70a0ad11097a3773e9f3f0de3d859edf0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Oct 2005 17:43:06 +0100 Subject: [PATCH] bfs endianness annotations Signed-off-by: Alexey Dobriyan Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/bfs/dir.c | 2 +- fs/bfs/inode.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index e240c335eb2..5af928fa044 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -108,7 +108,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, inode->i_mapping->a_ops = &bfs_aops; inode->i_mode = mode; inode->i_ino = ino; - BFS_I(inode)->i_dsk_ino = cpu_to_le16(ino); + BFS_I(inode)->i_dsk_ino = ino; BFS_I(inode)->i_sblock = 0; BFS_I(inode)->i_eblock = 0; insert_inode_hash(inode); diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index c7b39aa279d..868af0f224f 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -357,7 +357,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) } info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */ - info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - cpu_to_le32(bfs_sb->s_start))>>BFS_BSIZE_BITS; + info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - le32_to_cpu(bfs_sb->s_start))>>BFS_BSIZE_BITS; info->si_freei = 0; info->si_lf_eblk = 0; info->si_lf_sblk = 0; -- cgit v1.2.3 From c2b513dfbb04d7c94cca145172cfeb91f7683e54 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 4 Oct 2005 17:48:44 +0100 Subject: [PATCH] bfs iget() abuses bfs_fill_super() walks the inode table to get the bitmap of free inodes and collect stats. It has no business using iget() for that - it's a lot of extra work, extra icache pollution and more complex code. Switched to walking the damn thing directly. Note: that also allows to kill ->i_dsk_ino in there - separate patch if Tigran can confirm that this field can be zero only for deleted inodes (i.e. something that could only be found during that scan and not by normal lookups). Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/bfs/inode.c | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 868af0f224f..3af6c73c5b5 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -362,23 +362,41 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) info->si_lf_eblk = 0; info->si_lf_sblk = 0; info->si_lf_ioff = 0; + bh = NULL; for (i=BFS_ROOT_INO; i<=info->si_lasti; i++) { - inode = iget(s,i); - if (BFS_I(inode)->i_dsk_ino == 0) + struct bfs_inode *di; + int block = (i - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; + int off = (i - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; + unsigned long sblock, eblock; + + if (!off) { + brelse(bh); + bh = sb_bread(s, block); + } + + if (!bh) + continue; + + di = (struct bfs_inode *)bh->b_data + off; + + if (!di->i_ino) { info->si_freei++; - else { - set_bit(i, info->si_imap); - info->si_freeb -= inode->i_blocks; - if (BFS_I(inode)->i_eblock > info->si_lf_eblk) { - info->si_lf_eblk = BFS_I(inode)->i_eblock; - info->si_lf_sblk = BFS_I(inode)->i_sblock; - info->si_lf_ioff = BFS_INO2OFF(i); - } + continue; + } + set_bit(i, info->si_imap); + info->si_freeb -= BFS_FILEBLOCKS(di); + + sblock = le32_to_cpu(di->i_sblock); + eblock = le32_to_cpu(di->i_eblock); + if (eblock > info->si_lf_eblk) { + info->si_lf_eblk = eblock; + info->si_lf_sblk = sblock; + info->si_lf_ioff = BFS_INO2OFF(i); } - iput(inode); } + brelse(bh); if (!(s->s_flags & MS_RDONLY)) { - mark_buffer_dirty(bh); + mark_buffer_dirty(info->si_sbh); s->s_dirt = 1; } dump_imap("read_super", s); -- cgit v1.2.3 From 829841146878e082613a49581ae252c071057c23 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 6 Oct 2005 21:54:21 -0700 Subject: Avoid 'names_cache' memory leak with CONFIG_AUDITSYSCALL The nameidata "last.name" is always allocated with "__getname()", and should always be free'd with "__putname()". Using "putname()" without the underscores will leak memory, because the allocation will have been hidden from the AUDITSYSCALL code. Arguably the real bug is that the AUDITSYSCALL code is really broken, but in the meantime this fixes the problem people see. Reported by Robert Derr, patch by Rick Lindsley. Acked-by: Al Viro Signed-off-by: Linus Torvalds --- fs/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 043d587216b..aa62dbda93a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1551,19 +1551,19 @@ do_link: if (nd->last_type != LAST_NORM) goto exit; if (nd->last.name[nd->last.len]) { - putname(nd->last.name); + __putname(nd->last.name); goto exit; } error = -ELOOP; if (count++==32) { - putname(nd->last.name); + __putname(nd->last.name); goto exit; } dir = nd->dentry; down(&dir->d_inode->i_sem); path.dentry = __lookup_hash(&nd->last, nd->dentry, nd); path.mnt = nd->mnt; - putname(nd->last.name); + __putname(nd->last.name); goto do_last; } -- cgit v1.2.3 From dd0fc66fb33cd610bc1a5db8a5e232d34879b4d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 7 Oct 2005 07:46:04 +0100 Subject: [PATCH] gfp flags annotations - part 1 - added typedef unsigned int __nocast gfp_t; - replaced __nocast uses for gfp flags with gfp_t - it gives exactly the same warnings as far as sparse is concerned, doesn't change generated code (from gcc point of view we replaced unsigned int with typedef) and documents what's going on far better. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/bio.c | 10 +++++----- fs/buffer.c | 2 +- fs/mpage.c | 2 +- fs/ntfs/malloc.h | 2 +- fs/posix_acl.c | 6 +++--- fs/xfs/linux-2.6/kmem.c | 10 +++++----- fs/xfs/linux-2.6/kmem.h | 13 ++++++------- 7 files changed, 22 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 83a34957456..7d81a93afd4 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -75,7 +75,7 @@ struct bio_set { */ static struct bio_set *fs_bio_set; -static inline struct bio_vec *bvec_alloc_bs(unsigned int __nocast gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) +static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) { struct bio_vec *bvl; struct biovec_slab *bp; @@ -155,7 +155,7 @@ inline void bio_init(struct bio *bio) * allocate bio and iovecs from the memory pools specified by the * bio_set structure. **/ -struct bio *bio_alloc_bioset(unsigned int __nocast gfp_mask, int nr_iovecs, struct bio_set *bs) +struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask); @@ -181,7 +181,7 @@ out: return bio; } -struct bio *bio_alloc(unsigned int __nocast gfp_mask, int nr_iovecs) +struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) { struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); @@ -277,7 +277,7 @@ inline void __bio_clone(struct bio *bio, struct bio *bio_src) * * Like __bio_clone, only also allocates the returned bio */ -struct bio *bio_clone(struct bio *bio, unsigned int __nocast gfp_mask) +struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) { struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); @@ -1078,7 +1078,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) return bp; } -static void *bio_pair_alloc(unsigned int __nocast gfp_flags, void *data) +static void *bio_pair_alloc(gfp_t gfp_flags, void *data) { return kmalloc(sizeof(struct bio_pair), gfp_flags); } diff --git a/fs/buffer.c b/fs/buffer.c index 6cbfceabd95..1216c0d3c8c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3045,7 +3045,7 @@ static void recalc_bh_state(void) buffer_heads_over_limit = (tot > max_buffer_heads); } -struct buffer_head *alloc_buffer_head(unsigned int __nocast gfp_flags) +struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) { struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); if (ret) { diff --git a/fs/mpage.c b/fs/mpage.c index bb9aebe9386..c5adcdddf3c 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -102,7 +102,7 @@ static struct bio *mpage_bio_submit(int rw, struct bio *bio) static struct bio * mpage_alloc(struct block_device *bdev, sector_t first_sector, int nr_vecs, - unsigned int __nocast gfp_flags) + gfp_t gfp_flags) { struct bio *bio; diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index 006946efca8..590887b943f 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -40,7 +40,7 @@ * Depending on @gfp_mask the allocation may be guaranteed to succeed. */ static inline void *__ntfs_malloc(unsigned long size, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { if (likely(size <= PAGE_SIZE)) { BUG_ON(!size); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 296480e96dd..6c8dcf7613f 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -35,7 +35,7 @@ EXPORT_SYMBOL(posix_acl_permission); * Allocate a new ACL with the specified number of entries. */ struct posix_acl * -posix_acl_alloc(int count, unsigned int __nocast flags) +posix_acl_alloc(int count, gfp_t flags) { const size_t size = sizeof(struct posix_acl) + count * sizeof(struct posix_acl_entry); @@ -51,7 +51,7 @@ posix_acl_alloc(int count, unsigned int __nocast flags) * Clone an ACL. */ struct posix_acl * -posix_acl_clone(const struct posix_acl *acl, unsigned int __nocast flags) +posix_acl_clone(const struct posix_acl *acl, gfp_t flags) { struct posix_acl *clone = NULL; @@ -185,7 +185,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, mode_t *mode_p) * Create an ACL representing the file mode permission bits of an inode. */ struct posix_acl * -posix_acl_from_mode(mode_t mode, unsigned int __nocast flags) +posix_acl_from_mode(mode_t mode, gfp_t flags) { struct posix_acl *acl = posix_acl_alloc(3, flags); if (!acl) diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 4b184559f23..d2653b589b1 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -45,7 +45,7 @@ void * -kmem_alloc(size_t size, unsigned int __nocast flags) +kmem_alloc(size_t size, gfp_t flags) { int retries = 0; unsigned int lflags = kmem_flags_convert(flags); @@ -67,7 +67,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) } void * -kmem_zalloc(size_t size, unsigned int __nocast flags) +kmem_zalloc(size_t size, gfp_t flags) { void *ptr; @@ -90,7 +90,7 @@ kmem_free(void *ptr, size_t size) void * kmem_realloc(void *ptr, size_t newsize, size_t oldsize, - unsigned int __nocast flags) + gfp_t flags) { void *new; @@ -105,7 +105,7 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize, } void * -kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) +kmem_zone_alloc(kmem_zone_t *zone, gfp_t flags) { int retries = 0; unsigned int lflags = kmem_flags_convert(flags); @@ -124,7 +124,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) } void * -kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) +kmem_zone_zalloc(kmem_zone_t *zone, gfp_t flags) { void *ptr; diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 109fcf27e25..ee7010f085b 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -81,7 +81,7 @@ typedef unsigned long xfs_pflags_t; *(NSTATEP) = *(OSTATEP); \ } while (0) -static __inline unsigned int kmem_flags_convert(unsigned int __nocast flags) +static __inline unsigned int kmem_flags_convert(gfp_t flags) { unsigned int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ @@ -125,13 +125,12 @@ kmem_zone_destroy(kmem_zone_t *zone) BUG(); } -extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); -extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); +extern void *kmem_zone_zalloc(kmem_zone_t *, gfp_t); +extern void *kmem_zone_alloc(kmem_zone_t *, gfp_t); -extern void *kmem_alloc(size_t, unsigned int __nocast); -extern void *kmem_realloc(void *, size_t, size_t, - unsigned int __nocast); -extern void *kmem_zalloc(size_t, unsigned int __nocast); +extern void *kmem_alloc(size_t, gfp_t); +extern void *kmem_realloc(void *, size_t, size_t, gfp_t); +extern void *kmem_zalloc(size_t, gfp_t); extern void kmem_free(void *, size_t); typedef struct shrinker *kmem_shaker_t; -- cgit v1.2.3 From 1cc956e12aedfdc6baf6312bc36a6b5a71af3c9d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 9 Oct 2005 10:41:32 -0500 Subject: [PATCH] relayfs: fix bogus param value in call to vmap The third param in this call to vmap shouldn't be GFP_KERNEL, which makes no sense, but rather VM_MAP. Thanks to Al Viro for spotting this. Signed-off-by: Tom Zanussi Signed-off-by: Linus Torvalds --- fs/relayfs/buffers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c index 2aa8e271999..84e21ffa5ca 100644 --- a/fs/relayfs/buffers.c +++ b/fs/relayfs/buffers.c @@ -109,7 +109,7 @@ static void *relay_alloc_buf(struct rchan_buf *buf, unsigned long size) if (unlikely(!buf->page_array[i])) goto depopulate; } - mem = vmap(buf->page_array, n_pages, GFP_KERNEL, PAGE_KERNEL); + mem = vmap(buf->page_array, n_pages, VM_MAP, PAGE_KERNEL); if (!mem) goto depopulate; -- cgit v1.2.3 From 19cba8abd6ca09527c194864ae651db65cbacfe1 Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Tue, 11 Oct 2005 08:29:03 -0700 Subject: [PATCH] v9fs: remove additional buffer allocation from v9fs_file_read and v9fs_file_write v9fs_file_read and v9fs_file_write use kmalloc to allocate buffers as big as the data buffer received as parameter. kmalloc cannot be used to allocate buffers bigger than 128K, so reading/writing data in chunks bigger than 128k fails. This patch reorganizes v9fs_file_read and v9fs_file_write to allocate only buffers as big as the maximum data that can be sent in one 9P message. Signed-off-by: Latchesar Ionkov Cc: Eric Van Hensbergen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/vfs_file.c | 114 ++++++++++++++++--------------------------------------- 1 file changed, 33 insertions(+), 81 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index a4799e971d1..bbc3cc63854 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -175,16 +175,16 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) } /** - * v9fs_read - read from a file (internal) + * v9fs_file_read - read from a file * @filep: file pointer to read * @data: data buffer to read data into * @count: size of buffer * @offset: offset at which to read data * */ - static ssize_t -v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset) +v9fs_file_read(struct file *filp, char __user * data, size_t count, + loff_t * offset) { struct inode *inode = filp->f_dentry->d_inode; struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); @@ -194,6 +194,7 @@ v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset) int rsize = 0; int result = 0; int total = 0; + int n; dprintk(DEBUG_VFS, "\n"); @@ -216,10 +217,15 @@ v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset) } else *offset += result; - /* XXX - extra copy */ - memcpy(buffer, fcall->params.rread.data, result); + n = copy_to_user(data, fcall->params.rread.data, result); + if (n) { + dprintk(DEBUG_ERROR, "Problem copying to user %d\n", n); + kfree(fcall); + return -EFAULT; + } + count -= result; - buffer += result; + data += result; total += result; kfree(fcall); @@ -232,42 +238,7 @@ v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset) } /** - * v9fs_file_read - read from a file - * @filep: file pointer to read - * @data: data buffer to read data into - * @count: size of buffer - * @offset: offset at which to read data - * - */ - -static ssize_t -v9fs_file_read(struct file *filp, char __user * data, size_t count, - loff_t * offset) -{ - int retval = -1; - int ret = 0; - char *buffer; - - buffer = kmalloc(count, GFP_KERNEL); - if (!buffer) - return -ENOMEM; - - retval = v9fs_read(filp, buffer, count, offset); - if (retval > 0) { - if ((ret = copy_to_user(data, buffer, retval)) != 0) { - dprintk(DEBUG_ERROR, "Problem copying to user %d\n", - ret); - retval = ret; - } - } - - kfree(buffer); - - return retval; -} - -/** - * v9fs_write - write to a file + * v9fs_file_write - write to a file * @filep: file pointer to write * @data: data buffer to write data from * @count: size of buffer @@ -276,7 +247,8 @@ v9fs_file_read(struct file *filp, char __user * data, size_t count, */ static ssize_t -v9fs_write(struct file *filp, char *buffer, size_t count, loff_t * offset) +v9fs_file_write(struct file *filp, const char __user * data, + size_t count, loff_t * offset) { struct inode *inode = filp->f_dentry->d_inode; struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); @@ -286,30 +258,42 @@ v9fs_write(struct file *filp, char *buffer, size_t count, loff_t * offset) int result = -EIO; int rsize = 0; int total = 0; + char *buf; - dprintk(DEBUG_VFS, "data %p count %d offset %x\n", buffer, (int)count, + dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count, (int)*offset); rsize = v9ses->maxdata - V9FS_IOHDRSZ; if (v9fid->iounit != 0 && rsize > v9fid->iounit) rsize = v9fid->iounit; - dump_data(buffer, count); + buf = kmalloc(v9ses->maxdata - V9FS_IOHDRSZ, GFP_KERNEL); + if (!buf) + return -ENOMEM; do { if (count < rsize) rsize = count; - result = - v9fs_t_write(v9ses, fid, *offset, rsize, buffer, &fcall); + result = copy_from_user(buf, data, rsize); + if (result) { + dprintk(DEBUG_ERROR, "Problem copying from user\n"); + kfree(buf); + return -EFAULT; + } + + dump_data(buf, rsize); + result = v9fs_t_write(v9ses, fid, *offset, rsize, buf, &fcall); if (result < 0) { eprintk(KERN_ERR, "error while writing: %s(%d)\n", FCALL_ERROR(fcall), result); kfree(fcall); + kfree(buf); return result; } else *offset += result; kfree(fcall); + fcall = NULL; if (result != rsize) { eprintk(KERN_ERR, @@ -319,46 +303,14 @@ v9fs_write(struct file *filp, char *buffer, size_t count, loff_t * offset) } count -= result; - buffer += result; + data += result; total += result; } while (count); + kfree(buf); return total; } -/** - * v9fs_file_write - write to a file - * @filep: file pointer to write - * @data: data buffer to write data from - * @count: size of buffer - * @offset: offset at which to write data - * - */ - -static ssize_t -v9fs_file_write(struct file *filp, const char __user * data, - size_t count, loff_t * offset) -{ - int ret = -1; - char *buffer; - - buffer = kmalloc(count, GFP_KERNEL); - if (buffer == NULL) - return -ENOMEM; - - ret = copy_from_user(buffer, data, count); - if (ret) { - dprintk(DEBUG_ERROR, "Problem copying from user\n"); - ret = -EFAULT; - } else { - ret = v9fs_write(filp, buffer, count, offset); - } - - kfree(buffer); - - return ret; -} - struct file_operations v9fs_file_operations = { .llseek = generic_file_llseek, .read = v9fs_file_read, -- cgit v1.2.3 From 22c1ea44f0d33eda532883858b6cdabc5f265b66 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 11 Oct 2005 08:29:05 -0700 Subject: [PATCH] nfsacl: Solaris VxFS compatibility fix Here is a compatibility fix between Linux and Solaris when used with VxFS filesystems: Solaris usually accepts acl entries in any order, but with VxFS it replies with NFSERR_INVAL when it sees a four-entry acl that is not in canonical form. It may also fail with other non-canonical acls -- I can't tell, because that case never triggers: We only send non-canonical acls when we fake up an ACL_MASK entry. Instead of adding fake ACL_MASK entries at the end, inserting them in the correct position makes Solaris+VxFS happy. The Linux client and server sides don't care about entry order. The three-entry-acl special case in which we need a fake ACL_MASK entry was handled in xdr_nfsace_encode. The patch moves this into nfsacl_encode. Signed-off-by: Andreas Gruenbacher Acked-by: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs_common/nfsacl.c | 70 +++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index 251e5a1bb1c..0c2be8c0307 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -48,43 +48,26 @@ xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) (struct nfsacl_encode_desc *) desc; u32 *p = (u32 *) elem; - if (nfsacl_desc->count < nfsacl_desc->acl->a_count) { - struct posix_acl_entry *entry = - &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; + struct posix_acl_entry *entry = + &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; - *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag); - switch(entry->e_tag) { - case ACL_USER_OBJ: - *p++ = htonl(nfsacl_desc->uid); - break; - case ACL_GROUP_OBJ: - *p++ = htonl(nfsacl_desc->gid); - break; - case ACL_USER: - case ACL_GROUP: - *p++ = htonl(entry->e_id); - break; - default: /* Solaris depends on that! */ - *p++ = 0; - break; - } - *p++ = htonl(entry->e_perm & S_IRWXO); - } else { - const struct posix_acl_entry *pa, *pe; - int group_obj_perm = ACL_READ|ACL_WRITE|ACL_EXECUTE; - - FOREACH_ACL_ENTRY(pa, nfsacl_desc->acl, pe) { - if (pa->e_tag == ACL_GROUP_OBJ) { - group_obj_perm = pa->e_perm & S_IRWXO; - break; - } - } - /* fake up ACL_MASK entry */ - *p++ = htonl(ACL_MASK | nfsacl_desc->typeflag); - *p++ = htonl(0); - *p++ = htonl(group_obj_perm); + *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag); + switch(entry->e_tag) { + case ACL_USER_OBJ: + *p++ = htonl(nfsacl_desc->uid); + break; + case ACL_GROUP_OBJ: + *p++ = htonl(nfsacl_desc->gid); + break; + case ACL_USER: + case ACL_GROUP: + *p++ = htonl(entry->e_id); + break; + default: /* Solaris depends on that! */ + *p++ = 0; + break; } - + *p++ = htonl(entry->e_perm & S_IRWXO); return 0; } @@ -105,11 +88,28 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, .gid = inode->i_gid, }; int err; + struct posix_acl *acl2 = NULL; if (entries > NFS_ACL_MAX_ENTRIES || xdr_encode_word(buf, base, entries)) return -EINVAL; + if (encode_entries && acl && acl->a_count == 3) { + /* Fake up an ACL_MASK entry. */ + acl2 = posix_acl_alloc(4, GFP_KERNEL); + if (!acl2) + return -ENOMEM; + /* Insert entries in canonical order: other orders seem + to confuse Solaris VxFS. */ + acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */ + acl2->a_entries[1] = acl->a_entries[1]; /* ACL_GROUP_OBJ */ + acl2->a_entries[2] = acl->a_entries[1]; /* ACL_MASK */ + acl2->a_entries[2].e_tag = ACL_MASK; + acl2->a_entries[3] = acl->a_entries[2]; /* ACL_OTHER */ + nfsacl_desc.acl = acl2; + } err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc); + if (acl2) + posix_acl_release(acl2); if (!err) err = 8 + nfsacl_desc.desc.elem_size * nfsacl_desc.desc.array_len; -- cgit v1.2.3 From 6de505173e24e76bb33a2595312e0c2b44d49e58 Mon Sep 17 00:00:00 2001 From: "akpm@osdl.org" Date: Tue, 11 Oct 2005 08:29:08 -0700 Subject: [PATCH] binfmt_elf bss padding fix Nir Tzachar points out that if an ELF file specifies a zero-length bss at a whacky address, we cannot load that binary because padzero() tries to zero out the end of the page at the whacky address, and that may not be writeable. See also http://bugzilla.kernel.org/show_bug.cgi?id=5411 So teach load_elf_binary() to skip the bss settng altogether if the elf file has a zero-length bss segment. Cc: Roland McGrath Cc: Daniel Jacobowitz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7976a238f0a..d4b15576e58 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -905,7 +905,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) send_sig(SIGKILL, current, 0); goto out_free_dentry; } - if (padzero(elf_bss)) { + if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) { send_sig(SIGSEGV, current, 0); retval = -EFAULT; /* Nobody gets to see this, but.. */ goto out_free_dentry; -- cgit v1.2.3